Global polishing, some bugs corrected, dynamic allocation used instead
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs
2 Copyright (C) 1984,87,88,89,93,94 Free Software Foundation, Inc. and Ken Arnold
3 This file is not considered part of GNU Emacs.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18
19 /*
20 * Authors:
21 * Ctags originally by Ken Arnold.
22 * Fortran added by Jim Kleckner.
23 * Ed Pelegri-Llopart added C typedefs.
24 * Gnu Emacs TAGS format and modifications by RMS?
25 * Sam Kendall added C++.
26 * Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
27 #ifdef ETAGS_REGEXPS
28 * Regexp tags by Tom Tromey.
29 #endif
30 *
31 * Francesco Potorti` (pot@cnuce.cnr.it) is the current maintainer.
32 */
33
/* Revision string compiled into the executable.  The leading "@(#)"
   is presumably there so that what(1)-style tools can locate it --
   confirm before relying on that.  */
char pot_etags_version[] = "@(#) pot revision number is 11.15";
35
36 #ifdef MSDOS
37 #include <fcntl.h>
38 #include <sys/param.h>
39 #endif /* MSDOS */
40
41 #ifdef WINDOWSNT
42 #include <stdlib.h>
43 #include <fcntl.h>
44 #include <string.h>
45 #define MAXPATHLEN _MAX_PATH
46 #endif
47
48 #ifdef HAVE_CONFIG_H
49 #include <config.h>
50 /* On some systems, Emacs defines static as nothing for the sake
51 of unexec. We don't want that here since we don't use unexec. */
52 #undef static
53 #endif
54
55 #include <stdio.h>
56 #include <ctype.h>
57 #include <errno.h>
58 #ifndef errno
59 extern int errno;
60 #endif
61 #include <sys/types.h>
62 #include <sys/stat.h>
63
64 #if !defined (S_ISREG) && defined (S_IFREG)
65 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
66 #endif
67
68 #include <getopt.h>
69
70 #ifdef ETAGS_REGEXPS
71 #include <regex.h>
72 #endif /* ETAGS_REGEXPS */
73
/* Truth values used with the `logical' type.  */
#define FALSE 0
#define TRUE 1

/* Build-time switch between the two programs made from this file.
   Define CTAGS to get "ctags", compatible with the usual one.  Leave
   it undefined to get "etags", which makes emacs-style tag tables and
   tags typedefs, #defines and struct/union/enum by default.  Either
   way CTAGS ends up defined as TRUE or FALSE so that it can be used
   both in `#if' and in ordinary expressions.  */
#ifndef CTAGS
# define CTAGS FALSE
#else
# undef CTAGS
# define CTAGS TRUE
#endif

/* Process exit statuses for success (GOOD) and failure (BAD); VMS
   uses the opposite numeric convention.  */
#ifndef VMS
# define GOOD 0
# define BAD 1
#else
# define GOOD 1
# define BAD 0
#endif

/* Flags naming the C dialect extensions.  */
#define C_PLPL 0x00001          /* C++ */
#define C_STAR 0x00003          /* C* (value includes the C_PLPL bit) */
#define YACC 0x10000            /* yacc file */

/* String equality tests: whole strings, and the first N characters.  */
#define streq(s,t) (!strcmp (s, t))
#define strneq(s,t,n) (!strncmp (s, t, n))

/* Character classification by lookup in the tables that init fills.  */
#define iswhite(arg) (_wht[arg])        /* T if char is white */
#define begtoken(arg) (_btk[arg])       /* T if char can start token */
#define intoken(arg) (_itk[arg])        /* T if char can be in token */
#define endtoken(arg) (_etk[arg])       /* T if char ends tokens */

/*
 * xnew -- allocate storage for N objects of type Type through xmalloc
 *
 * SYNOPSIS: Type *xnew (int n, Type);
 */
#define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
115
/* Boolean type; holds TRUE or FALSE.  */
typedef int logical;

/* One recorded tag.  Nodes are linked through `left' and `right'
   (see add_node for how the links are used).  */
typedef struct nd_st
{
  char *name;                   /* function or type name */
  char *file;                   /* file name */
  logical is_func;              /* use pattern or line no */
  logical named;                /* list name separately */
  logical been_warned;          /* set if noticed dup */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
  char *pat;                    /* search pattern */
  struct nd_st *left;           /* left son */
  struct nd_st *right;          /* right son */
} NODE;
130
131 extern char *getenv ();
132
133 char *concat ();
134 char *savenstr (), *savestr ();
135 char *etags_strchr (), *etags_strrchr ();
136 char *etags_getcwd ();
137 char *relative_filename (), *absolute_filename (), *absolute_dirname ();
138 char *xmalloc (), *xrealloc ();
139
140 typedef void Lang_function ();
141 #if FALSE /* many compilers barf on this */
142 Lang_function Asm_labels;
143 Lang_function default_C_entries;
144 Lang_function C_entries;
145 Lang_function Cplusplus_entries;
146 Lang_function Cstar_entries;
147 Lang_function Fortran_functions;
148 Lang_function Yacc_entries;
149 Lang_function Lisp_functions;
150 Lang_function Pascal_functions;
151 Lang_function Prolog_functions;
152 Lang_function Scheme_functions;
153 Lang_function TeX_functions;
154 Lang_function just_read_file;
155 #else /* so let's write it this way */
156 void Asm_labels ();
157 void default_C_entries ();
158 void C_entries ();
159 void Cplusplus_entries ();
160 void Cstar_entries ();
161 void Fortran_functions ();
162 void Yacc_entries ();
163 void Lisp_functions ();
164 void Pascal_functions ();
165 void Prolog_functions ();
166 void Scheme_functions ();
167 void TeX_functions ();
168 void just_read_file ();
169 #endif
170
171 logical get_language ();
172 int total_size_of_entries ();
173 long readline ();
174 long readline_internal ();
175 #ifdef ETAGS_REGEXPS
176 void add_regex ();
177 #endif
178 void add_node ();
179 void error ();
180 void fatal (), pfatal ();
181 void find_entries ();
182 void free_tree ();
183 void getit ();
184 void init ();
185 void initbuffer ();
186 void pfnote ();
187 void process_file ();
188 void put_entries ();
189 void takeprec ();
190
191 \f
192 char searchar = '/'; /* use /.../ searches */
193
194 int lineno; /* line number of current line */
195 long charno; /* current character number */
196
197 long linecharno; /* charno of start of line; not used by C,
198 but by every other language. */
199
200 char *curfile; /* current input file name */
201 char *tagfile; /* output file */
202 char *progname; /* name this program was invoked with */
203 char *cwd; /* current working directory */
204 char *tagfiledir; /* directory of tagfile */
205
206 FILE *tagf; /* ioptr for tags file */
207 NODE *head; /* the head of the binary tree of tags */
208
209 /*
210 * A `struct linebuffer' is a structure which holds a line of text.
211 * `readline' reads a line from a stream into a linebuffer and works
212 * regardless of the length of the line.
213 */
214 struct linebuffer
215 {
216 long size;
217 char *buffer;
218 };
219
220 struct linebuffer lb; /* the current line */
221 struct
222 {
223 long linepos;
224 struct linebuffer lb; /* used by C_entries instead of lb */
225 } lbs[2];
226
227 /* boolean "functions" (see init) */
228 logical _wht[0177], _etk[0177], _itk[0177], _btk[0177];
229 char
230 *white = " \f\t\n\013", /* white chars */
231 *endtk = " \t\n\013\"'#()[]{}=-+%*/&|^~!<>;,.:?", /* token ending chars */
232 /* token starting chars */
233 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~",
234 /* valid in-token chars */
235 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
236
237 logical append_to_tagfile; /* -a: append to tags */
238 /* The following three default to TRUE for etags, but to FALSE for ctags. */
239 logical typedefs; /* -t: create tags for typedefs */
240 logical typedefs_and_cplusplus; /* -T: create tags for typedefs, level */
241 /* 0 struct/enum/union decls, and C++ */
242 /* member functions. */
243 logical constantypedefs; /* -d: create tags for C #define and enum */
244 /* constants. Enum consts not implemented. */
245 /* -D: opposite of -d. Default under ctags. */
246 logical update; /* -u: update tags */
247 logical vgrind_style; /* -v: create vgrind style index output */
248 logical no_warnings; /* -w: suppress warnings */
249 logical cxref_style; /* -x: create cxref style output */
250 logical cplusplus; /* .[hc] means C++, not C */
251 logical noindentypedefs; /* -S: ignore indentation in C */
252 #define permit_duplicates TRUE /* allow duplicate tags */
253
/* Long option names accepted by main, each mapped to the single-letter
   option it stands for.  "help" is listed twice, once per letter.  The
   table ends with an all-zero entry.  */
struct option longopts[] =
{
  { "append",             no_argument,       NULL, 'a' },
  { "backward-search",    no_argument,       NULL, 'B' },
  { "c++",                no_argument,       NULL, 'C' },
  { "cxref",              no_argument,       NULL, 'x' },
  { "defines",            no_argument,       NULL, 'd' },
  { "help",               no_argument,       NULL, 'h' },
  { "help",               no_argument,       NULL, 'H' },
  { "ignore-indentation", no_argument,       NULL, 'S' },
  { "include",            required_argument, NULL, 'i' },
  { "language",           required_argument, NULL, 'l' },
  { "no-defines",         no_argument,       NULL, 'D' },
  { "no-regex",           no_argument,       NULL, 'R' },
  { "no-warn",            no_argument,       NULL, 'w' },
  { "output",             required_argument, NULL, 'o' },
  { "regex",              required_argument, NULL, 'r' },
  { "typedefs",           no_argument,       NULL, 't' },
  { "typedefs-and-c++",   no_argument,       NULL, 'T' },
  { "update",             no_argument,       NULL, 'u' },
  { "version",            no_argument,       NULL, 'V' },
  { "vgrind",             no_argument,       NULL, 'v' },
  { NULL,                 0,                 NULL, 0   }
};
278
#ifdef ETAGS_REGEXPS
/* One user-supplied regular expression: the compiled pattern, the
   match registers kept with it, and the name string.  */
struct pattern
{
  struct re_pattern_buffer *pattern;
  struct re_registers regs;
  char *name_pattern;
  logical error_signaled;
};

/* How many regexps have been recorded so far.  */
int num_patterns = 0;

/* The recorded regexps themselves; NULL until one is recorded.  */
struct pattern *patterns = NULL;
#endif /* ETAGS_REGEXPS */
296
297 /* Language stuff. */
298 struct lang_entry
299 {
300 char *extension;
301 Lang_function *function;
302 };
303
304 /* Table of language names and corresponding functions. */
305 /* It is ok for a given function to be listed under more than one
306 name. I just didn't. */
307 /* "auto" language reverts to default behavior. */
308 struct lang_entry lang_names[] =
309 {
310 { "asm", Asm_labels },
311 { "c", default_C_entries },
312 { "c++", Cplusplus_entries },
313 { "c*", Cstar_entries },
314 { "fortran", Fortran_functions },
315 { "lisp", Lisp_functions },
316 { "none", just_read_file },
317 { "pascal", Pascal_functions },
318 { "scheme" , Scheme_functions },
319 { "tex", TeX_functions },
320 { "auto", NULL },
321 { NULL, NULL }
322 };
323
324 /* Table of file extensions and corresponding language functions. */
325 struct lang_entry lang_extensions[] =
326 {
327 /* Assume that ".s" or ".a" is assembly code. -wolfgang.
328 Or even ".sa". */
329 { "a", Asm_labels }, /* Unix assembler */
330 { "asm", Asm_labels }, /* Microcontroller assembly */
331 { "def", Asm_labels }, /* BSO/Tasking definition includes */
332 { "inc", Asm_labels }, /* Microcontroller include files */
333 { "ins", Asm_labels }, /* Microcontroller include files */
334 { "s", Asm_labels },
335 { "sa", Asm_labels }, /* Unix assembler */
336 { "src", Asm_labels }, /* BSO/Tasking C compiler output */
337
338 /* .aux, .bbl, .clo, .cls, .dtx or .tex implies LaTeX source code. */
339 { "aux", TeX_functions },
340 { "bbl", TeX_functions },
341 { "clo", TeX_functions },
342 { "cls", TeX_functions },
343 { "dtx", TeX_functions },
344 { "sty", TeX_functions },
345 { "tex", TeX_functions },
346
347 /* .l or .el or .lisp (or .cl or .clisp or ...) implies lisp source code */
348 { "cl", Lisp_functions },
349 { "clisp", Lisp_functions },
350 { "el", Lisp_functions },
351 { "l", Lisp_functions },
352 { "lisp", Lisp_functions },
353 { "lsp", Lisp_functions },
354
355 /* .scm or .sm or .scheme implies scheme source code */
356 { "SCM", Scheme_functions },
357 { "SM", Scheme_functions },
358 { "oak", Scheme_functions },
359 { "sch", Scheme_functions },
360 { "scheme", Scheme_functions },
361 { "scm", Scheme_functions },
362 { "sm", Scheme_functions },
363 { "t", Scheme_functions },
364 /* FIXME Can't do the `SCM' or `scm' prefix with a version number */
365
366 /* Note that ".c" and ".h" can be considered C++, if the --c++
367 flag was given. That is why default_C_entries is called here. */
368 { "c", default_C_entries },
369 { "h", default_C_entries },
370
371 /* .C or .H or .cpp or .cxx or .hxx or .hh or .cc or .cpp: a C++ file */
372 { "C", Cplusplus_entries },
373 { "H", Cplusplus_entries },
374 { "cc", Cplusplus_entries },
375 { "cpp", Cplusplus_entries },
376 { "cxx", Cplusplus_entries },
377 { "hh", Cplusplus_entries },
378 { "hxx", Cplusplus_entries },
379
380 /* .y: a yacc file */
381 { "y", Yacc_entries },
382
383 /* .cs or .hs: a C* file */
384 { "cs", Cstar_entries },
385 { "hs", Cstar_entries },
386
387 /* .f and .for are FORTRAN. */
388 { "F", Fortran_functions },
389 { "f", Fortran_functions },
390 { "for", Fortran_functions },
391
392 /* .pl implies prolog source code */
393 { "pl", Prolog_functions },
394
395 /* .p or .pas: a Pascal file */
396 { "p", Pascal_functions },
397 { "pas", Pascal_functions },
398
399 { NULL, NULL }
400 };
401
402 /* Non-NULL if language fixed. */
403 Lang_function *lang_func = NULL;
404
405 \f
406 void
407 print_language_names ()
408 {
409 struct lang_entry *name, *ext;
410
411 puts ("\nThese are the currently supported languages, along with the\n\
412 default extensions for files:");
413 for (name = lang_names; name->extension; ++name)
414 {
415 printf ("\t%s\t", name->extension);
416 for (ext = lang_extensions; ext->extension; ++ext)
417 if (name->function == ext->function)
418 printf (" .%s", ext->extension);
419 puts ("");
420 }
421 puts ("Where `auto' means use default language for files based on filename\n\
422 extension, and `none' means only do regexp processing on files.\n\
423 If no language is specified and no extension is found for some file,\n\
424 Fortran is tried first; if no tags are found, C is tried next.");
425 }
426
427 void
428 print_version ()
429 {
430 #ifdef VERSION
431 printf ("%s for Emacs version %s.\n", (CTAGS) ? "CTAGS" : "ETAGS", VERSION);
432 #else
433 printf ("%s for Emacs version 19.\n", (CTAGS) ? "CTAGS" : "ETAGS");
434 #endif
435
436 exit (GOOD);
437 }
438
439 void
440 print_help ()
441 {
442 printf ("These are the options accepted by %s. You may use unambiguous\n\
443 abbreviations for the long option names. A - as file name means read\n\
444 names from stdin.\n\n", progname);
445
446 puts ("-a, --append\n\
447 Append tag entries to existing tags file.");
448
449 if (CTAGS)
450 puts ("-B, --backward-search\n\
451 Write the search commands for the tag entries using '?', the\n\
452 backward-search command instead of '/', the forward-search command.");
453
454 puts ("-C, --c++\n\
455 Treat files whose extension defaults to C language as C++ files.");
456
457 if (CTAGS)
458 puts ("-d, --defines\n\
459 Create tag entries for constant C #defines, too.");
460 else
461 puts ("-D, --no-defines\n\
462 Don't create tag entries for constant C #defines. This makes\n\
463 the tags file smaller.");
464
465 if (!CTAGS)
466 {
467 puts ("-i FILE, --include=FILE\n\
468 Include a note in tag file indicating that, when searching for\n\
469 a tag, one should also consult the tags file FILE after\n\
470 checking the current file.");
471 puts ("-l LANG, --language=LANG\n\
472 Force the following files to be considered as written in the\n\
473 named language up to the next --language=LANG option.");
474 }
475
476 #ifdef ETAGS_REGEXPS
477 puts ("-r /REGEXP/, --regex=/REGEXP/\n\
478 Make a tag for each line matching pattern REGEXP in the\n\
479 following files. REGEXP is anchored (as if preceded by ^).\n\
480 The form /REGEXP/NAME/ creates a named tag. For example Tcl\n\
481 named tags can be created with:\n\
482 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
483 puts ("-R, --no-regex\n\
484 Don't create tags from regexps for the following files.");
485 #endif /* ETAGS_REGEXPS */
486 puts ("-o FILE, --output=FILE\n\
487 Write the tags to FILE.");
488 puts ("-S, --ignore-indentation\n\
489 Don't rely on indentation quite as much as normal. Currently,\n\
490 this means not to assume that a closing brace in the first\n\
491 column is the final brace of a function or structure\n\
492 definition in C and C++.");
493
494 if (CTAGS)
495 {
496 puts ("-t, --typedefs\n\
497 Generate tag entries for C typedefs.");
498 puts ("-T, --typedefs-and-c++\n\
499 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
500 and C++ member functions.");
501 puts ("-u, --update\n\
502 Update the tag entries for the given files, leaving tag\n\
503 entries for other files in place. Currently, this is\n\
504 implemented by deleting the existing entries for the given\n\
505 files and then rewriting the new entries at the end of the\n\
506 tags file. It is often faster to simply rebuild the entire\n\
507 tag file than to use this.");
508 puts ("-v, --vgrind\n\
509 Generates an index of items intended for human consumption,\n\
510 similar to the output of vgrind. The index is sorted, and\n\
511 gives the page number of each item.");
512 puts ("-w, --no-warn\n\
513 Suppress warning messages about entries defined in multiple\n\
514 files.");
515 puts ("-x, --cxref\n\
516 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
517 The output uses line numbers instead of page numbers, but\n\
518 beyond that the differences are cosmetic; try both to see\n\
519 which you like.");
520 }
521
522 puts ("-V, --version\n\
523 Print the version of the program.\n\
524 -h, --help\n\
525 Print this help message.");
526
527 print_language_names ();
528
529 exit (GOOD);
530 }
531
532 \f
533 enum argument_type
534 {
535 at_language,
536 at_regexp,
537 at_filename
538 };
539
540 /* This structure helps us allow mixing of --lang and filenames. */
541 typedef struct
542 {
543 enum argument_type arg_type;
544 char *what;
545 Lang_function *function;
546 } ARGUMENT;
547
548 #ifdef VMS /* VMS specific functions */
549
/* End-of-string marker.  */
#define EOS '\0'

/* Longest file specification handled, not counting the terminator.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN 255

/* A length word followed by the text, with room for MAX_FILE_SPEC_LEN
   characters plus a terminating EOS.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
559
560 /*
561 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
562 returning in each successive call the next filename matching the input
563 spec. The function expects that each in_spec passed
564 to it will be processed to completion; in particular, up to and
565 including the call following that in which the last matching name
566 is returned, the function ignores the value of in_spec, and will
567 only start processing a new spec with the following call.
568 If an error occurs, on return out_spec contains the value
569 of in_spec when the error occurred.
570
571 With each successive filename returned in out_spec, the
572 function's return value is one. When there are no more matching
573 names the function returns zero. If on the first call no file
574 matches in_spec, or there is any other error, -1 is returned.
575 */
576
577 #include <rmsdef.h>
578 #include <descrip.h>
579 #define OUTSIZE MAX_FILE_SPEC_LEN
580 short
581 fn_exp (out, in)
582 vspec *out;
583 char *in;
584 {
585 static long context = 0;
586 static struct dsc$descriptor_s o;
587 static struct dsc$descriptor_s i;
588 static logical pass1 = TRUE;
589 long status;
590 short retval;
591
592 if (pass1)
593 {
594 pass1 = FALSE;
595 o.dsc$a_pointer = (char *) out;
596 o.dsc$w_length = (short)OUTSIZE;
597 i.dsc$a_pointer = in;
598 i.dsc$w_length = (short)strlen(in);
599 i.dsc$b_dtype = DSC$K_DTYPE_T;
600 i.dsc$b_class = DSC$K_CLASS_S;
601 o.dsc$b_dtype = DSC$K_DTYPE_VT;
602 o.dsc$b_class = DSC$K_CLASS_VS;
603 }
604 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
605 {
606 out->body[out->curlen] = EOS;
607 return 1;
608 }
609 else if (status == RMS$_NMF)
610 retval = 0;
611 else
612 {
613 strcpy(out->body, in);
614 retval = -1;
615 }
616 lib$find_file_end(&context);
617 pass1 = TRUE;
618 return retval;
619 }
620
621 /*
622 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
623 name of each file specified by the provided arg expanding wildcards.
624 */
625 char *
626 gfnames (arg, p_error)
627 char *arg;
628 logical *p_error;
629 {
630 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
631
632 switch (fn_exp (&filename, arg))
633 {
634 case 1:
635 *p_error = FALSE;
636 return filename.body;
637 case 0:
638 *p_error = FALSE;
639 return NULL;
640 default:
641 *p_error = TRUE;
642 return filename.body;
643 }
644 }
645
646 #ifndef OLD /* Newer versions of VMS do provide `system'. */
647 system (cmd)
648 char *cmd;
649 {
650 fprintf (stderr, "system() function not implemented under VMS\n");
651 }
652 #endif
653
#define VERSION_DELIM ';'
/* Normalise the file name S in place: cut it at the first
   VERSION_DELIM, if there is one, and convert everything before that
   point to lower case.  Returns S.  */
char *
massage_name (s)
     char *s;
{
  char *start = s;

  for ( ; *s; s++)
    if (*s == VERSION_DELIM)
      {
        *s = '\0';
        break;
      }
    else
      /* tolower is only defined for values representable as unsigned
         char (or EOF); a plain char may be negative.  */
      *s = tolower ((unsigned char) *s);
  return start;
}
670 #endif /* VMS */
671
672 \f
673 void
674 main (argc, argv)
675 int argc;
676 char *argv[];
677 {
678 int i;
679 unsigned int nincluded_files = 0;
680 char **included_files = xnew (argc, char *);
681 char *this_file;
682 ARGUMENT *argbuffer;
683 int current_arg = 0, file_count = 0;
684 struct linebuffer filename_lb;
685 #ifdef VMS
686 logical got_err;
687 #endif
688
689 #ifdef DOS_NT
690 _fmode = O_BINARY; /* all of files are treated as binary files */
691 #endif /* DOS_NT */
692
693 progname = argv[0];
694
695 /* Allocate enough no matter what happens. Overkill, but each one
696 is small. */
697 argbuffer = xnew (argc, ARGUMENT);
698
699 #ifdef ETAGS_REGEXPS
700 /* Set syntax for regular expression routines. */
701 re_set_syntax (RE_SYNTAX_EMACS);
702 #endif /* ETAGS_REGEXPS */
703
704 /*
705 * If etags, always find typedefs and structure tags. Why not?
706 * Also default is to find macro constants.
707 */
708 if (!CTAGS)
709 typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
710
711 while (1)
712 {
713 int opt = getopt_long (argc, argv,
714 "-aCdDf:l:o:r:RStTi:BuvxwVhH", longopts, 0);
715
716 if (opt == EOF)
717 break;
718
719 switch (opt)
720 {
721 case 0:
722 /* If getopt returns 0, then it has already processed a
723 long-named option. We should do nothing. */
724 break;
725
726 case 1:
727 /* This means that a filename has been seen. Record it. */
728 argbuffer[current_arg].arg_type = at_filename;
729 argbuffer[current_arg].what = optarg;
730 ++current_arg;
731 ++file_count;
732 break;
733
734 /* Common options. */
735 case 'a':
736 append_to_tagfile = TRUE;
737 break;
738 case 'C':
739 cplusplus = TRUE;
740 break;
741 case 'd':
742 constantypedefs = TRUE;
743 break;
744 case 'D':
745 constantypedefs = FALSE;
746 break;
747 case 'f': /* for compatibility with old makefiles */
748 case 'o':
749 if (tagfile)
750 {
751 fprintf(stderr,
752 "%s: -%c flag may only be given once.\n", progname, opt);
753 goto usage;
754 }
755 tagfile = optarg;
756 break;
757 case 'l':
758 if (!get_language (optarg, &argbuffer[current_arg].function))
759 {
760 fprintf (stderr, "%s: language \"%s\" not recognized.\n",
761 progname, optarg);
762 goto usage;
763 }
764 argbuffer[current_arg].arg_type = at_language;
765 ++current_arg;
766 break;
767 #ifdef ETAGS_REGEXPS
768 case 'r':
769 argbuffer[current_arg].arg_type = at_regexp;
770 argbuffer[current_arg].what = optarg;
771 ++current_arg;
772 break;
773 case 'R':
774 argbuffer[current_arg].arg_type = at_regexp;
775 argbuffer[current_arg].what = NULL;
776 ++current_arg;
777 break;
778 #endif /* ETAGS_REGEXPS */
779 case 'S':
780 noindentypedefs = TRUE;
781 break;
782 case 'V':
783 print_version ();
784 break;
785 case 'h':
786 case 'H':
787 print_help ();
788 break;
789 case 't':
790 typedefs = TRUE;
791 break;
792 case 'T':
793 typedefs = typedefs_and_cplusplus = TRUE;
794 break;
795 #if (!CTAGS)
796 /* Etags options */
797 case 'i':
798 included_files[nincluded_files++] = optarg;
799 break;
800 #else /* CTAGS */
801 /* Ctags options. */
802 case 'B':
803 searchar = '?';
804 break;
805 case 'u':
806 update = TRUE;
807 break;
808 case 'v':
809 vgrind_style = TRUE;
810 /*FALLTHRU*/
811 case 'x':
812 cxref_style = TRUE;
813 break;
814 case 'w':
815 no_warnings = TRUE;
816 break;
817 #endif /* CTAGS */
818 default:
819 fprintf (stderr,
820 "%s: -%c flag not recognised.\n", progname, opt);
821 goto usage;
822 }
823 }
824
825 for (; optind < argc; ++optind)
826 {
827 argbuffer[current_arg].arg_type = at_filename;
828 argbuffer[current_arg].what = argv[optind];
829 ++current_arg;
830 ++file_count;
831 }
832
833 if (nincluded_files == 0 && file_count == 0)
834 {
835 fprintf (stderr, "%s: No input files specified.\n", progname);
836
837 usage:
838 fprintf (stderr, "%s: Try `%s --help' for a complete list of options.\n",
839 progname, progname);
840 exit (BAD);
841 }
842
843 if (tagfile == NULL)
844 {
845 tagfile = CTAGS ? "tags" : "TAGS";
846 }
847 cwd = etags_getcwd (); /* the current working directory */
848 strcat (cwd, "/");
849 if (streq (tagfile, "-"))
850 {
851 tagfiledir = cwd;
852 }
853 else
854 {
855 tagfiledir = absolute_dirname (tagfile, cwd);
856 }
857
858 init (); /* set up boolean "functions" */
859
860 initbuffer (&lb);
861 initbuffer (&lbs[0].lb);
862 initbuffer (&lbs[1].lb);
863 initbuffer (&filename_lb);
864
865 if (!CTAGS)
866 {
867 if (streq (tagfile, "-"))
868 tagf = stdout;
869 else
870 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
871 if (tagf == NULL)
872 pfatal (tagfile);
873 }
874
875 /*
876 * Loop through files finding functions.
877 */
878 for (i = 0; i < current_arg; ++i)
879 {
880 switch (argbuffer[i].arg_type)
881 {
882 case at_language:
883 lang_func = argbuffer[i].function;
884 break;
885 #ifdef ETAGS_REGEXPS
886 case at_regexp:
887 add_regex (argbuffer[i].what);
888 break;
889 #endif
890 case at_filename:
891 #ifdef VMS
892 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
893 {
894 if (got_err)
895 {
896 error ("Can't find file %s\n", this_file);
897 argc--, argv++;
898 }
899 else
900 {
901 this_file = massage_name (this_file);
902 }
903 #else
904 this_file = argbuffer[i].what;
905 #endif
906 /* Input file named "-" means read file names from stdin
907 and use them. */
908 if (streq (this_file, "-"))
909 {
910 while (!feof (stdin))
911 {
912 (void) readline_internal (&filename_lb, stdin);
913 if (strlen (filename_lb.buffer) > 0)
914 process_file (filename_lb.buffer);
915 }
916 }
917 else
918 process_file (this_file);
919 #ifdef VMS
920 }
921 #endif
922 break;
923 }
924 }
925
926 if (!CTAGS)
927 {
928 while (nincluded_files-- > 0)
929 fprintf (tagf, "\f\n%s,include\n", *included_files++);
930
931 fclose (tagf);
932 exit (GOOD);
933 }
934
935 /* If CTAGS, we are here. process_file did not write the tags yet,
936 because we want them ordered. Let's do it now. */
937 if (cxref_style)
938 {
939 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
940 if (tagf == NULL)
941 pfatal (tagfile);
942 put_entries (head);
943 exit (GOOD);
944 }
945
946 if (update)
947 {
948 char cmd[BUFSIZ];
949 for (i = 0; i < current_arg; ++i)
950 {
951 if (argbuffer[i].arg_type != at_filename)
952 continue;
953 sprintf (cmd,
954 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
955 tagfile, argbuffer[i].what, tagfile);
956 if (system (cmd) != GOOD)
957 fatal ("failed to execute shell command");
958 }
959 append_to_tagfile = TRUE;
960 }
961
962 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
963 if (tagf == NULL)
964 pfatal (tagfile);
965 put_entries (head);
966 fclose (tagf);
967
968 if (update)
969 {
970 char cmd[BUFSIZ];
971 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
972 exit (system (cmd));
973 }
974 exit (GOOD);
975 }
976
977
978 /*
979 * Set the language, given the name.
980 */
981 logical
982 get_language (language, func)
983 char *language;
984 Lang_function **func;
985 {
986 struct lang_entry *lang;
987
988 for (lang = lang_names; lang->extension; ++lang)
989 {
990 if (streq (language, lang->extension))
991 {
992 *func = lang->function;
993 return TRUE;
994 }
995 }
996
997 return FALSE;
998 }
999
1000
1001 /*
1002 * This routine is called on each file argument.
1003 */
1004 void
1005 process_file (file)
1006 char *file;
1007 {
1008 struct stat stat_buf;
1009 FILE *inf;
1010
1011 if (stat (file, &stat_buf) == 0 && !S_ISREG (stat_buf.st_mode))
1012 {
1013 fprintf (stderr, "Skipping %s: it is not a regular file.\n", file);
1014 return;
1015 }
1016 if (streq (file, tagfile) && !streq (tagfile, "-"))
1017 {
1018 fprintf (stderr, "Skipping inclusion of %s in self.\n", file);
1019 return;
1020 }
1021 inf = fopen (file, "r");
1022 if (inf == NULL)
1023 {
1024 perror (file);
1025 return;
1026 }
1027
1028 find_entries (file, inf);
1029
1030 if (!CTAGS)
1031 {
1032 char *filename;
1033
1034 if (file[0] == '/')
1035 {
1036 /* file is an absolute filename. Canonicalise it. */
1037 filename = absolute_filename (file, cwd);
1038 }
1039 else
1040 {
1041 /* file is a filename relative to cwd. Make it relative
1042 to the directory of the tags file. */
1043 filename = relative_filename (file, tagfiledir);
1044 }
1045 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1046 put_entries (head);
1047 free_tree (head);
1048 head = NULL;
1049 }
1050 }
1051
1052 /*
1053 * This routine sets up the boolean pseudo-functions which work
1054 * by setting boolean flags dependent upon the corresponding character
1055 * Every char which is NOT in that string is not a white char. Therefore,
1056 * all of the array "_wht" is set to FALSE, and then the elements
1057 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1058 * of a char is TRUE if it is the string "white", else FALSE.
1059 */
1060 void
1061 init ()
1062 {
1063 register char *sp;
1064 register int i;
1065
1066 for (i = 0; i < 0177; i++)
1067 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
1068 for (sp = white; *sp; sp++)
1069 _wht[*sp] = TRUE;
1070 for (sp = endtk; *sp; sp++)
1071 _etk[*sp] = TRUE;
1072 for (sp = intk; *sp; sp++)
1073 _itk[*sp] = TRUE;
1074 for (sp = begtk; *sp; sp++)
1075 _btk[*sp] = TRUE;
1076 _wht[0] = _wht['\n'];
1077 _etk[0] = _etk['\n'];
1078 _btk[0] = _btk['\n'];
1079 _itk[0] = _itk['\n'];
1080 }
1081
1082 /*
1083 * This routine opens the specified file and calls the function
1084 * which finds the function and type definitions.
1085 */
1086 void
1087 find_entries (file, inf)
1088 char *file;
1089 FILE *inf;
1090 {
1091 char *cp;
1092 struct lang_entry *lang;
1093 NODE *old_last_node;
1094 extern NODE *last_node;
1095
1096 curfile = savestr (file);
1097 cp = etags_strrchr (file, '.');
1098
1099 /* If user specified a language, use it. */
1100 if (lang_func != NULL)
1101 {
1102 lang_func (inf);
1103 fclose (inf);
1104 return;
1105 }
1106
1107 if (cp)
1108 {
1109 ++cp;
1110 for (lang = lang_extensions; lang->extension; ++lang)
1111 {
1112 if (streq (cp, lang->extension))
1113 {
1114 lang->function (inf);
1115 fclose (inf);
1116 return;
1117 }
1118 }
1119 }
1120
1121 /* Try Fortran. */
1122 old_last_node = last_node;
1123 Fortran_functions (inf);
1124
1125 /* No Fortran entries found. Try C. */
1126 if (old_last_node == last_node)
1127 default_C_entries (inf);
1128 fclose (inf);
1129 }
1130 \f
1131 /* Record a tag. */
1132 /* Should take a TOKEN* instead!! */
1133 void
1134 pfnote (name, is_func, named, linestart, linelen, lno, cno)
1135 char *name; /* tag name */
1136 logical is_func; /* tag is a function */
1137 logical named; /* tag different from text of definition */
1138 char *linestart; /* start of the line where tag is */
1139 int linelen; /* length of the line where tag is */
1140 int lno; /* line number */
1141 long cno; /* character number */
1142 {
1143 register NODE *np = xnew (1, NODE);
1144 register char *fp;
1145
1146 /* If ctags mode, change name "main" to M<thisfilename>. */
1147 if (CTAGS && !cxref_style && streq (name, "main"))
1148 {
1149 fp = etags_strrchr (curfile, '/');
1150 np->name = concat ("M", fp == 0 ? curfile : fp + 1, "");
1151 fp = etags_strrchr (np->name, '.');
1152 if (fp && fp[1] != '\0' && fp[2] == '\0')
1153 fp[0] = 0;
1154 np->named = TRUE;
1155 }
1156 else
1157 {
1158 np->name = name;
1159 np->named = named;
1160 }
1161 np->file = curfile;
1162 np->is_func = is_func;
1163 np->lno = lno;
1164 /* Our char numbers are 0-base, because of C language tradition?
1165 ctags compatibility? old versions compatibility? I don't know.
1166 Anyway, since emacs's are 1-base we espect etags.el to take care
1167 of the difference. If we wanted to have 1-based numbers, we would
1168 uncomment the +1 below. */
1169 np->cno = cno /* + 1 */ ;
1170 np->left = np->right = NULL;
1171 np->pat = savenstr (linestart, ((CTAGS && !cxref_style) ? 50 : linelen));
1172
1173 add_node (np, &head);
1174 }
1175
1176 /*
1177 * free_tree ()
1178 * recurse on left children, iterate on right children.
1179 */
1180 void
1181 free_tree (node)
1182 register NODE *node;
1183 {
1184 while (node)
1185 {
1186 register NODE *node_right = node->right;
1187 free_tree (node->left);
1188 if (node->named)
1189 free (node->name);
1190 free (node->pat);
1191 free ((char *) node);
1192 node = node_right;
1193 }
1194 }
1195
1196 /*
1197 * add_node ()
1198 * Adds a node to the tree of nodes. In etags mode, we don't keep
1199 * it sorted; we just keep a linear list. In ctags mode, maintain
1200 * an ordered tree, with no attempt at balancing.
1201 *
1202 * add_node is the only function allowed to add nodes, so it can
1203 * maintain state.
1204 */
1205 NODE *last_node = NULL;
1206 void
1207 add_node (node, cur_node_p)
1208 NODE *node, **cur_node_p;
1209 {
1210 register int dif;
1211 register NODE *cur_node = *cur_node_p;
1212
1213 if (cur_node == NULL)
1214 {
1215 *cur_node_p = node;
1216 last_node = node;
1217 return;
1218 }
1219
1220 if (!CTAGS)
1221 {
1222 /* Etags Mode */
1223 if (last_node == NULL)
1224 fatal ("internal error in add_node", 0);
1225 last_node->right = node;
1226 last_node = node;
1227 }
1228 else
1229 {
1230 /* Ctags Mode */
1231 dif = strcmp (node->name, cur_node->name);
1232
1233 /*
1234 * If this tag name matches an existing one, then
1235 * do not add the node, but maybe print a warning.
1236 */
1237 if (!dif)
1238 {
1239 if (node->file == cur_node->file)
1240 {
1241 if (!no_warnings)
1242 {
1243 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1244 node->file, lineno, node->name);
1245 fprintf (stderr, "Second entry ignored\n");
1246 }
1247 return;
1248 }
1249 if (!cur_node->been_warned && !no_warnings)
1250 {
1251 fprintf (stderr,
1252 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1253 node->file, cur_node->file, node->name);
1254 }
1255 cur_node->been_warned = TRUE;
1256 return;
1257 }
1258
1259 /* Maybe refuse to add duplicate nodes. */
1260 if (!permit_duplicates)
1261 {
1262 if (streq (node->name, cur_node->name)
1263 && streq (node->file, cur_node->file))
1264 return;
1265 }
1266
1267 /* Actually add the node */
1268 add_node (node, dif < 0 ? &cur_node->left : &cur_node->right);
1269 }
1270 }
1271 \f
1272 void
1273 put_entries (node)
1274 register NODE *node;
1275 {
1276 register char *sp;
1277
1278 if (node == NULL)
1279 return;
1280
1281 /* Output subentries that precede this one */
1282 put_entries (node->left);
1283
1284 /* Output this entry */
1285
1286 if (!CTAGS)
1287 {
1288 if (node->named)
1289 {
1290 fprintf (tagf, "%s\177%s\001%d,%d\n",
1291 node->pat, node->name,
1292 node->lno, node->cno);
1293 }
1294 else
1295 {
1296 fprintf (tagf, "%s\177%d,%d\n",
1297 node->pat,
1298 node->lno, node->cno);
1299 }
1300 }
1301 else if (!cxref_style)
1302 {
1303 fprintf (tagf, "%s\t%s\t",
1304 node->name, node->file);
1305
1306 if (node->is_func)
1307 { /* a function */
1308 putc (searchar, tagf);
1309 putc ('^', tagf);
1310
1311 for (sp = node->pat; *sp; sp++)
1312 {
1313 if (*sp == '\\' || *sp == searchar)
1314 putc ('\\', tagf);
1315 putc (*sp, tagf);
1316 }
1317 putc (searchar, tagf);
1318 }
1319 else
1320 { /* a typedef; text pattern inadequate */
1321 fprintf (tagf, "%d", node->lno);
1322 }
1323 putc ('\n', tagf);
1324 }
1325 else if (vgrind_style)
1326 fprintf (stdout, "%s %s %d\n",
1327 node->name, node->file, (node->lno + 63) / 64);
1328 else
1329 fprintf (stdout, "%-16s %3d %-16s %s\n",
1330 node->name, node->lno, node->file, node->pat);
1331
1332 /* Output subentries that follow this one */
1333 put_entries (node->right);
1334 }
1335
/* Number of digits in the decimal representation of NUM.  A sign, if
   any, is not counted; zero has one digit. */
int
number_len (num)
     long num;
{
  int digits = 1;

  /* Every number has at least one digit; each further division by ten
     that leaves something reveals one more. */
  while ((num /= 10) != 0)
    digits++;
  return digits;
}
1348
1349 /*
1350 * Return total number of characters that put_entries will output for
1351 * the nodes in the subtree of the specified node. Works only if
1352 * we are not ctags, but called only in that case. This count
1353 * is irrelevant with the new tags.el, but is still supplied for
1354 * backward compatibility.
1355 */
1356 int
1357 total_size_of_entries (node)
1358 register NODE *node;
1359 {
1360 register int total;
1361
1362 if (node == NULL)
1363 return 0;
1364
1365 total = 0;
1366 for (; node; node = node->right)
1367 {
1368 /* Count left subentries. */
1369 total += total_size_of_entries (node->left);
1370
1371 /* Count this entry */
1372 total += strlen (node->pat) + 1;
1373 total += number_len ((long) node->lno) + 1 + number_len (node->cno) + 1;
1374 if (node->named)
1375 total += 1 + strlen (node->name); /* \001name */
1376 }
1377
1378 return total;
1379 }
1380 \f
/*
 * The C symbol tables.
 *
 * Classification of the keywords listed in the gperf input below.
 */
enum sym_type
{
  st_none,                      /* not a keyword */
  st_C_struct,                  /* class, domain, union, struct */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define */
  st_C_typedef,                 /* typedef */
  st_C_typespec                 /* long, int, void, static, const, ... */
};
1388
1389 /* Feed stuff between (but not including) %[ and %] lines to:
1390 gperf -c -k1,3 -o -p -r -t
1391 %[
1392 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1393 %%
1394 class, C_PLPL, st_C_struct
1395 domain, C_STAR, st_C_struct
1396 union, 0, st_C_struct
1397 struct, 0, st_C_struct
1398 enum, 0, st_C_enum
1399 typedef, 0, st_C_typedef
1400 define, 0, st_C_define
1401 long, 0, st_C_typespec
1402 short, 0, st_C_typespec
1403 int, 0, st_C_typespec
1404 char, 0, st_C_typespec
1405 float, 0, st_C_typespec
1406 double, 0, st_C_typespec
1407 signed, 0, st_C_typespec
1408 unsigned, 0, st_C_typespec
1409 auto, 0, st_C_typespec
1410 void, 0, st_C_typespec
1411 extern, 0, st_C_typespec
1412 static, 0, st_C_typespec
1413 const, 0, st_C_typespec
1414 volatile, 0, st_C_typespec
1415 %]
1416 and replace lines between %< and %> with its output. */
1417 /*%<*/
1418 /* C code produced by gperf version 1.8.1 (K&R C version) */
1419 /* Command-line: gperf -c -k1,3 -o -p -r -t */
1420
1421
1422 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
1423
1424 #define MIN_WORD_LENGTH 3
1425 #define MAX_WORD_LENGTH 8
1426 #define MIN_HASH_VALUE 10
1427 #define MAX_HASH_VALUE 62
1428 /*
1429 21 keywords
1430 53 is the maximum key range
1431 */
1432
1433 static int
1434 hash (str, len)
1435 register char *str;
1436 register int len;
1437 {
1438 static unsigned char hash_table[] =
1439 {
1440 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1441 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1442 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1443 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1444 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1445 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1446 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1447 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1448 62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
1449 62, 62, 62, 62, 62, 62, 62, 2, 62, 7,
1450 6, 9, 15, 30, 62, 24, 62, 62, 1, 24,
1451 7, 27, 13, 62, 19, 26, 18, 27, 1, 62,
1452 62, 62, 62, 62, 62, 62, 62, 62,
1453 };
1454 return len + hash_table[str[2]] + hash_table[str[0]];
1455 }
1456
1457 struct C_stab_entry *
1458 in_word_set (str, len)
1459 register char *str;
1460 register int len;
1461 {
1462
1463 static struct C_stab_entry wordlist[] =
1464 {
1465 {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
1466 {"",},
1467 {"volatile", 0, st_C_typespec},
1468 {"",},
1469 {"long", 0, st_C_typespec},
1470 {"char", 0, st_C_typespec},
1471 {"class", C_PLPL, st_C_struct},
1472 {"",}, {"",}, {"",}, {"",},
1473 {"const", 0, st_C_typespec},
1474 {"",}, {"",}, {"",}, {"",},
1475 {"auto", 0, st_C_typespec},
1476 {"",}, {"",},
1477 {"define", 0, st_C_define},
1478 {"",},
1479 {"void", 0, st_C_typespec},
1480 {"",}, {"",}, {"",},
1481 {"extern", 0, st_C_typespec},
1482 {"static", 0, st_C_typespec},
1483 {"",},
1484 {"domain", C_STAR, st_C_struct},
1485 {"",},
1486 {"typedef", 0, st_C_typedef},
1487 {"double", 0, st_C_typespec},
1488 {"enum", 0, st_C_enum},
1489 {"",}, {"",}, {"",}, {"",},
1490 {"int", 0, st_C_typespec},
1491 {"",},
1492 {"float", 0, st_C_typespec},
1493 {"",}, {"",}, {"",},
1494 {"struct", 0, st_C_struct},
1495 {"",}, {"",}, {"",}, {"",},
1496 {"union", 0, st_C_struct},
1497 {"",},
1498 {"short", 0, st_C_typespec},
1499 {"",}, {"",},
1500 {"unsigned", 0, st_C_typespec},
1501 {"signed", 0, st_C_typespec},
1502 };
1503
1504 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
1505 {
1506 register int key = hash (str, len);
1507
1508 if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
1509 {
1510 register char *s = wordlist[key].name;
1511
1512 if (*s == *str && strneq (str + 1, s + 1, len - 1))
1513 return &wordlist[key];
1514 }
1515 }
1516 return 0;
1517 }
1518 /*%>*/
1519
1520 enum sym_type
1521 C_symtype(str, len, c_ext)
1522 char *str;
1523 int len;
1524 int c_ext;
1525 {
1526 register struct C_stab_entry *se = in_word_set(str, len);
1527
1528 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
1529 return st_none;
1530 return se->type;
1531 }
1532 \f
1533 /*
1534 * C functions are recognized using a simple finite automaton.
1535 * funcdef is its state variable.  It is advanced both by
1535 * consider_token and by C_entries.
1536 */
1537 typedef enum
1538 {
1539 fnone, /* nothing seen */
1540 ftagseen, /* function-like tag seen */
1541 fstartlist, /* just after open parenthesis */
1542 finlist, /* in parameter list */
1543 flistseen, /* after parameter list */
1544 fignore /* before open brace */
1545 } FUNCST;
1546 FUNCST funcdef;
1547
1548
1549 /*
1550 * typedefs are recognized using a simple finite automaton.
1551 * typdef is its state variable.
1552 */
1553 typedef enum
1554 {
1555 tnone, /* nothing seen */
1556 ttypedseen, /* typedef keyword seen */
1557 tinbody, /* inside typedef body */
1558 tend, /* just before typedef tag */
1559 tignore /* junk after typedef tag */
1560 } TYPEDST;
1561 TYPEDST typdef;
1562
1563
1564 /*
1565 * struct-like structures (enum, struct and union) are recognized
1566 * using another simple finite automaton. `structdef' is its state
1567 * variable.
1568 */
1569 typedef enum
1570 {
1571 snone, /* nothing seen yet */
1572 skeyseen, /* struct-like keyword seen */
1573 stagseen, /* struct-like tag seen */
1574 scolonseen, /* colon seen after struct-like tag */
1575 sinbody /* in struct body: recognize member func defs*/
1576 } STRUCTST;
1577 STRUCTST structdef;
1578
1579 /*
1580 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
1581 * struct tag, and structtype is the type of the preceding struct-like
1582 * keyword.
1582 *
1582 * structtag points to allocated memory only when consider_token has
1582 * stored a struct/union/class tag in it; the other values it takes
1582 * ("<uninited>", "<enum>", "_anonymous_", "<error>") are string
1582 * literals and must not be freed.
1583 */
1584 char *structtag = "<uninited>";
1585 enum sym_type structtype;
1586
1587 /*
1588 * Yet another little state machine to deal with preprocessor lines.
1588 * definedef is its state variable; it goes back to dnone when a new
1588 * line is read with the CNL macro.
1589 */
1590 typedef enum
1591 {
1592 dnone, /* nothing seen */
1593 dsharpseen, /* '#' seen as first char on line */
1594 ddefineseen, /* '#' and 'define' seen */
1595 dignorerest /* ignore rest of line */
1596 } DEFINEST;
1597 DEFINEST definedef;
1598
1599 /*
1600 * Set this to TRUE, and the next token considered is called a function.
1601 * Used only for GNU emacs's function-defining macros.
1601 * consider_token sets it when it meets a token beginning with DEFUN,
1601 * SYSCALL, ENTRY or PSEUDO.
1602 */
1603 logical next_token_is_func;
1604
1605 /*
1606 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
1606 * C_entries toggles it each time it meets "%%" in a file parsed with
1606 * the YACC extension.
1607 */
1608 logical yacc_rules;
1609
1610 /*
1611 * consider_token ()
1612 * checks to see if the current token is at the start of a
1613 * function, or corresponds to a typedef, or is a struct/union/enum
1614 * tag.
1615 *
1615 * Returns TRUE if the caller should make a tag (now or later) out
1615 * of the token, FALSE if the token is of no interest.
1615 *
1616 * *IS_FUNC gets TRUE iff the token is a function or macro with args.
1616 * It is written only on some of the paths that return TRUE, so the
1616 * caller must initialize it.
1617 * C_EXT is which language we are looking at.
1618 *
1619 * In the future we will need some way to adjust where the end of
1620 * the token is; for instance, implementing the C++ keyword
1621 * `operator' properly will adjust the end of the token to be after
1622 * whatever follows `operator'.
1623 *
1624 * Globals
1625 * funcdef IN OUT
1626 * structdef IN OUT
1627 * definedef IN OUT
1628 * typdef IN OUT
1629 * next_token_is_func IN OUT
1629 * structtag OUT
1629 * structtype IN OUT
1630 */
1631
1632 logical
1633 consider_token (str, len, c, c_ext, cblev, is_func)
1634 register char *str; /* IN: token pointer */
1635 register int len; /* IN: token length */
1636 register char c; /* IN: first char after the token */
1637 int c_ext; /* IN: C extensions mask */
1638 int cblev; /* IN: curly brace level */
1639 logical *is_func; /* OUT: function found */
1640 {
/* Keyword class of the token, st_none if it is not a keyword. */
1641 enum sym_type toktype = C_symtype (str, len, c_ext);
1642
1643 /*
1644 * Advance the definedef state machine.
1644 * Every state but dnone returns from here, so what follows this
1644 * switch only sees tokens found outside preprocessor lines.
1645 */
1646 switch (definedef)
1647 {
1648 case dnone:
1649 /* We're not on a preprocessor line. */
1650 break;
1651 case dsharpseen:
/* First token after the '#': only `define' is of interest. */
1652 if (toktype == st_C_define)
1653 {
1654 definedef = ddefineseen;
1655 }
1656 else
1657 {
1658 definedef = dignorerest;
1659 }
1660 return FALSE;
1661 case ddefineseen:
1662 /*
1663 * Make a tag for any macro, unless it is a constant
1664 * and constantypedefs is FALSE.
1664 * A macro is taken to have arguments when the character right
1664 * after its name is an open parenthesis.
1665 */
1666 definedef = dignorerest;
1667 *is_func = (c == '(');
1668 if (!*is_func && !constantypedefs)
1669 return FALSE;
1670 else
1671 return TRUE;
1672 case dignorerest:
1673 return FALSE;
1674 default:
/* After reporting the error, execution goes on below. */
1675 error ("internal error: definedef value.", 0);
1676 }
1677
1678 /*
1679 * Now typedefs
1680 */
1681 switch (typdef)
1682 {
1683 case tnone:
1684 if (toktype == st_C_typedef)
1685 {
/* The typedef keyword is swallowed even when typedefs are not
   being tagged; the state advances only if they are. */
1686 if (typedefs)
1687 typdef = ttypedseen;
1688 funcdef = fnone;
1689 return FALSE;
1690 }
1691 break;
1692 case ttypedseen:
1693 switch (toktype)
1694 {
1695 case st_none:
1696 case st_C_typespec:
1697 typdef = tend;
1698 break;
1699 case st_C_struct:
1700 case st_C_enum:
1701 break;
1702 }
1703 /* Do not return here, so the structdef stuff has a chance. */
1704 break;
1705 case tend:
/* Keywords cannot be the name being defined; any other token is
   reported as a candidate. */
1706 switch (toktype)
1707 {
1708 case st_C_typespec:
1709 case st_C_struct:
1710 case st_C_enum:
1711 return FALSE;
1712 }
1713 return TRUE;
1714 }
1715
1716 /*
1717 * This structdef business is currently only invoked when cblev==0.
1718 * It should be recursively invoked whatever the curly brace level,
1719 * and a stack of states kept, to allow for definitions of structs
1720 * within structs.
1721 *
1722 * This structdef business is NOT invoked when we are ctags and the
1723 * file is plain C. This is because a struct tag may have the same
1724 * name as another tag, and this loses with ctags.
1725 *
1726 * This if statement deals with the typdef state machine as
1727 * follows: if typdef==ttypedseen and token is struct/union/class/enum,
1728 * return FALSE. All the other code here is for the structdef
1729 * state machine.
1730 */
1731 switch (toktype)
1732 {
1733 case st_C_struct:
1734 case st_C_enum:
1735 if (typdef == ttypedseen
1736 || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
1737 {
1738 structdef = skeyseen;
1739 structtype = toktype;
1740 }
1741 return FALSE;
1742 }
1743 if (structdef == skeyseen)
1744 {
1745 /* Save the tag for struct/union/class, for functions that may be
1746 defined inside. */
/* The copy made by savenstr is not freed in this function. */
1747 if (structtype == st_C_struct)
1748 structtag = savenstr (str, len);
1749 else
1750 structtag = "<enum>";
1751 structdef = stagseen;
1752 return TRUE;
1753 }
1754
1755 /* Avoid entering funcdef stuff if typdef is going on. */
1756 if (typdef != tnone)
1757 {
1758 definedef = dnone;
1759 return FALSE;
1760 }
1761
1762 /* Detect GNU macros. */
/* Two nested ifs without braces: the block that follows the long
   condition is the body of the inner one. */
1763 if (definedef == dnone)
1764 if (strneq (str, "DEFUN", 5) /* Used in emacs */
1765 #if FALSE
1766 These are defined inside C functions, so currently they
1767 are not met anyway.
1768 || strneq (str, "EXFUN", 5) /* Used in glibc */
1769 || strneq (str, "DEFVAR_", 7) /* Used in emacs */
1770 #endif
1771 || strneq (str, "SYSCALL", 7) /* Used in glibc (mach) */
1772 || strneq (str, "ENTRY", 5) /* Used in glibc */
1773 || strneq (str, "PSEUDO", 6)) /* Used in glibc */
1774
1775 {
/* The macro name itself is not tagged: the token after it is. */
1776 next_token_is_func = TRUE;
1777 return FALSE;
1778 }
1779 if (next_token_is_func)
1780 {
1781 next_token_is_func = FALSE;
1782 funcdef = fignore;
1783 *is_func = TRUE;
1784 return TRUE;
1785 }
1786
1787 /* A function? */
1788 switch (toktype)
1789 {
1790 case st_C_typespec:
1791 if (funcdef != finlist && funcdef != fignore)
1792 funcdef = fnone; /* should be useless */
1793 return FALSE;
1794 default:
/* Any other token met while no function is being parsed is a
   possible function name; C_entries decides later, from what
   follows it, whether a tag is really made. */
1795 if (funcdef == fnone)
1796 {
1797 funcdef = ftagseen;
1798 *is_func = TRUE;
1799 return TRUE;
1800 }
1801 }
1802
1803 return FALSE;
1804 }
1805
1806 /*
1807 * C_entries ()
1808 * This routine finds functions, typedefs, #define's and
1809 * struct/union/enum definitions in C syntax and adds them
1810 * to the list.
1811 */
/* A token that is, or may become, a tag.  The fields are the values
   handed to pfnote by the make_tag_from_* macros below. */
1812 typedef struct
1813 {
1814 char *str;
1815 logical named;
1816 int linelen;
1817 int lineno;
1818 } TOKEN;
1819
/* All the macros below refer to local variables of C_entries (curndx,
   newndx, lp, tok, savetok, token_saved, quotednl, inf) and can only
   be used inside it.  The lines are kept in the two elements of lbs:
   curndx is the index of the one the scanner is reading, newndx the
   index of the one that was filled last. */
1820 #define current_lb_is_new (newndx == curndx)
1821 #define switch_line_buffers() (curndx = 1 - curndx)
1822
/* The line buffers (and the character numbers of the start of their
   lines) selected by curndx, by the other index, and by newndx. */
1823 #define curlb (lbs[curndx].lb)
1824 #define othlb (lbs[1-curndx].lb)
1825 #define newlb (lbs[newndx].lb)
1826 #define curlinepos (lbs[curndx].linepos)
1827 #define othlinepos (lbs[1-curndx].linepos)
1828 #define newlinepos (lbs[newndx].linepos)
1829
/* Read the next line into the current line buffer and restart the
   scan from its beginning, leaving definedef as it is. */
1830 #define CNL_SAVE_DEFINEDEF \
1831 do { \
1832 curlinepos = charno; \
1833 lineno++; \
1834 charno += readline (&curlb, inf); \
1835 lp = curlb.buffer; \
1836 quotednl = FALSE; \
1837 newndx = curndx; \
1838 } while (0)
1839
/* Like CNL_SAVE_DEFINEDEF, but also end the preprocessor line, if
   any: the token saved when a #define was met is restored and
   definedef is reset. */
1840 #define CNL \
1841 do { \
1842 CNL_SAVE_DEFINEDEF; \
1843 if (token_saved) \
1844 { \
1845 tok = savetok; \
1846 token_saved = FALSE; \
1847 } \
1848 definedef = dnone; \
1849 } while (0)
1850
/* Record tok as a tag, taking the text of its line from the newest
   line buffer or from the buffer which is not the current one. */
1851 #define make_tag_from_new_lb(isfun) pfnote (tok.str, isfun, tok.named, \
1852 newlb.buffer, tok.linelen, tok.lineno, newlinepos)
1853 #define make_tag_from_oth_lb(isfun) pfnote (tok.str, isfun, tok.named, \
1854 othlb.buffer, tok.linelen, tok.lineno, othlinepos)
1855
/*
 * C_entries ()
 *      Scan the stream INF as C source and record its tags with
 *      pfnote.  C_EXT is a mask of the C extensions to recognize: it
 *      is tested against C_PLPL and YACC here, and passed on to
 *      consider_token.
 *
 *      The text is read one line at a time; each character is first
 *      filtered for escapes, comments, strings and character
 *      constants, then used to build tokens (judged by
 *      consider_token), and finally examined as punctuation to
 *      advance the funcdef, typdef and structdef state machines.
 *      The '\0' that ends a line is what makes the next line be read.
 */
1856 void
1857 C_entries (c_ext, inf)
1858 int c_ext; /* extension of C */
1859 FILE *inf; /* input file */
1860 {
1861 register char c; /* latest char read; '\0' for end of line */
1862 register char *lp; /* pointer one beyond the character `c' */
1863 int curndx, newndx; /* indices for current and new lb */
1864 TOKEN tok; /* latest token read */
1865 register int tokoff; /* offset in line of start of current token */
1866 register int toklen; /* length of current token */
1867 int cblev; /* current curly brace level */
1868 int parlev; /* current parenthesis level */
1869 logical incomm, inquote, inchar, quotednl, midtoken;
1870 logical cplpl;
1871 logical token_saved; /* token saved */
1872 TOKEN savetok; /* token saved during preprocessor handling */
1873
/* Start with an empty line in the current buffer, so that the first
   character examined is a '\0' and the first line gets read. */
1874 curndx = newndx = 0;
1875 lineno = 0;
1876 charno = 0;
1877 lp = curlb.buffer;
1878 *lp = 0;
1879
1880 definedef = dnone; funcdef = fnone; typdef = tnone; structdef = snone;
1881 next_token_is_func = yacc_rules = token_saved = FALSE;
1882 midtoken = inquote = inchar = incomm = quotednl = FALSE;
1883 cblev = 0;
1884 parlev = 0;
1885 cplpl = c_ext & C_PLPL;
1886
1887 while (!feof (inf))
1888 {
1889 c = *lp++;
1890 if (c == '\\')
1891 {
1892 /* If we're at the end of the line, the next character is a
1893 '\0'; don't skip it, because it's the thing that tells us
1894 to read the next line. */
1895 if (*lp == '\0')
1896 {
1897 quotednl = TRUE;
1898 continue;
1899 }
/* Any other escaped character is skipped and treated as a blank. */
1900 lp++;
1901 c = ' ';
1902 }
1903 else if (incomm)
1904 {
1905 switch (c)
1906 {
1907 case '*':
1908 if (*lp == '/')
1909 {
1910 c = *lp++;
1911 incomm = FALSE;
1912 }
1913 break;
1914 case '\0':
1915 /* Newlines inside comments do not end macro definitions in
1916 traditional cpp. */
1917 CNL_SAVE_DEFINEDEF;
1918 break;
1919 }
1920 continue;
1921 }
1922 else if (inquote)
1923 {
1924 switch (c)
1925 {
1926 case '"':
1927 inquote = FALSE;
1928 break;
1929 case '\0':
1930 /* Newlines inside strings do not end macro definitions
1931 in traditional cpp, even though compilers don't
1932 usually accept them. */
1933 CNL_SAVE_DEFINEDEF;
1934 break;
1935 }
1936 continue;
1937 }
1938 else if (inchar)
1939 {
1940 switch (c)
1941 {
1942 case '\0':
1943 /* Hmmm, something went wrong. */
1944 CNL;
1945 /* FALLTHRU */
1946 case '\'':
1947 inchar = FALSE;
1948 break;
1949 }
1950 continue;
1951 }
1952 else
/* Plain text: look for the start of a string, a character constant
   or a comment, for the yacc section separator and for '#'. */
1953 switch (c)
1954 {
1955 case '"':
1956 inquote = TRUE;
1957 if (funcdef != finlist && funcdef != fignore)
1958 funcdef = fnone;
1959 continue;
1960 case '\'':
1961 inchar = TRUE;
1962 if (funcdef != finlist && funcdef != fignore)
1963 funcdef = fnone;
1964 continue;
1965 case '/':
1966 if (*lp == '*')
1967 {
1968 lp++;
1969 incomm = TRUE;
1970 continue;
1971 }
1972 else if (cplpl && *lp == '/')
1973 {
/* C++ comment: go on as if the end of the line had been reached. */
1974 c = 0;
1975 break;
1976 }
1977 else
1978 break;
1979 case '%':
1980 if ((c_ext & YACC) && *lp == '%')
1981 {
1982 /* entering or exiting rules section in yacc file */
1983 lp++;
1984 definedef = dnone; funcdef = fnone;
1985 typdef = tnone; structdef = snone;
1986 next_token_is_func = FALSE;
1987 midtoken = inquote = inchar = incomm = quotednl = FALSE;
1988 cblev = 0;
1989 yacc_rules = !yacc_rules;
1990 continue;
1991 }
1992 else
1993 break;
1994 case '#':
1995 if (definedef == dnone)
1996 {
1997 char *cp;
1998 logical cpptoken = TRUE;
1999
2000 /* Look back on this line. If all blanks, or nonblanks
2001 followed by an end of comment, this is a preprocessor
2002 token. */
2003 for (cp = newlb.buffer; cp < lp-1; cp++)
2004 if (!iswhite (*cp))
2005 {
2006 if (*cp == '*' && *(cp+1) == '/')
2007 {
2008 cp++;
2009 cpptoken = TRUE;
2010 }
2011 else
2012 cpptoken = FALSE;
2013 }
2014 if (cpptoken)
2015 definedef = dsharpseen;
2016 } /* if (definedef == dnone) */
2017
/* A '#' never gets to the token and punctuation code below. */
2018 continue;
2019 } /* switch (c) */
2020
2021
2022 /* Consider token only if some complicated conditions are satisfied. */
/* That is: on a preprocessor line, or at top level (but not after
   the colon that follows a struct tag), or directly inside a C++
   class body; and in any case not in the junk after a typedef tag,
   not in the ignored rest of a preprocessor line and not inside a
   parameter list. */
2023 if ((definedef != dnone
2024 || (cblev == 0 && structdef != scolonseen)
2025 || (cblev == 1 && cplpl && structdef == sinbody))
2026 && typdef != tignore
2027 && definedef != dignorerest
2028 && funcdef != finlist)
2029 {
2030 if (midtoken)
2031 {
2032 if (endtoken (c))
2033 {
2034 if (cplpl && c == ':' && *lp == ':' && begtoken(*(lp + 1)))
2035 {
2036 /*
2037 * This handles :: in the middle, but not at the
2038 * beginning of an identifier.
2039 */
/* The two colons and the character after them join the token. */
2040 lp += 2;
2041 toklen += 3;
2042 }
2043 else
2044 {
2045 logical is_func = FALSE;
2046
/* In the rules section of a yacc file every token that was started
   is taken without asking consider_token. */
2047 if (yacc_rules
2048 || consider_token (newlb.buffer + tokoff, toklen,
2049 c, c_ext, cblev, &is_func))
2050 {
2051 if (structdef == sinbody
2052 && definedef == dnone
2053 && is_func)
2054 /* function defined in C++ class body */
2055 {
/* The tag name is STRUCTTAG::TOKEN.  The token is terminated in
   place while it is copied, then the line is restored.  Note that
   this `c' hides the `c' of the function. */
2056 char *cp = newlb.buffer + tokoff + toklen;
2057 char c = *cp;
2058 *cp = '\0';
2059 tok.str = concat (structtag, "::",
2060 newlb.buffer + tokoff);
2061 *cp = c;
2062 tok.named = TRUE;
2063 }
2064 else
2065 {
2066 tok.str = savenstr (newlb.buffer+tokoff, toklen);
2067 if (structdef == stagseen
2068 || typdef == tend
2069 || (is_func
2070 && definedef == dignorerest)) /* macro */
2071 tok.named = TRUE;
2072 else
2073 tok.named = FALSE;
2074 }
2075 tok.lineno = lineno;
2076 tok.linelen = tokoff + toklen + 1;
2077
/* Whether a function, struct or typedef name is really a tag is
   known only later, possibly after more lines have been read: keep
   the line of the token by making the other buffer the current one.
   Anything else is tagged at once. */
2078 if (definedef == dnone
2079 && (funcdef == ftagseen
2080 || structdef == stagseen
2081 || typdef == tend))
2082 {
2083 if (current_lb_is_new)
2084 switch_line_buffers ();
2085 }
2086 else
2087 make_tag_from_new_lb (is_func);
2088 }
2089 midtoken = FALSE;
2090 }
2091 } /* if (endtoken (c)) */
2092 else if (intoken (c))
2093 {
2094 toklen++;
2095 continue;
2096 }
2097 } /* if (midtoken) */
2098 else if (begtoken (c))
2099 {
2100 switch (definedef)
2101 {
2102 case dnone:
2103 switch (funcdef)
2104 {
2105 case fstartlist:
2106 funcdef = finlist;
2107 continue;
2108 case flistseen:
/* A token right after the parameter list: the function is tagged
   now, without waiting for its open brace. */
2109 make_tag_from_oth_lb (TRUE);
2110 funcdef = fignore;
2111 break;
2112 case ftagseen:
2113 funcdef = fnone;
2114 break;
2115 }
2116 if (structdef == stagseen)
2117 structdef = snone;
2118 break;
2119 case dsharpseen:
2120 /* Take a quick peek ahead for a define directive,
2121 so we can avoid saving the token when not absolutely
2122 necessary. [This is a speed hack.] */
2123 if (c == 'd' && strneq (lp, "efine", 5)
2124 && iswhite (*(lp + 5)))
2125 {
/* CNL restores the saved token at the end of the preprocessor line. */
2126 savetok = tok;
2127 token_saved = TRUE;
2128 definedef = ddefineseen;
2129 lp += 6;
2130 }
2131 else
2132 definedef = dignorerest;
2133 continue;
2134 }
/* Start a new token here.  In the rules section of a yacc file a
   token can only start in the first column. */
2135 if (!yacc_rules || lp == newlb.buffer + 1)
2136 {
2137 tokoff = lp - 1 - newlb.buffer;
2138 toklen = 1;
2139 midtoken = TRUE;
2140 }
2141 continue;
2142 }
2143 } /* if must look at token */
2144
2145
2146 /* Detect end of line, colon, comma, semicolon and various braces
2147 after having handled a token.*/
/* Except for the end of line, nothing is done while on a
   preprocessor line. */
2148 switch (c)
2149 {
2150 case ':':
2151 if (definedef != dnone)
2152 break;
2153 if (structdef == stagseen)
2154 structdef = scolonseen;
2155 else
2156 switch (funcdef)
2157 {
2158 case ftagseen:
2159 if (yacc_rules)
2160 {
2161 make_tag_from_oth_lb (FALSE);
2162 funcdef = fignore;
2163 }
2164 break;
2165 case fstartlist:
2166 funcdef = fnone;
2167 break;
2168 }
2169 break;
2170 case ';':
2171 if (definedef != dnone)
2172 break;
2173 if (cblev == 0)
2174 switch (typdef)
2175 {
2176 case tend:
2177 make_tag_from_oth_lb (FALSE);
2178 /* FALLTHRU */
2179 default:
2180 typdef = tnone;
2181 }
2182 if (funcdef != fignore)
2183 funcdef = fnone;
2184 if (structdef == stagseen)
2185 structdef = snone;
2186 break;
2187 case ',':
2188 if (definedef != dnone)
2189 break;
2190 if (funcdef != finlist && funcdef != fignore)
2191 funcdef = fnone;
2192 if (structdef == stagseen)
2193 structdef = snone;
2194 break;
2195 case '[':
2196 if (definedef != dnone)
2197 break;
2198 if (cblev == 0 && typdef == tend)
2199 {
2200 typdef = tignore;
2201 make_tag_from_oth_lb (FALSE);
2202 break;
2203 }
2204 if (funcdef != finlist && funcdef != fignore)
2205 funcdef = fnone;
2206 if (structdef == stagseen)
2207 structdef = snone;
2208 break;
2209 case '(':
2210 if (definedef != dnone)
2211 break;
2212 switch (funcdef)
2213 {
2214 case fnone:
2215 switch (typdef)
2216 {
2217 case ttypedseen:
2218 case tend:
2219 /* Make sure that the next char is not a '*'.
2220 This handles constructs like:
2221 typedef void OperatorFun (int fun); */
2222 if (*lp != '*')
2223 {
2224 typdef = tignore;
2225 make_tag_from_oth_lb (FALSE);
2226 }
2227 break;
2228 } /* switch (typdef) */
2229 break;
2230 case ftagseen:
2231 funcdef = fstartlist;
2232 break;
2233 case flistseen:
2234 funcdef = finlist;
2235 break;
2236 }
2237 parlev++;
2238 break;
2239 case ')':
2240 if (definedef != dnone)
2241 break;
2242 if (--parlev == 0)
2243 {
2244 switch (funcdef)
2245 {
2246 case fstartlist:
2247 case finlist:
2248 funcdef = flistseen;
2249 break;
2250 }
2251 if (cblev == 0 && typdef == tend)
2252 {
2253 typdef = tignore;
2254 make_tag_from_oth_lb (FALSE);
2255 }
2256 }
2257 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
2258 parlev = 0;
2259 break;
2260 case '{':
2261 if (definedef != dnone)
2262 break;
2263 if (typdef == ttypedseen)
2264 typdef = tinbody;
2265 switch (structdef)
2266 {
2267 case skeyseen: /* unnamed struct */
2268 structtag = "_anonymous_";
2269 structdef = sinbody;
2270 break;
2271 case stagseen:
2272 case scolonseen: /* named struct */
2273 structdef = sinbody;
2274 make_tag_from_oth_lb (FALSE);
2275 break;
2276 }
2277 switch (funcdef)
2278 {
2279 case flistseen:
2280 make_tag_from_oth_lb (TRUE);
2281 /* FALLTHRU */
2282 case fignore:
2283 funcdef = fnone;
2284 break;
2285 case fnone:
2286 /* Neutralize `extern "C" {' grot and look inside structs. */
/* The increment below brings the level back to 0, so that such a
   brace does not count. */
2287 if (cblev == 0 && structdef == snone && typdef == tnone)
2288 cblev = -1;
2289 }
2290 cblev++;
2291 break;
2292 case '*':
2293 if (definedef != dnone)
2294 break;
2295 if (funcdef == fstartlist)
2296 funcdef = fnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
2297 break;
2298 case '}':
2299 if (definedef != dnone)
2300 break;
2301 if (!noindentypedefs && lp == newlb.buffer + 1)
2302 {
2303 cblev = 0; /* reset curly brace level if first column */
2304 parlev = 0; /* also reset paren level, just in case... */
2305 }
2306 else if (cblev > 0)
2307 cblev--;
2308 if (cblev == 0)
2309 {
2310 if (typdef == tinbody)
2311 typdef = tend;
2312 #if FALSE /* too risky */
2313 if (structdef == sinbody)
2314 free (structtag);
2315 #endif
2316 structdef = snone;
2317 structtag = "<error>";
2318 }
2319 break;
2320 case '=':
2321 case '#': case '+': case '-': case '~': case '&': case '%': case '/':
2322 case '|': case '^': case '!': case '<': case '>': case '.': case '?':
2323 if (definedef != dnone)
2324 break;
2325 /* These surely cannot follow a function tag. */
2326 if (funcdef != finlist && funcdef != fignore)
2327 funcdef = fnone;
2328 break;
2329 case '\0':
2330 /* If a macro spans multiple lines don't reset its state. */
2331 if (quotednl)
2332 CNL_SAVE_DEFINEDEF;
2333 else
2334 CNL;
2335 break;
2336 } /* switch (c) */
2337
2338 } /* while not eof */
2339 }
2340
2341 /*
2342 * Process either a C++ file or a C file depending on the setting
2343 * of a global flag.
2344 */
2345 void
2346 default_C_entries (inf)
2347 FILE *inf;
2348 {
2349 C_entries (cplusplus ? C_PLPL : 0, inf);
2350 }
2351
2352 /* Always do C++: tag INF with the C_PLPL extension, without looking
2352 at the cplusplus flag. */
2353 void
2354 Cplusplus_entries (inf)
2355 FILE *inf;
2356 {
2357 C_entries (C_PLPL, inf);
2358 }
2359
2360 /* Always do C*: tag INF with the C_STAR extension, which is the
2360 one the `domain' keyword belongs to. */
2361 void
2362 Cstar_entries (inf)
2363 FILE *inf;
2364 {
2365 C_entries (C_STAR, inf);
2366 }
2367
2368 /* Always do Yacc: tag INF with the YACC extension, which makes
2368 C_entries switch in and out of the rules section at each "%%". */
2369 void
2370 Yacc_entries (inf)
2371 FILE *inf;
2372 {
2373 C_entries (YACC, inf);
2374 }
2375 \f
2376 /* Fortran parsing */
2377
2378 char *dbp;
2379
2380 logical
2381 tail (cp)
2382 char *cp;
2383 {
2384 register int len = 0;
2385
2386 while (*cp && (*cp | ' ') == (dbp[len] | ' '))
2387 cp++, len++;
2388 if (*cp == 0)
2389 {
2390 dbp += len;
2391 return TRUE;
2392 }
2393 return FALSE;
2394 }
2395
2396 void
2397 takeprec ()
2398 {
2399 while (isspace (*dbp))
2400 dbp++;
2401 if (*dbp != '*')
2402 return;
2403 dbp++;
2404 while (isspace (*dbp))
2405 dbp++;
2406 if (tail ("(*)"))
2407 return;
2408 if (!isdigit (*dbp))
2409 {
2410 --dbp; /* force failure */
2411 return;
2412 }
2413 do
2414 dbp++;
2415 while (isdigit (*dbp));
2416 }
2417
2418 void
2419 getit (inf)
2420 FILE *inf;
2421 {
2422 register char *cp;
2423
2424 while (isspace (*dbp))
2425 dbp++;
2426 if (*dbp == '\0')
2427 {
2428 lineno++;
2429 linecharno = charno;
2430 charno += readline (&lb, inf);
2431 dbp = lb.buffer;
2432 if (dbp[5] != '&')
2433 return;
2434 dbp += 6;
2435 while (isspace (*dbp))
2436 dbp++;
2437 }
2438 if (!isalpha (*dbp)
2439 && *dbp != '_'
2440 && *dbp != '$')
2441 return;
2442 for (cp = dbp + 1;
2443 (*cp
2444 && (isalpha (*cp) || isdigit (*cp) || (*cp == '_') || (*cp == '$')));
2445 cp++)
2446 continue;
2447 pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE, lb.buffer,
2448 cp - lb.buffer + 1, lineno, linecharno);
2449 }
2450
2451 void
2452 Fortran_functions (inf)
2453 FILE *inf;
2454 {
2455 lineno = 0;
2456 charno = 0;
2457
2458 while (!feof (inf))
2459 {
2460 lineno++;
2461 linecharno = charno;
2462 charno += readline (&lb, inf);
2463 dbp = lb.buffer;
2464 if (*dbp == '%')
2465 dbp++; /* Ratfor escape to fortran */
2466 while (isspace (*dbp))
2467 dbp++;
2468 if (*dbp == 0)
2469 continue;
2470 switch (*dbp | ' ')
2471 {
2472 case 'i':
2473 if (tail ("integer"))
2474 takeprec ();
2475 break;
2476 case 'r':
2477 if (tail ("real"))
2478 takeprec ();
2479 break;
2480 case 'l':
2481 if (tail ("logical"))
2482 takeprec ();
2483 break;
2484 case 'c':
2485 if (tail ("complex") || tail ("character"))
2486 takeprec ();
2487 break;
2488 case 'd':
2489 if (tail ("double"))
2490 {
2491 while (isspace (*dbp))
2492 dbp++;
2493 if (*dbp == 0)
2494 continue;
2495 if (tail ("precision"))
2496 break;
2497 continue;
2498 }
2499 break;
2500 }
2501 while (isspace (*dbp))
2502 dbp++;
2503 if (*dbp == 0)
2504 continue;
2505 switch (*dbp | ' ')
2506 {
2507 case 'f':
2508 if (tail ("function"))
2509 getit (inf);
2510 continue;
2511 case 's':
2512 if (tail ("subroutine"))
2513 getit (inf);
2514 continue;
2515 case 'e':
2516 if (tail ("entry"))
2517 getit (inf);
2518 continue;
2519 case 'p':
2520 if (tail ("program"))
2521 {
2522 getit (inf);
2523 continue;
2524 }
2525 if (tail ("procedure"))
2526 getit (inf);
2527 continue;
2528 }
2529 }
2530 }
2531 \f
2532 /*
2533 * Bob Weiner, Motorola Inc., 4/3/94
2534 * Unix and microcontroller assembly tag handling
2535 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
2536 */
2537 void
2538 Asm_labels (inf)
2539 FILE *inf;
2540 {
2541 register char *cp;
2542
2543 lineno = 0;
2544 charno = 0;
2545
2546 while (!feof (inf))
2547 {
2548 lineno++;
2549 linecharno = charno;
2550 charno += readline (&lb, inf);
2551 cp = lb.buffer;
2552
2553 /* If first char is alphabetic or one of [_.$], test for colon
2554 following identifier. */
2555 if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2556 {
2557 /* Read past label. */
2558 cp++;
2559 while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2560 cp++;
2561 if (*cp == ':' || isspace (*cp))
2562 {
2563 /* Found end of label, so copy it and add it to the table. */
2564 pfnote (savenstr (lb.buffer, cp-lb.buffer), TRUE, FALSE,
2565 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2566 }
2567 }
2568 }
2569 }
2570 \f
2571 /* Added by Mosur Mohan, 4/22/88 */
2572 /* Pascal parsing */
2573
/* Read the next input line into lb, point dbp at its start and update
   the line and character counters.  readline's return value already
   counts the newline, so nothing is added to it here.  Expects a
   `FILE *inf' to be in scope at the point of use.  Wrapped in
   do/while (0) so that `GET_NEW_LINE;' is a single statement.  */
#define GET_NEW_LINE \
do \
  { \
    linecharno = charno; lineno++; \
    charno += readline (&lb, inf); \
    dbp = lb.buffer; \
  } \
while (0)
2580
2581 /*
2582 * Locates tags for procedures & functions. Doesn't do any type- or
2583 * var-definitions. It does look for the keyword "extern" or
2584 * "forward" immediately following the procedure statement; if found,
2585 * the tag is skipped.
2586 */
2587 void
2588 Pascal_functions (inf)
2589 FILE *inf;
2590 {
2591 struct linebuffer tline; /* mostly copied from C_entries */
2592 long save_lcno;
2593 int save_lineno;
2594 char c, *cp;
2595 char *nambuf;
2596
2597 logical /* each of these flags is TRUE iff: */
2598 incomment, /* point is inside a comment */
2599 inquote, /* point is inside '..' string */
2600 get_tagname, /* point is after PROCEDURE/FUNCTION */
2601 /* keyword, so next item = potential tag */
2602 found_tag, /* point is after a potential tag */
2603 inparms, /* point is within parameter-list */
2604 verify_tag; /* point has passed the parm-list, so the */
2605 /* next token will determine whether */
2606 /* this is a FORWARD/EXTERN to be */
2607 /* ignored, or whether it is a real tag */
2608
2609 lineno = 0;
2610 charno = 0;
2611 dbp = lb.buffer;
2612 *dbp = 0;
2613 initbuffer (&tline);
2614
2615 incomment = inquote = FALSE;
2616 found_tag = FALSE; /* have a proc name; check if extern */
2617 get_tagname = FALSE; /* have found "procedure" keyword */
2618 inparms = FALSE; /* found '(' after "proc" */
2619 verify_tag = FALSE; /* check if "extern" is ahead */
2620
2621 /* long main loop to get next char */
2622 while (!feof (inf))
2623 {
2624 c = *dbp++;
2625 if (c == '\0') /* if end of line */
2626 {
2627 GET_NEW_LINE;
2628 if (*dbp == '\0')
2629 continue;
2630 if (!((found_tag && verify_tag) ||
2631 get_tagname))
2632 c = *dbp++; /* only if don't need *dbp pointing */
2633 /* to the beginning of the name of */
2634 /* the procedure or function */
2635 }
2636 if (incomment)
2637 {
2638 if (c == '}') /* within { - } comments */
2639 incomment = FALSE;
2640 else if (c == '*' && dbp[1] == ')') /* within (* - *) comments */
2641 {
2642 dbp++;
2643 incomment = FALSE;
2644 }
2645 continue;
2646 }
2647 else if (inquote)
2648 {
2649 if (c == '\'')
2650 inquote = FALSE;
2651 continue;
2652 }
2653 else
2654 switch (c)
2655 {
2656 case '\'':
2657 inquote = TRUE; /* found first quote */
2658 continue;
2659 case '{': /* found open-{-comment */
2660 incomment = TRUE;
2661 continue;
2662 case '(':
2663 if (*dbp == '*') /* found open-(*-comment */
2664 {
2665 incomment = TRUE;
2666 dbp++;
2667 }
2668 else if (found_tag) /* found '(' after tag, i.e., parm-list */
2669 inparms = TRUE;
2670 continue;
2671 case ')': /* end of parms list */
2672 if (inparms)
2673 inparms = FALSE;
2674 continue;
2675 case ';':
2676 if ((found_tag) && (!inparms)) /* end of proc or fn stmt */
2677 {
2678 verify_tag = TRUE;
2679 break;
2680 }
2681 continue;
2682 }
2683 if ((found_tag) && (verify_tag) && (*dbp != ' '))
2684 {
2685 /* check if this is an "extern" declaration */
2686 if (*dbp == 0)
2687 continue;
2688 if ((*dbp == 'e') || (*dbp == 'E'))
2689 {
2690 if (tail ("extern")) /* superfluous, really! */
2691 {
2692 found_tag = FALSE;
2693 verify_tag = FALSE;
2694 }
2695 }
2696 else if ((*dbp == 'f') || (*dbp == 'F'))
2697 {
2698 if (tail ("forward")) /* check for forward reference */
2699 {
2700 found_tag = FALSE;
2701 verify_tag = FALSE;
2702 }
2703 }
2704 if ((found_tag) && (verify_tag)) /* not external proc, so make tag */
2705 {
2706 found_tag = FALSE;
2707 verify_tag = FALSE;
2708 pfnote (nambuf, TRUE, FALSE, tline.buffer,
2709 cp - tline.buffer + 1, save_lineno, save_lcno);
2710 continue;
2711 }
2712 }
2713 if (get_tagname) /* grab name of proc or fn */
2714 {
2715 if (*dbp == 0)
2716 continue;
2717
2718 /* save all values for later tagging */
2719 tline.size = lb.size;
2720 strcpy (tline.buffer, lb.buffer);
2721 save_lineno = lineno;
2722 save_lcno = linecharno;
2723
2724 /* grab block name */
2725 for (cp = dbp + 1; *cp && (!endtoken (*cp)); cp++)
2726 continue;
2727 nambuf = savenstr (dbp, cp-dbp);
2728 dbp = cp; /* restore dbp to e-o-token */
2729 get_tagname = FALSE;
2730 found_tag = TRUE;
2731 continue;
2732
2733 /* and proceed to check for "extern" */
2734 }
2735 else if (!incomment && !inquote && !found_tag)
2736 {
2737 /* check for proc/fn keywords */
2738 switch (c | ' ')
2739 {
2740 case 'p':
2741 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
2742 get_tagname = TRUE;
2743 continue;
2744 case 'f':
2745 if (tail ("unction"))
2746 get_tagname = TRUE;
2747 continue;
2748 }
2749 }
2750 } /* while not eof */
2751 }
2752 \f
2753 /*
2754 * lisp tag functions
2755 * look for (def or (DEF, quote or QUOTE
2756 */
/* Return 1 if the three characters after STRP[0] spell "def" in any
   mix of upper and lower case, 0 otherwise.  STRP[0] itself is not
   examined.  */
int
L_isdef (strp)
     register char *strp;
{
  /* OR-ing with ' ' maps an ASCII capital onto its small letter.  */
  if ((strp[1] | ' ') != 'd')
    return 0;
  if ((strp[2] | ' ') != 'e')
    return 0;
  return (strp[3] | ' ') == 'f';
}
2765
/* Return 1 if the characters after STRP[0] spell "quote" (in any mix
   of upper and lower case) followed by a white space character, 0
   otherwise.  STRP[0] itself is not examined.  */
int
L_isquote (strp)
     register char *strp;
{
  static char word[] = "quote";
  register int i;

  /* OR-ing with ' ' maps an ASCII capital onto its small letter.  The
     scan stops at the first mismatch, so it never reads beyond the
     terminating NUL of a short string.  */
  for (i = 0; word[i] != '\0'; i++)
    if ((strp[i + 1] | ' ') != word[i])
      return 0;
  return isspace (strp[6]) != 0;
}
2777
2778 void
2779 L_getit ()
2780 {
2781 register char *cp;
2782
2783 if (*dbp == '\'') /* Skip prefix quote */
2784 dbp++;
2785 else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */
2786 {
2787 dbp += 7;
2788 while (isspace(*dbp))
2789 dbp++;
2790 }
2791 for (cp = dbp /*+1*/;
2792 *cp && *cp != '(' && *cp != ' ' && *cp != ')';
2793 cp++)
2794 continue;
2795 if (cp == dbp)
2796 return;
2797
2798 pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE, lb.buffer,
2799 cp - lb.buffer + 1, lineno, linecharno);
2800 }
2801
2802 void
2803 Lisp_functions (inf)
2804 FILE *inf;
2805 {
2806 lineno = 0;
2807 charno = 0;
2808
2809 while (!feof (inf))
2810 {
2811 lineno++;
2812 linecharno = charno;
2813 charno += readline (&lb, inf);
2814 dbp = lb.buffer;
2815 if (dbp[0] == '(')
2816 {
2817 if (L_isdef (dbp))
2818 {
2819 while (!isspace (*dbp))
2820 dbp++;
2821 while (isspace (*dbp))
2822 dbp++;
2823 L_getit ();
2824 }
2825 else
2826 {
2827 /* Check for (foo::defmumble name-defined ... */
2828 do
2829 dbp++;
2830 while (*dbp && !isspace (*dbp)
2831 && *dbp != ':' && *dbp != '(' && *dbp != ')');
2832 if (*dbp == ':')
2833 {
2834 do
2835 dbp++;
2836 while (*dbp == ':');
2837
2838 if (L_isdef (dbp - 1))
2839 {
2840 while (!isspace (*dbp))
2841 dbp++;
2842 while (isspace (*dbp))
2843 dbp++;
2844 L_getit ();
2845 }
2846 }
2847 }
2848 }
2849 }
2850 }
2851 \f
2852 /*
2853 * Scheme tag functions
2854 * look for (def... xyzzy
2855 * look for (def... (xyzzy
2856 * look for (def ... ((...(xyzzy ....
2857 * look for (set! xyzzy
2858 */
2859
2860 void get_scheme ();
2861
2862 void
2863 Scheme_functions (inf)
2864 FILE *inf;
2865 {
2866 lineno = 0;
2867 charno = 0;
2868
2869 while (!feof (inf))
2870 {
2871 lineno++;
2872 linecharno = charno;
2873 charno += readline (&lb, inf);
2874 dbp = lb.buffer;
2875 if (dbp[0] == '(' &&
2876 (dbp[1] == 'D' || dbp[1] == 'd') &&
2877 (dbp[2] == 'E' || dbp[2] == 'e') &&
2878 (dbp[3] == 'F' || dbp[3] == 'f'))
2879 {
2880 while (!isspace (*dbp))
2881 dbp++;
2882 /* Skip over open parens and white space */
2883 while (*dbp && (isspace (*dbp) || *dbp == '('))
2884 dbp++;
2885 get_scheme ();
2886 }
2887 if (dbp[0] == '(' &&
2888 (dbp[1] == 'S' || dbp[1] == 's') &&
2889 (dbp[2] == 'E' || dbp[2] == 'e') &&
2890 (dbp[3] == 'T' || dbp[3] == 't') &&
2891 (dbp[4] == '!' || dbp[4] == '!') &&
2892 (isspace (dbp[5])))
2893 {
2894 while (!isspace (*dbp))
2895 dbp++;
2896 /* Skip over white space */
2897 while (isspace (*dbp))
2898 dbp++;
2899 get_scheme ();
2900 }
2901 }
2902 }
2903
2904 void
2905 get_scheme ()
2906 {
2907 register char *cp;
2908
2909 if (*dbp == 0)
2910 return;
2911 /* Go till you get to white space or a syntactic break */
2912 for (cp = dbp + 1;
2913 *cp && *cp != '(' && *cp != ')' && !isspace (*cp);
2914 cp++)
2915 continue;
2916 pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE,
2917 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2918 }
2919 \f
2920 /* Find tags in TeX and LaTeX input files. */
2921
/* One entry of TEX_toktab, the table of TeX control sequences that
   define tags.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* the control sequence, without escape char */
  int len;                      /* length of NAME; 0 marks the table's end */
};
2930
/* Table with tag tokens; NULL until TeX_functions builds it.  */
struct TEX_tabent *TEX_toktab = NULL;

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.  */
char *TEX_defenv = ":chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem:typeout";

/* Forward declarations of the TeX helpers.  */
struct TEX_tabent *TEX_decode_env ();
int TEX_Token ();
void TEX_mode ();
void TEX_getit ();

/* Escape character and group delimiters in effect; TEX_mode sets them
   for each file.  */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
2947
2948 /*
2949 * TeX/LaTeX scanning loop.
2950 */
2951 void
2952 TeX_functions (inf)
2953 FILE *inf;
2954 {
2955 char *lasthit;
2956
2957 lineno = 0;
2958 charno = 0;
2959
2960 /* Select either \ or ! as escape character. */
2961 TEX_mode (inf);
2962
2963 /* Initialize token table once from environment. */
2964 if (!TEX_toktab)
2965 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
2966
2967 while (!feof (inf))
2968 { /* Scan each line in file */
2969 lineno++;
2970 linecharno = charno;
2971 charno += readline (&lb, inf);
2972 dbp = lb.buffer;
2973 lasthit = dbp;
2974 while (dbp = etags_strchr (dbp, TEX_esc)) /* Look at each esc in line */
2975 {
2976 register int i;
2977
2978 if (!*(++dbp))
2979 break;
2980 linecharno += dbp - lasthit;
2981 lasthit = dbp;
2982 i = TEX_Token (lasthit);
2983 if (0 <= i)
2984 {
2985 TEX_getit (lasthit, TEX_toktab[i].len);
2986 break; /* We only save a line once */
2987 }
2988 }
2989 }
2990 }
2991
2992 #define TEX_LESC '\\'
2993 #define TEX_SESC '!'
2994 #define TEX_cmt '%'
2995
2996 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
2997 chars accordingly. */
2998 void
2999 TEX_mode (inf)
3000 FILE *inf;
3001 {
3002 int c;
3003
3004 while ((c = getc (inf)) != EOF)
3005 {
3006 /* Skip to next line if we hit the TeX comment char. */
3007 if (c == TEX_cmt)
3008 while (c != '\n')
3009 c = getc (inf);
3010 else if (c == TEX_LESC || c == TEX_SESC )
3011 break;
3012 }
3013
3014 if (c == TEX_LESC)
3015 {
3016 TEX_esc = TEX_LESC;
3017 TEX_opgrp = '{';
3018 TEX_clgrp = '}';
3019 }
3020 else
3021 {
3022 TEX_esc = TEX_SESC;
3023 TEX_opgrp = '<';
3024 TEX_clgrp = '>';
3025 }
3026 rewind (inf);
3027 }
3028
3029 /* Read environment and prepend it to the default string.
3030 Build token table. */
3031 struct TEX_tabent *
3032 TEX_decode_env (evarname, defenv)
3033 char *evarname;
3034 char *defenv;
3035 {
3036 register char *env, *p;
3037
3038 struct TEX_tabent *tab;
3039 int size, i;
3040
3041 /* Append default string to environment. */
3042 env = getenv (evarname);
3043 if (!env)
3044 env = defenv;
3045 else
3046 env = concat (env, defenv, "");
3047
3048 /* Allocate a token table */
3049 for (size = 1, p = env; p;)
3050 if ((p = etags_strchr (p, ':')) && *(++p))
3051 size++;
3052 /* Add 1 to leave room for null terminator. */
3053 tab = xnew (size + 1, struct TEX_tabent);
3054
3055 /* Unpack environment string into token table. Be careful about */
3056 /* zero-length strings (leading ':', "::" and trailing ':') */
3057 for (i = 0; *env;)
3058 {
3059 p = etags_strchr (env, ':');
3060 if (!p) /* End of environment string. */
3061 p = env + strlen (env);
3062 if (p - env > 0)
3063 { /* Only non-zero strings. */
3064 tab[i].name = savenstr (env, p - env);
3065 tab[i].len = strlen (tab[i].name);
3066 i++;
3067 }
3068 if (*p)
3069 env = p + 1;
3070 else
3071 {
3072 tab[i].name = NULL; /* Mark end of table. */
3073 tab[i].len = 0;
3074 break;
3075 }
3076 }
3077 return tab;
3078 }
3079
3080 /* Record a tag defined by a TeX command of length LEN and starting at NAME.
3081 The name being defined actually starts at (NAME + LEN + 1).
3082 But we seem to include the TeX command in the tag name. */
3083 void
3084 TEX_getit (name, len)
3085 char *name;
3086 int len;
3087 {
3088 char *p = name + len;
3089
3090 if (*name == 0)
3091 return;
3092
3093 /* Let tag name extend to next group close (or end of line) */
3094 while (*p && *p != TEX_clgrp)
3095 p++;
3096 pfnote (savenstr (name, p-name), TRUE, FALSE, lb.buffer,
3097 strlen (lb.buffer), lineno, linecharno);
3098 }
3099
3100 /* If the text at CP matches one of the tag-defining TeX command names,
3101 return the pointer to the first occurrence of that command in TEX_toktab.
3102 Otherwise return -1.
3103 Keep the capital `T' in `Token' for dumb truncating compilers
3104 (this distinguishes it from `TEX_toktab' */
3105 int
3106 TEX_Token (cp)
3107 char *cp;
3108 {
3109 int i;
3110
3111 for (i = 0; TEX_toktab[i].len > 0; i++)
3112 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
3113 return i;
3114 return -1;
3115 }
3116 \f
3117 /* Support for Prolog. */
3118
3119 /* Whole head (not only functor, but also arguments)
3120 is gotten in compound term. */
3121 void
3122 prolog_getit (s)
3123 char *s;
3124 {
3125 char *save_s;
3126 int insquote, npar;
3127
3128 save_s = s;
3129 insquote = FALSE;
3130 npar = 0;
3131 while (1)
3132 {
3133 if (s[0] == '\0') /* syntax error. */
3134 return;
3135 else if (insquote && s[0] == '\'' && s[1] == '\'')
3136 s += 2;
3137 else if (s[0] == '\'')
3138 {
3139 insquote = !insquote;
3140 s++;
3141 }
3142 else if (!insquote && s[0] == '(')
3143 {
3144 npar++;
3145 s++;
3146 }
3147 else if (!insquote && s[0] == ')')
3148 {
3149 npar--;
3150 s++;
3151 if (npar == 0)
3152 break;
3153 else if (npar < 0) /* syntax error. */
3154 return;
3155 }
3156 else if (!insquote && s[0] == '.'
3157 && (isspace (s[1]) || s[1] == '\0'))
3158 { /* fullstop. */
3159 if (npar != 0) /* syntax error. */
3160 return;
3161 s++;
3162 break;
3163 }
3164 else
3165 s++;
3166 }
3167 pfnote (savenstr (save_s, s-save_s), TRUE, FALSE,
3168 save_s, s-save_s, lineno, linecharno);
3169 }
3170
3171 /* It is assumed that prolog predicate starts from column 0. */
3172 void
3173 Prolog_functions (inf)
3174 FILE *inf;
3175 {
3176 void skip_comment (), prolog_getit ();
3177
3178 lineno = linecharno = charno = 0;
3179 while (!feof (inf))
3180 {
3181 lineno++;
3182 linecharno += charno;
3183 charno = readline (&lb, inf) + 1; /* 1 for newline. */
3184 dbp = lb.buffer;
3185 if (isspace (dbp[0])) /* not predicate header. */
3186 continue;
3187 else if (dbp[0] == '%') /* comment. */
3188 continue;
3189 else if (dbp[0] == '/' && dbp[1] == '*') /* comment. */
3190 skip_comment (&lb, inf, &lineno, &linecharno);
3191 else /* found. */
3192 prolog_getit (dbp);
3193 }
3194 }
3195
3196 void
3197 skip_comment (plb, inf, plineno, plinecharno)
3198 struct linebuffer *plb;
3199 FILE *inf;
3200 int *plineno; /* result */
3201 long *plinecharno; /* result */
3202 {
3203 char *cp;
3204
3205 do
3206 {
3207 for (cp = plb->buffer; *cp != '\0'; cp++)
3208 if (cp[0] == '*' && cp[1] == '/')
3209 return;
3210 (*plineno)++;
3211 *plinecharno += readline (plb, inf) + 1; /* 1 for newline. */
3212 }
3213 while (!feof(inf));
3214 }
3215 \f
3216 #ifdef ETAGS_REGEXPS
/* Take a string like "/blah/" and turn it into "blah", making sure
   that the first and last characters are the same, and handling
   quoted separator characters.  Actually, stops on the occurrence of
   an unquoted separator.  Also turns "\t" into a Tab character.
   Returns pointer to terminating separator, or to the final NUL when
   there is none.  Works in place.  Null terminates name string.  */
char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the character after it.  A backslash
             that is the last character of the string is dropped.  */
          if (*++name == '\0')
            break;
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
3259
3260 /* Turn a name, which is an ed-style (but Emacs syntax) regular
3261 expression, into a real regular expression by compiling it. */
3262 void
3263 add_regex (regexp_pattern)
3264 char *regexp_pattern;
3265 {
3266 char *name;
3267 const char *err;
3268 struct re_pattern_buffer *patbuf;
3269
3270 if (regexp_pattern == NULL)
3271 {
3272 /* Remove existing regexps. */
3273 num_patterns = 0;
3274 patterns = NULL;
3275 return;
3276 }
3277
3278 if (regexp_pattern[0] == '\0')
3279 {
3280 error ("missing regexp", 0);
3281 return;
3282 }
3283 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
3284 {
3285 error ("%s: unterminated regexp", regexp_pattern);
3286 return;
3287 }
3288 name = scan_separators (regexp_pattern);
3289 if (regexp_pattern[0] == '\0')
3290 {
3291 error ("null regexp", 0);
3292 return;
3293 }
3294 (void) scan_separators (name);
3295
3296 patbuf = xnew (1, struct re_pattern_buffer);
3297 patbuf->translate = NULL;
3298 patbuf->fastmap = NULL;
3299 patbuf->buffer = NULL;
3300 patbuf->allocated = 0;
3301
3302 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
3303 if (err != NULL)
3304 {
3305 error ("%s while compiling pattern", err);
3306 return;
3307 }
3308
3309 num_patterns += 1;
3310 if (num_patterns == 1)
3311 patterns = xnew (1, struct pattern);
3312 else
3313 patterns = ((struct pattern *)
3314 xrealloc (patterns,
3315 (num_patterns * sizeof (struct pattern))));
3316 patterns[num_patterns - 1].pattern = patbuf;
3317 patterns[num_patterns - 1].name_pattern = savestr (name);
3318 patterns[num_patterns - 1].error_signaled = FALSE;
3319 }
3320
3321 /*
3322 * Do the subtitutions indicated by the regular expression and
3323 * arguments.
3324 */
3325 char *
3326 substitute (in, out, regs)
3327 char *in, *out;
3328 struct re_registers *regs;
3329 {
3330 char *result = NULL, *t;
3331 int size = 0;
3332
3333 /* Pass 1: figure out how much size to allocate. */
3334 for (t = out; *t; ++t)
3335 {
3336 if (*t == '\\')
3337 {
3338 ++t;
3339 if (!*t)
3340 {
3341 fprintf (stderr, "%s: pattern subtitution ends prematurely\n",
3342 progname);
3343 return NULL;
3344 }
3345 if (isdigit (*t))
3346 {
3347 int dig = *t - '0';
3348 size += regs->end[dig] - regs->start[dig];
3349 }
3350 }
3351 }
3352
3353 /* Allocate space and do the substitutions. */
3354 result = xnew (size + 1, char);
3355 size = 0;
3356 for (; *out; ++out)
3357 {
3358 if (*out == '\\')
3359 {
3360 ++out;
3361 if (isdigit (*out))
3362 {
3363 /* Using "dig2" satisfies my debugger. Bleah. */
3364 int dig2 = *out - '0';
3365 strncpy (result + size, in + regs->start[dig2],
3366 regs->end[dig2] - regs->start[dig2]);
3367 size += regs->end[dig2] - regs->start[dig2];
3368 }
3369 else
3370 {
3371 switch (*out)
3372 {
3373 case '\t':
3374 result[size++] = '\t';
3375 break;
3376 case '\\':
3377 *out = '\\';
3378 break;
3379 default:
3380 result[size++] = *out;
3381 break;
3382 }
3383 }
3384 }
3385 else
3386 result[size++] = *out;
3387 }
3388 result[size] = '\0';
3389
3390 return result;
3391 }
3392 \f
3393 #endif /* ETAGS_REGEXPS */
3394 /* Initialize a linebuffer for use */
3395 void
3396 initbuffer (linebuffer)
3397 struct linebuffer *linebuffer;
3398 {
3399 linebuffer->size = 200;
3400 linebuffer->buffer = xnew (200, char);
3401 }
3402
3403 /*
3404 * Read a line of text from `stream' into `linebuffer'.
3405 * Return the number of characters read from `stream',
3406 * which is the length of the line including the newline, if any.
3407 */
3408 long
3409 readline_internal (linebuffer, stream)
3410 struct linebuffer *linebuffer;
3411 register FILE *stream;
3412 {
3413 char *buffer = linebuffer->buffer;
3414 register char *p = linebuffer->buffer;
3415 register char *pend;
3416 int chars_deleted;
3417
3418 pend = p + linebuffer->size; /* Separate to avoid 386/IX compiler bug. */
3419
3420 while (1)
3421 {
3422 register int c = getc (stream);
3423 if (p == pend)
3424 {
3425 linebuffer->size *= 2;
3426 buffer = (char *) xrealloc (buffer, linebuffer->size);
3427 p += buffer - linebuffer->buffer;
3428 pend = buffer + linebuffer->size;
3429 linebuffer->buffer = buffer;
3430 }
3431 if (c == EOF)
3432 {
3433 chars_deleted = 0;
3434 break;
3435 }
3436 if (c == '\n')
3437 {
3438 if (p[-1] == '\r' && p > buffer)
3439 {
3440 *--p = '\0';
3441 chars_deleted = 2;
3442 }
3443 else
3444 {
3445 *p = '\0';
3446 chars_deleted = 1;
3447 }
3448 break;
3449 }
3450 *p++ = c;
3451 }
3452
3453 return p - buffer + chars_deleted;
3454 }
3455
/*
 * Like readline_internal, above, but try to match the input
 * line against any existing regular expressions.
 *
 * With ETAGS_REGEXPS defined, every pattern in `patterns' is matched
 * at the start of the line and pfnote is called for each match: with
 * the name built by `substitute' when the pattern has a name part,
 * without a name otherwise.  The return value is that of
 * readline_internal.
 */
long
readline (linebuffer, stream)
     struct linebuffer *linebuffer;
     FILE *stream;
{
  /* Read new line. */
  long result = readline_internal (linebuffer, stream);

#ifdef ETAGS_REGEXPS
  int i;
  /* Length of the text to match.  RESULT cannot be used for this: it
     also counts the newline characters that were stripped from the
     buffer.  */
  int linelen = strlen (linebuffer->buffer);

  /* Match against all listed patterns. */
  for (i = 0; i < num_patterns; ++i)
    {
      int match = re_match (patterns[i].pattern, linebuffer->buffer,
                            linelen, 0, &patterns[i].regs);
      switch (match)
        {
        case -2:
          /* Some error; report it once per pattern. */
          if (!patterns[i].error_signaled)
            {
              error ("error while matching pattern %d", i);
              patterns[i].error_signaled = TRUE;
            }
          break;
        case -1:
          /* No match. */
          break;
        default:
          /* Match occurred.  Construct a tag. */
          if (patterns[i].name_pattern[0] != '\0')
            {
              /* Make a named tag. */
              char *name = substitute (linebuffer->buffer,
                                       patterns[i].name_pattern,
                                       &patterns[i].regs);
              if (name != NULL)
                pfnote (name, TRUE, TRUE, linebuffer->buffer,
                        match, lineno, linecharno);
            }
          else
            {
              /* Make an unnamed tag. */
              pfnote (NULL, TRUE, FALSE, linebuffer->buffer,
                      match, lineno, linecharno);
            }
          break;
        }
    }
#endif /* ETAGS_REGEXPS */

  return result;
}
3513
3514 /*
3515 * Read a file, but do no processing. This is used to do regexp
3516 * matching on files that have no language defined.
3517 */
3518 void
3519 just_read_file (inf)
3520 FILE *inf;
3521 {
3522 while (!feof (inf))
3523 {
3524 ++lineno;
3525 linecharno = charno;
3526 charno += readline (&lb, inf) + 1;
3527 }
3528 }
3529
3530 \f
/*
 * Return a copy of the whole string CP in newly allocated space of
 * size strlen(cp)+1; the copy is made by savenstr.
 */
char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
3541
3542 /*
3543 * Return a pointer to a space of size LEN+1 allocated with xnew where
3544 * the string CP has been copied for at most the first LEN characters.
3545 */
3546 char *
3547 savenstr (cp, len)
3548 char *cp;
3549 int len;
3550 {
3551 register char *dp;
3552
3553 dp = xnew (len + 1, char);
3554 strncpy (dp, cp, len);
3555 dp[len] = '\0';
3556 return dp;
3557 }
3558
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL counts as part
 * of the string, so searching for '\0' finds it.
 *
 * Identical to System V strrchr, included for portability.
 */
char *
etags_strrchr (sp, c)
     register char *sp, c;
{
  register char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return last;
}
3579
3580
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL counts as part
 * of the string, so searching for '\0' finds it.
 *
 * Identical to System V strchr, included for portability.
 */
char *
etags_strchr (sp, c)
     register char *sp, c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return sp;
      if (*sp == '\0')
        return NULL;
    }
}
3598
3599 /* Print error message and exit. */
3600 void
3601 fatal (s1, s2)
3602 char *s1, *s2;
3603 {
3604 error (s1, s2);
3605 exit (BAD);
3606 }
3607
3608 void
3609 pfatal (s1)
3610 char *s1;
3611 {
3612 perror (s1);
3613 exit (BAD);
3614 }
3615
3616 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
3617 void
3618 error (s1, s2)
3619 char *s1, *s2;
3620 {
3621 fprintf (stderr, "%s: ", progname);
3622 fprintf (stderr, s1, s2);
3623 fprintf (stderr, "\n");
3624 }
3625
3626 /* Return a newly-allocated string whose contents
3627 concatenate those of s1, s2, s3. */
3628 char *
3629 concat (s1, s2, s3)
3630 char *s1, *s2, *s3;
3631 {
3632 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
3633 char *result = xnew (len1 + len2 + len3 + 1, char);
3634
3635 strcpy (result, s1);
3636 strcpy (result + len1, s2);
3637 strcpy (result + len1 + len2, s3);
3638 result[len1 + len2 + len3] = '\0';
3639
3640 return result;
3641 }
3642 \f
3643 /* Does the same work as the system V getcwd, but does not need to
3644 guess buffer size in advance. */
3645 char *
3646 etags_getcwd ()
3647 #ifdef DOS_NT
3648 {
3649 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
3650
3651 getwd (path);
3652 p = path;
3653 while (*p)
3654 if (*p == '\\')
3655 *p++ = '/';
3656 else
3657 *p++ = tolower (*p);
3658
3659 return strdup (path);
3660 }
3661 #elif HAVE_GETCWD /* not DOS_NT */
3662 {
3663 int bufsize = 200;
3664 char *path = xnew (bufsize, char);
3665
3666 while (getcwd (path, bufsize) == NULL)
3667 {
3668 if (errno != ERANGE)
3669 pfatal ("pwd");
3670 bufsize *= 2;
3671 path = xnew (bufsize, char);
3672 }
3673
3674 return path;
3675 }
3676 #else /* not DOS_NT and not HAVE_GETCWD */
3677 {
3678 struct linebuffer path;
3679 FILE *pipe;
3680
3681 initbuffer (&path);
3682 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
3683 if (pipe == NULL || readline_internal (&path, pipe) == 0)
3684 pfatal ("pwd");
3685 pclose (pipe);
3686
3687 return path.buffer;
3688 }
3689 #endif /* not DOS_NT and not HAVE_GETCWD */
3690
3691 /* Return a newly allocated string containing the filename
3692 of FILE relative to the absolute directory DIR (which
3693 should end with a slash). */
3694 char *
3695 relative_filename (file, dir)
3696 char *file, *dir;
3697 {
3698 char *fp, *dp, *res;
3699
3700 /* Find the common root of file and dir. */
3701 fp = absolute_filename (file, cwd);
3702 dp = dir;
3703 while (*fp++ == *dp++)
3704 continue;
3705 do
3706 {
3707 fp--;
3708 dp--;
3709 }
3710 while (*fp != '/');
3711
3712 /* Build a sequence of "../" strings for the resulting relative filename. */
3713 for (dp = etags_strchr (dp + 1, '/'), res = "";
3714 dp != NULL;
3715 dp = etags_strchr (dp + 1, '/'))
3716 {
3717 res = concat (res, "../", "");
3718 }
3719
3720 /* Add the filename relative to the common root of file and dir. */
3721 res = concat (res, fp + 1, "");
3722
3723 return res; /* temporary stub */
3724 }
3725
/* Return a newly allocated string containing the
   absolute filename of FILE given CWD (which should
   end with a slash).  FILE is used as is when it starts with a slash,
   otherwise it is appended to CWD.  "/dirname/.." and "/." components
   are then removed; a "/.." at the very beginning is simply dropped,
   and a name that reduces to nothing becomes "/".  */
char *
absolute_filename (file, cwd)
     char *file, *cwd;
{
  char *slashp, *cp, *res;
  register char *from, *to;
  int had_text;

  if (file[0] == '/')
    res = concat (file, "", "");
  else
    res = concat (cwd, file, "");
  had_text = (res[0] != '\0');

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look for the slash that starts "dirname", without ever
                 stepping in front of the string.  If there is none,
                 only the "/.." itself is removed.  */
              for (cp = slashp; cp > res;)
                if (*--cp == '/')
                  break;
              if (*cp != '/')
                cp = slashp;

              /* Move the tail down.  Source and destination overlap,
                 so this is done by hand rather than with strcpy.  */
              from = slashp + 3;
              to = cp;
              while ((*to++ = *from++) != '\0')
                continue;

              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              from = slashp + 2;
              to = slashp;
              while ((*to++ = *from++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  /* Everything was removed: the result is the root.  A string that
     held text before has room for the two bytes.  */
  if (res[0] == '\0' && had_text)
    {
      res[0] = '/';
      res[1] = '\0';
    }

  return res;
}
3779
/* Return a newly allocated string containing the absolute
   filename of dir where FILE resides given CWD (which should
   end with a slash).  When FILE contains no slash the result is a
   copy of CWD.  FILE is modified temporarily while the name is built
   and restored before returning, so it must be writable.  */
char *
absolute_dirname (file, cwd)
     char *file, *cwd;
{
  char *slashp, *res;
  char save;

  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    /* Return a copy, so that the result is newly allocated in this
       case as well.  */
    return savestr (cwd);

  /* Cut FILE after its last slash for the duration of the call.  */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, cwd);
  slashp[1] = save;

  return res;
}
3800
/* Like malloc but get fatal error if memory is exhausted.
   Return SIZE bytes of fresh storage; on failure `fatal' is called
   with the message "virtual memory exhausted".  */
char *
xmalloc (size)
     unsigned int size;
{
  char *block = (char *) malloc (size);

  if (block == NULL)
    fatal ("virtual memory exhausted", 0);
  return block;
}
3811
/* Like realloc but get fatal error if memory is exhausted.
   Return PTR resized to SIZE bytes; on failure `fatal' is called with
   the message "virtual memory exhausted".  */
char *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  char *result = (char *) realloc (ptr, size);
  if (result == NULL)
    /* `fatal' takes two arguments; pass both, as xmalloc does.  */
    fatal ("virtual memory exhausted", 0);
  return result;
}