* etags.c (get_lang_from_name, get_lang_from_interpreter,
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95
3 Free Software Foundation, Inc. and Ken Arnold
4 This file is not considered part of GNU Emacs.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /*
21 * Authors:
22 * Ctags originally by Ken Arnold.
23 * Fortran added by Jim Kleckner.
24 * Ed Pelegri-Llopart added C typedefs.
25 * Gnu Emacs TAGS format and modifications by RMS?
26 * Sam Kendall added C++.
27 * Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
28 #ifdef ETAGS_REGEXPS
29 * Regexp tags by Tom Tromey.
30 #endif
31 *
32 * Francesco Potorti` (pot@cnuce.cnr.it) is the current maintainer.
33 */
34
35 char pot_etags_version[] = "@(#) pot revision number is 11.45";
36
37 #define TRUE 1
38 #define FALSE 0
39 #ifndef DEBUG
40 # define DEBUG FALSE
41 #endif
42
43 #ifdef MSDOS
44 #include <fcntl.h>
45 #include <sys/param.h>
46 #endif /* MSDOS */
47
48 #ifdef WINDOWSNT
49 #include <stdlib.h>
50 #include <fcntl.h>
51 #include <string.h>
52 #define MAXPATHLEN _MAX_PATH
53 #endif
54
55 #ifdef HAVE_CONFIG_H
56 #include <config.h>
57 /* On some systems, Emacs defines static as nothing for the sake
58 of unexec. We don't want that here since we don't use unexec. */
59 #undef static
60 #endif
61
62 #include <stdio.h>
63 #include <ctype.h>
64 #include <errno.h>
65 #ifndef errno
66 extern int errno;
67 #endif
68 #include <sys/types.h>
69 #include <sys/stat.h>
70
71 #if !defined (S_ISREG) && defined (S_IFREG)
72 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
73 #endif
74
75 #include <getopt.h>
76
77 #ifdef ETAGS_REGEXPS
78 #include <regex.h>
79 #endif /* ETAGS_REGEXPS */
80
81 /* Define CTAGS to make the program "ctags" compatible with the usual one.
82 Let it undefined to make the program "etags", which makes emacs-style
83 tag tables and tags typedefs, #defines and struct/union/enum by default. */
84 #ifdef CTAGS
85 # undef CTAGS
86 # define CTAGS TRUE
87 #else
88 # define CTAGS FALSE
89 #endif
90
91 /* Exit codes for success and failure. */
92 #ifdef VMS
93 #define GOOD 1
94 #define BAD 0
95 #else
96 #define GOOD 0
97 #define BAD 1
98 #endif
99
100 /* C extensions. */
101 #define C_PLPL 0x00001 /* C++ */
102 #define C_STAR 0x00003 /* C* */
103 #define YACC 0x10000 /* yacc file */
104
/* String comparison shorthands: true when S and T (or their first N
   characters) compare equal.  */
#define streq(s,t)	(strcmp ((s), (t)) == 0)
#define strneq(s,t,n)	(strncmp ((s), (t), (n)) == 0)

/* Lower-case version of C.  The whole argument is converted to
   unsigned char before it reaches tolower, whatever expression the
   caller passes.  */
#define lowcase(c)	tolower ((unsigned char) (c))

/* Character class lookups.  Each one indexes a table that is filled
   in by init; ARG must be a valid index into that table.  */
#define iswhite(arg)	(_wht[arg])	/* T if char is white		*/
#define begtoken(arg)	(_btk[arg])	/* T if char can start token	*/
#define intoken(arg)	(_itk[arg])	/* T if char can be in token	*/
#define endtoken(arg)	(_etk[arg])	/* T if char ends tokens	*/

/* True when the file name FN is absolute.  FN may be any pointer
   expression (for instance `p + 1'); it is parenthesized before being
   subscripted.  Under DOS_NT a leading drive letter followed by a
   colon also counts as absolute.  */
#ifdef DOS_NT
# define absolutefn(fn)	((fn)[0] == '/' \
			 || (isalpha ((unsigned char) (fn)[0]) && (fn)[1] == ':'))
#else
# define absolutefn(fn)	((fn)[0] == '/')
#endif
120
121
122 /*
123 * xnew -- allocate storage
124 *
125 * SYNOPSIS: Type *xnew (int n, Type);
126 */
127 #define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
128
/* Truth value; holds TRUE or FALSE.  */
typedef int logical;

/* One tag, stored as a node of a binary tree.  */
typedef struct nd_st
{
  char *name;			/* function or type name	*/
  char *file;			/* file name			*/
  logical is_func;		/* use pattern or line no	*/
  logical been_warned;		/* set if noticed dup		*/
  int lno;			/* line number tag is on	*/
  long cno;			/* character number line starts on */
  char *pat;			/* search pattern		*/
  struct nd_st *left;		/* left son			*/
  struct nd_st *right;		/* right son			*/
} NODE;
142
143 extern char *getenv ();
144
145 char *concat ();
146 char *savenstr (), *savestr ();
147 char *etags_strchr (), *etags_strrchr ();
148 char *etags_getcwd ();
149 char *relative_filename (), *absolute_filename (), *absolute_dirname ();
150 long *xmalloc (), *xrealloc ();
151
152 typedef void Lang_function ();
153 #if FALSE /* many compilers barf on this */
154 Lang_function Asm_labels;
155 Lang_function default_C_entries;
156 Lang_function C_entries;
157 Lang_function Cplusplus_entries;
158 Lang_function Cstar_entries;
159 Lang_function Fortran_functions;
160 Lang_function Yacc_entries;
161 Lang_function Lisp_functions;
162 Lang_function Pascal_functions;
163 Lang_function Perl_functions;
164 Lang_function Prolog_functions;
165 Lang_function Scheme_functions;
166 Lang_function TeX_functions;
167 Lang_function just_read_file;
168 #else /* so let's write it this way */
169 void Asm_labels ();
170 void C_entries ();
171 void default_C_entries ();
172 void plain_C_entries ();
173 void Cplusplus_entries ();
174 void Cstar_entries ();
175 void Fortran_functions ();
176 void Yacc_entries ();
177 void Lisp_functions ();
178 void Pascal_functions ();
179 void Perl_functions ();
180 void Prolog_functions ();
181 void Scheme_functions ();
182 void TeX_functions ();
183 void just_read_file ();
184 #endif
185
186 Lang_function *get_language_from_name ();
187 Lang_function *get_language_from_interpreter ();
188 Lang_function *get_language_from_suffix ();
189 int total_size_of_entries ();
190 long readline ();
191 long readline_internal ();
192 #ifdef ETAGS_REGEXPS
193 void add_regex ();
194 #endif
195 void add_node ();
196 void error ();
197 void fatal (), pfatal ();
198 void find_entries ();
199 void free_tree ();
200 void getit ();
201 void init ();
202 void initbuffer ();
203 void pfnote ();
204 void process_file ();
205 void put_entries ();
206 void takeprec ();
207
208 \f
209 char searchar = '/'; /* use /.../ searches */
210
211 int lineno; /* line number of current line */
212 long charno; /* current character number */
213
214 long linecharno; /* charno of start of line; not used by C,
215 but by every other language. */
216
217 char *curfile; /* current input file name */
218 char *tagfile; /* output file */
219 char *progname; /* name this program was invoked with */
220 char *cwd; /* current working directory */
221 char *tagfiledir; /* directory of tagfile */
222
223 FILE *tagf; /* ioptr for tags file */
224 NODE *head; /* the head of the binary tree of tags */
225
226 /*
227 * A `struct linebuffer' is a structure which holds a line of text.
228 * `readline' reads a line from a stream into a linebuffer and works
229 * regardless of the length of the line.
230 */
struct linebuffer
{
  long size;			/* size field of the buffer	*/
  char *buffer;			/* the text itself		*/
};

/* The current line.  */
struct linebuffer lb;

/* Used by C_entries as temporary area.  */
struct linebuffer token_name;

/* Pair of line buffers, each paired with a line position; used by
   C_entries instead of lb.  */
struct
{
  long linepos;
  struct linebuffer lb;
} lbs[2];
244
245 /* boolean "functions" (see init) */
246 logical _wht[0177], _etk[0177], _itk[0177], _btk[0177];
247 char
248 *white = " \f\t\n\013", /* white chars */
249 *endtk = " \t\n\013\"'#()[]{}=-+%*/&|^~!<>;,.:?", /* token ending chars */
250 /* token starting chars */
251 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~",
252 /* valid in-token chars */
253 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
254
255 logical append_to_tagfile; /* -a: append to tags */
256 /* The following three default to TRUE for etags, but to FALSE for ctags. */
257 logical typedefs; /* -t: create tags for typedefs */
258 logical typedefs_and_cplusplus; /* -T: create tags for typedefs, level */
259 /* 0 struct/enum/union decls, and C++ */
260 /* member functions. */
261 logical constantypedefs; /* -d: create tags for C #define and enum */
262 /* constants. Enum consts not implemented. */
263 /* -D: opposite of -d. Default under ctags. */
264 logical update; /* -u: update tags */
265 logical vgrind_style; /* -v: create vgrind style index output */
266 logical no_warnings; /* -w: suppress warnings */
267 logical cxref_style; /* -x: create cxref style output */
268 logical cplusplus; /* .[hc] means C++, not C */
269 logical noindentypedefs; /* -I: ignore indentation in C */
270
/* Long option table handed to getopt_long.  Every entry has a NULL
   flag pointer, so getopt_long returns the letter in the last column
   when the long name is matched.  */
struct option longopts[] =
{
  { "append",             no_argument,       NULL, 'a' },
  { "backward-search",    no_argument,       NULL, 'B' },
  { "c++",                no_argument,       NULL, 'C' },
  { "cxref",              no_argument,       NULL, 'x' },
  { "defines",            no_argument,       NULL, 'd' },
  { "help",               no_argument,       NULL, 'h' },
  { "help",               no_argument,       NULL, 'H' },
  { "ignore-indentation", no_argument,       NULL, 'I' },
  { "include",            required_argument, NULL, 'i' },
  { "language",           required_argument, NULL, 'l' },
  { "no-defines",         no_argument,       NULL, 'D' },
  { "no-regex",           no_argument,       NULL, 'R' },
  { "no-warn",            no_argument,       NULL, 'w' },
  { "output",             required_argument, NULL, 'o' },
  { "regex",              required_argument, NULL, 'r' },
  { "typedefs",           no_argument,       NULL, 't' },
  { "typedefs-and-c++",   no_argument,       NULL, 'T' },
  { "update",             no_argument,       NULL, 'u' },
  { "version",            no_argument,       NULL, 'V' },
  { "vgrind",             no_argument,       NULL, 'v' },
  { NULL,                 0,                 NULL, 0   }	/* end of table */
};
295
#ifdef ETAGS_REGEXPS
/* Description of one regular expression: the compiled pattern, the
   register set that goes with it, and the name string.  */
struct pattern
{
  struct re_pattern_buffer *pattern;	/* compiled pattern	*/
  struct re_registers regs;		/* registers		*/
  char *name_pattern;			/* name string		*/
  logical error_signaled;
};

/* How many regexps have been found; starts at zero.  */
int num_patterns = 0;

/* The regexps themselves, as an array; starts out empty.  */
struct pattern *patterns = NULL;
#endif /* ETAGS_REGEXPS */
313
314 /*
315 * Language stuff.
316 */
317
318 /* Non-NULL if language fixed. */
319 Lang_function *lang_func = NULL;
320
/* File name suffixes (and, for Perl, interpreter names) recognized
   for each language.  Every table ends with a NULL entry.  */

/* Assembly code */
char *Asm_suffixes [] =
{
  "a",				/* Unix assembler */
  "asm",			/* Microcontroller assembly */
  "def",			/* BSO/Tasking definition includes */
  "inc",			/* Microcontroller include files */
  "ins",			/* Microcontroller include files */
  "s",				/* Unix assembler */
  "sa",				/* Unix assembler */
  "src",			/* BSO/Tasking C compiler output */
  NULL
};

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given.  That is why default_C_entries is called here. */
char *default_C_suffixes [] = { "c", "h", NULL };

/* C++ file */
char *Cplusplus_suffixes [] =
{
  "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx",
  NULL
};

/* C* file */
char *Cstar_suffixes [] = { "cs", "hs", NULL };

/* Fortran */
char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };

/* Lisp source code */
char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "lsp", "ml",
  NULL
};

/* Pascal file */
char *Pascal_suffixes [] = { "p", "pas", NULL };

/* Perl file */
char *Perl_suffixes [] = { "pl", "pm", NULL };
char *Perl_interpreters [] = { "perl", NULL };

/* Pro*C file. */
char *plain_C_suffixes [] = { "pc", NULL };

/* Prolog source code */
char *Prolog_suffixes [] = { "prolog", NULL };

/* Scheme source code */
/* FIXME Can't do the `SCM' or `scm' prefix with a version number */
char *Scheme_suffixes [] =
{
  "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "t",
  NULL
};

/* TeX/LaTeX source code */
char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex",
  NULL
};

/* Yacc file */
char *Yacc_suffixes [] = { "y", NULL };
383
384 /* Table of language names and corresponding functions, file suffixes
385 and interpreter names.
386 It is ok for a given function to be listed under more than one
387 name. I just didn't. */
388 struct lang_entry
389 {
390 char *name;
391 Lang_function *function;
392 char **suffixes;
393 char **interpreters;
394 };
395
396 struct lang_entry lang_names [] =
397 {
398 { "asm", Asm_labels, Asm_suffixes },
399 { "c", default_C_entries, default_C_suffixes },
400 { "c++", Cplusplus_entries, Cplusplus_suffixes },
401 { "c*", Cstar_entries, Cstar_suffixes },
402 { "fortran", Fortran_functions, Fortran_suffixes },
403 { "lisp", Lisp_functions, Lisp_suffixes },
404 { "pascal", Pascal_functions, Pascal_suffixes },
405 { "perl", Perl_functions, Perl_suffixes, Perl_interpreters },
406 { "proc", plain_C_entries, plain_C_suffixes },
407 { "prolog", Prolog_functions, Prolog_suffixes },
408 { "scheme" , Scheme_functions, Scheme_suffixes },
409 { "tex", TeX_functions, TeX_suffixes },
410 { "yacc", Yacc_entries, Yacc_suffixes },
411 { "auto", NULL }, /* default guessing scheme */
412 { "none", just_read_file }, /* regexp matching only */
413 { NULL, NULL } /* end of list */
414 };
415
416 \f
417 void
418 print_language_names ()
419 {
420 struct lang_entry *lang;
421 char **ext;
422
423 puts ("\nThese are the currently supported languages, along with the\n\
424 default file name suffixes:");
425 for (lang = lang_names; lang->name != NULL; lang++)
426 {
427 printf ("\t%s\t", lang->name);
428 if (lang->suffixes != NULL)
429 for (ext = lang->suffixes; *ext != NULL; ext++)
430 printf (" .%s", *ext);
431 puts ("");
432 }
433 puts ("Where `auto' means use default language for files based on file\n\
434 name suffix, and `none' means only do regexp processing on files.\n\
435 If no language is specified and no matching suffix is found,\n\
436 the first line of the file is read for a sharp-bang (#!) sequence\n\
437 followed by the name of an interpreter. If no such sequence is found,\n\
438 Fortran is tried first; if no tags are found, C is tried next.");
439 }
440
441 #ifndef VERSION
442 # define VERSION "19"
443 #endif
444 void
445 print_version ()
446 {
447 printf ("%s for Emacs version %s\n", (CTAGS) ? "ctags" : "etags", VERSION);
448
449 exit (GOOD);
450 }
451
452 void
453 print_help ()
454 {
455 printf ("These are the options accepted by %s. You may use unambiguous\n\
456 abbreviations for the long option names. A - as file name means read\n\
457 names from stdin.\n\n", progname);
458
459 puts ("-a, --append\n\
460 Append tag entries to existing tags file.");
461
462 if (CTAGS)
463 puts ("-B, --backward-search\n\
464 Write the search commands for the tag entries using '?', the\n\
465 backward-search command instead of '/', the forward-search command.");
466
467 puts ("-C, --c++\n\
468 Treat files whose name suffix defaults to C language as C++ files.");
469
470 if (CTAGS)
471 puts ("-d, --defines\n\
472 Create tag entries for constant C #defines, too.");
473 else
474 puts ("-D, --no-defines\n\
475 Don't create tag entries for constant C #defines. This makes\n\
476 the tags file smaller.");
477
478 if (!CTAGS)
479 {
480 puts ("-i FILE, --include=FILE\n\
481 Include a note in tag file indicating that, when searching for\n\
482 a tag, one should also consult the tags file FILE after\n\
483 checking the current file.");
484 puts ("-l LANG, --language=LANG\n\
485 Force the following files to be considered as written in the\n\
486 named language up to the next --language=LANG option.");
487 }
488
489 #ifdef ETAGS_REGEXPS
490 puts ("-r /REGEXP/, --regex=/REGEXP/\n\
491 Make a tag for each line matching pattern REGEXP in the\n\
492 following files. REGEXP is anchored (as if preceded by ^).\n\
493 The form /REGEXP/NAME/ creates a named tag. For example Tcl\n\
494 named tags can be created with:\n\
495 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
496 puts ("-R, --no-regex\n\
497 Don't create tags from regexps for the following files.");
498 #endif /* ETAGS_REGEXPS */
499 puts ("-o FILE, --output=FILE\n\
500 Write the tags to FILE.");
501 puts ("-I, --ignore-indentation\n\
502 Don't rely on indentation quite as much as normal. Currently,\n\
503 this means not to assume that a closing brace in the first\n\
504 column is the final brace of a function or structure\n\
505 definition in C and C++.");
506
507 if (CTAGS)
508 {
509 puts ("-t, --typedefs\n\
510 Generate tag entries for C typedefs.");
511 puts ("-T, --typedefs-and-c++\n\
512 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
513 and C++ member functions.");
514 puts ("-u, --update\n\
515 Update the tag entries for the given files, leaving tag\n\
516 entries for other files in place. Currently, this is\n\
517 implemented by deleting the existing entries for the given\n\
518 files and then rewriting the new entries at the end of the\n\
519 tags file. It is often faster to simply rebuild the entire\n\
520 tag file than to use this.");
521 puts ("-v, --vgrind\n\
522 Generates an index of items intended for human consumption,\n\
523 similar to the output of vgrind. The index is sorted, and\n\
524 gives the page number of each item.");
525 puts ("-w, --no-warn\n\
526 Suppress warning messages about entries defined in multiple\n\
527 files.");
528 puts ("-x, --cxref\n\
529 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
530 The output uses line numbers instead of page numbers, but\n\
531 beyond that the differences are cosmetic; try both to see\n\
532 which you like.");
533 }
534
535 puts ("-V, --version\n\
536 Print the version of the program.\n\
537 -h, --help\n\
538 Print this help message.");
539
540 print_language_names ();
541
542 exit (GOOD);
543 }
544
545 \f
/* What a recorded command-line argument stands for.  */
enum argument_type
{
  at_language,			/* a language selection	*/
  at_regexp,			/* a regexp argument	*/
  at_filename			/* an input file name	*/
};
552
553 /* This structure helps us allow mixing of --lang and filenames. */
554 typedef struct
555 {
556 enum argument_type arg_type;
557 char *what;
558 Lang_function *function;
559 } argument;
560
561 #ifdef VMS /* VMS specific functions */
562
563 #define EOS '\0'
564
565 /* This is a BUG! ANY arbitrary limit is a BUG!
566 Won't someone please fix this? */
567 #define MAX_FILE_SPEC_LEN 255
568 typedef struct {
569 short curlen;
570 char body[MAX_FILE_SPEC_LEN + 1];
571 } vspec;
572
573 /*
574 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
575 returning in each successive call the next filename matching the input
576 spec. The function expects that each in_spec passed
577 to it will be processed to completion; in particular, up to and
578 including the call following that in which the last matching name
579 is returned, the function ignores the value of in_spec, and will
580 only start processing a new spec with the following call.
581 If an error occurs, on return out_spec contains the value
582 of in_spec when the error occurred.
583
584 With each successive filename returned in out_spec, the
585 function's return value is one. When there are no more matching
586 names the function returns zero. If on the first call no file
587 matches in_spec, or there is any other error, -1 is returned.
588 */
589
590 #include <rmsdef.h>
591 #include <descrip.h>
592 #define OUTSIZE MAX_FILE_SPEC_LEN
593 short
594 fn_exp (out, in)
595 vspec *out;
596 char *in;
597 {
598 static long context = 0;
599 static struct dsc$descriptor_s o;
600 static struct dsc$descriptor_s i;
601 static logical pass1 = TRUE;
602 long status;
603 short retval;
604
605 if (pass1)
606 {
607 pass1 = FALSE;
608 o.dsc$a_pointer = (char *) out;
609 o.dsc$w_length = (short)OUTSIZE;
610 i.dsc$a_pointer = in;
611 i.dsc$w_length = (short)strlen(in);
612 i.dsc$b_dtype = DSC$K_DTYPE_T;
613 i.dsc$b_class = DSC$K_CLASS_S;
614 o.dsc$b_dtype = DSC$K_DTYPE_VT;
615 o.dsc$b_class = DSC$K_CLASS_VS;
616 }
617 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
618 {
619 out->body[out->curlen] = EOS;
620 return 1;
621 }
622 else if (status == RMS$_NMF)
623 retval = 0;
624 else
625 {
626 strcpy(out->body, in);
627 retval = -1;
628 }
629 lib$find_file_end(&context);
630 pass1 = TRUE;
631 return retval;
632 }
633
634 /*
635 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
636 name of each file specified by the provided arg expanding wildcards.
637 */
638 char *
639 gfnames (arg, p_error)
640 char *arg;
641 logical *p_error;
642 {
643 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
644
645 switch (fn_exp (&filename, arg))
646 {
647 case 1:
648 *p_error = FALSE;
649 return filename.body;
650 case 0:
651 *p_error = FALSE;
652 return NULL;
653 default:
654 *p_error = TRUE;
655 return filename.body;
656 }
657 }
658
/* NOTE(review): the stub is compiled when OLD is *not* defined, while
   the remark below says that newer VMS versions provide `system' --
   confirm that the sense of this test is the intended one.  */
#ifndef OLD			/* Newer versions of VMS do provide `system'. */
/* Stand-in for system(): the command CMD is never run.  A message is
   written to stderr and -1 is returned, so that a caller comparing
   the result with a success status sees a failure instead of an
   indeterminate value.  */
int
system (cmd)
     char *cmd;
{
  fprintf (stderr, "system() function not implemented under VMS\n");
  return -1;
}
#endif
666
667 #define VERSION_DELIM ';'
668 char *massage_name (s)
669 char *s;
670 {
671 char *start = s;
672
673 for ( ; *s; s++)
674 if (*s == VERSION_DELIM)
675 {
676 *s = EOS;
677 break;
678 }
679 else
680 *s = lowcase (*s);
681 return start;
682 }
683 #endif /* VMS */
684
685 \f
686 void
687 main (argc, argv)
688 int argc;
689 char *argv[];
690 {
691 int i;
692 unsigned int nincluded_files = 0;
693 char **included_files = xnew (argc, char *);
694 char *this_file;
695 argument *argbuffer;
696 int current_arg = 0, file_count = 0;
697 struct linebuffer filename_lb;
698 #ifdef VMS
699 logical got_err;
700 #endif
701
702 #ifdef DOS_NT
703 _fmode = O_BINARY; /* all of files are treated as binary files */
704 #endif /* DOS_NT */
705
706 progname = argv[0];
707
708 /* Allocate enough no matter what happens. Overkill, but each one
709 is small. */
710 argbuffer = xnew (argc, argument);
711
712 #ifdef ETAGS_REGEXPS
713 /* Set syntax for regular expression routines. */
714 re_set_syntax (RE_SYNTAX_EMACS);
715 #endif /* ETAGS_REGEXPS */
716
717 /*
718 * If etags, always find typedefs and structure tags. Why not?
719 * Also default is to find macro constants.
720 */
721 if (!CTAGS)
722 typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
723
724 while (1)
725 {
726 int opt = getopt_long (argc, argv,
727 "-aCdDf:Il:o:r:RStTi:BuvxwVhH", longopts, 0);
728
729 if (opt == EOF)
730 break;
731
732 switch (opt)
733 {
734 case 0:
735 /* If getopt returns 0, then it has already processed a
736 long-named option. We should do nothing. */
737 break;
738
739 case 1:
740 /* This means that a filename has been seen. Record it. */
741 argbuffer[current_arg].arg_type = at_filename;
742 argbuffer[current_arg].what = optarg;
743 ++current_arg;
744 ++file_count;
745 break;
746
747 /* Common options. */
748 case 'a':
749 append_to_tagfile = TRUE;
750 break;
751 case 'C':
752 cplusplus = TRUE;
753 break;
754 case 'd':
755 constantypedefs = TRUE;
756 break;
757 case 'D':
758 constantypedefs = FALSE;
759 break;
760 case 'f': /* for compatibility with old makefiles */
761 case 'o':
762 if (tagfile)
763 {
764 fprintf (stderr, "%s: -%c option may only be given once.\n",
765 progname, opt);
766 goto usage;
767 }
768 tagfile = optarg;
769 break;
770 case 'I':
771 case 'S': /* for backward compatibility */
772 noindentypedefs = TRUE;
773 break;
774 case 'l':
775 argbuffer[current_arg].function = get_language_from_name (optarg);
776 if (argbuffer[current_arg].function == NULL)
777 {
778 fprintf (stderr, "%s: language \"%s\" not recognized.\n",
779 progname, optarg);
780 goto usage;
781 }
782 argbuffer[current_arg].arg_type = at_language;
783 ++current_arg;
784 break;
785 #ifdef ETAGS_REGEXPS
786 case 'r':
787 argbuffer[current_arg].arg_type = at_regexp;
788 argbuffer[current_arg].what = optarg;
789 ++current_arg;
790 break;
791 case 'R':
792 argbuffer[current_arg].arg_type = at_regexp;
793 argbuffer[current_arg].what = NULL;
794 ++current_arg;
795 break;
796 #endif /* ETAGS_REGEXPS */
797 case 'V':
798 print_version ();
799 break;
800 case 'h':
801 case 'H':
802 print_help ();
803 break;
804 case 't':
805 typedefs = TRUE;
806 break;
807 case 'T':
808 typedefs = typedefs_and_cplusplus = TRUE;
809 break;
810 #if (!CTAGS)
811 /* Etags options */
812 case 'i':
813 included_files[nincluded_files++] = optarg;
814 break;
815 #else /* CTAGS */
816 /* Ctags options. */
817 case 'B':
818 searchar = '?';
819 break;
820 case 'u':
821 update = TRUE;
822 break;
823 case 'v':
824 vgrind_style = TRUE;
825 /*FALLTHRU*/
826 case 'x':
827 cxref_style = TRUE;
828 break;
829 case 'w':
830 no_warnings = TRUE;
831 break;
832 #endif /* CTAGS */
833 default:
834 goto usage;
835 }
836 }
837
838 for (; optind < argc; ++optind)
839 {
840 argbuffer[current_arg].arg_type = at_filename;
841 argbuffer[current_arg].what = argv[optind];
842 ++current_arg;
843 ++file_count;
844 }
845
846 if (nincluded_files == 0 && file_count == 0)
847 {
848 fprintf (stderr, "%s: No input files specified.\n", progname);
849
850 usage:
851 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
852 progname);
853 exit (BAD);
854 }
855
856 if (tagfile == NULL)
857 {
858 tagfile = CTAGS ? "tags" : "TAGS";
859 }
860 cwd = etags_getcwd (); /* the current working directory */
861 strcat (cwd, "/");
862 if (streq (tagfile, "-"))
863 {
864 tagfiledir = cwd;
865 }
866 else
867 {
868 tagfiledir = absolute_dirname (tagfile, cwd);
869 }
870
871 init (); /* set up boolean "functions" */
872
873 initbuffer (&lb);
874 initbuffer (&token_name);
875 initbuffer (&lbs[0].lb);
876 initbuffer (&lbs[1].lb);
877 initbuffer (&filename_lb);
878
879 if (!CTAGS)
880 {
881 if (streq (tagfile, "-"))
882 tagf = stdout;
883 else
884 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
885 if (tagf == NULL)
886 pfatal (tagfile);
887 }
888
889 /*
890 * Loop through files finding functions.
891 */
892 for (i = 0; i < current_arg; ++i)
893 {
894 switch (argbuffer[i].arg_type)
895 {
896 case at_language:
897 lang_func = argbuffer[i].function;
898 break;
899 #ifdef ETAGS_REGEXPS
900 case at_regexp:
901 add_regex (argbuffer[i].what);
902 break;
903 #endif
904 case at_filename:
905 #ifdef VMS
906 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
907 {
908 if (got_err)
909 {
910 error ("Can't find file %s\n", this_file);
911 argc--, argv++;
912 }
913 else
914 {
915 this_file = massage_name (this_file);
916 }
917 #else
918 this_file = argbuffer[i].what;
919 #endif
920 /* Input file named "-" means read file names from stdin
921 and use them. */
922 if (streq (this_file, "-"))
923 while (readline_internal (&filename_lb, stdin) > 0)
924 process_file (filename_lb.buffer);
925 else
926 process_file (this_file);
927 #ifdef VMS
928 }
929 #endif
930 break;
931 }
932 }
933
934 if (!CTAGS)
935 {
936 while (nincluded_files-- > 0)
937 fprintf (tagf, "\f\n%s,include\n", *included_files++);
938
939 fclose (tagf);
940 exit (GOOD);
941 }
942
943 /* If CTAGS, we are here. process_file did not write the tags yet,
944 because we want them ordered. Let's do it now. */
945 if (cxref_style)
946 {
947 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
948 if (tagf == NULL)
949 pfatal (tagfile);
950 put_entries (head);
951 exit (GOOD);
952 }
953
954 if (update)
955 {
956 char cmd[BUFSIZ];
957 for (i = 0; i < current_arg; ++i)
958 {
959 if (argbuffer[i].arg_type != at_filename)
960 continue;
961 sprintf (cmd,
962 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
963 tagfile, argbuffer[i].what, tagfile);
964 if (system (cmd) != GOOD)
965 fatal ("failed to execute shell command");
966 }
967 append_to_tagfile = TRUE;
968 }
969
970 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
971 if (tagf == NULL)
972 pfatal (tagfile);
973 put_entries (head);
974 fclose (tagf);
975
976 if (update)
977 {
978 char cmd[BUFSIZ];
979 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
980 exit (system (cmd));
981 }
982 exit (GOOD);
983 }
984
985
986 /*
987 * Return a Lang_function given the name.
988 */
989 Lang_function *
990 get_language_from_name (name)
991 char *name;
992 {
993 struct lang_entry *lang;
994
995 if (name == NULL)
996 return NULL;
997 for (lang = lang_names; lang->name != NULL; lang++)
998 {
999 if (streq (name, lang->name))
1000 return lang->function;
1001 }
1002
1003 return NULL;
1004 }
1005
1006
1007 /*
1008 * Return a Lang_function given the interpreter name.
1009 */
1010 Lang_function *
1011 get_language_from_interpreter (interpreter)
1012 char *interpreter;
1013 {
1014 struct lang_entry *lang;
1015 char **iname;
1016
1017 if (interpreter == NULL)
1018 return NULL;
1019 for (lang = lang_names; lang->name != NULL; lang++)
1020 if (lang->interpreters != NULL)
1021 for (iname = lang->interpreters; *iname != NULL; iname++)
1022 if (streq (*iname, interpreter))
1023 return lang->function;
1024
1025 return NULL;
1026 }
1027
1028
1029
1030 /*
1031 * Return a Lang_function given the file suffix.
1032 */
1033 Lang_function *
1034 get_language_from_suffix (suffix)
1035 char *suffix;
1036 {
1037 struct lang_entry *lang;
1038 char **ext;
1039
1040 if (suffix == NULL)
1041 return NULL;
1042 for (lang = lang_names; lang->name != NULL; lang++)
1043 if (lang->suffixes != NULL)
1044 for (ext = lang->suffixes; *ext != NULL; ext++)
1045 if (streq (*ext, suffix))
1046 return lang->function;
1047
1048 return NULL;
1049 }
1050
1051
1052 /*
1053 * This routine is called on each file argument.
1054 */
1055 void
1056 process_file (file)
1057 char *file;
1058 {
1059 struct stat stat_buf;
1060 FILE *inf;
1061
1062 if (stat (file, &stat_buf) == 0 && !S_ISREG (stat_buf.st_mode))
1063 {
1064 fprintf (stderr, "Skipping %s: it is not a regular file.\n", file);
1065 return;
1066 }
1067 if (streq (file, tagfile) && !streq (tagfile, "-"))
1068 {
1069 fprintf (stderr, "Skipping inclusion of %s in self.\n", file);
1070 return;
1071 }
1072 inf = fopen (file, "r");
1073 if (inf == NULL)
1074 {
1075 perror (file);
1076 return;
1077 }
1078
1079 find_entries (file, inf);
1080
1081 if (!CTAGS)
1082 {
1083 char *filename;
1084
1085 if (absolutefn (file))
1086 {
1087 /* file is an absolute filename. Canonicalise it. */
1088 filename = absolute_filename (file, cwd);
1089 }
1090 else
1091 {
1092 /* file is a filename relative to cwd. Make it relative
1093 to the directory of the tags file. */
1094 filename = relative_filename (file, tagfiledir);
1095 }
1096 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1097 free (filename);
1098 put_entries (head);
1099 free_tree (head);
1100 head = NULL;
1101 }
1102 }
1103
1104 /*
1105 * This routine sets up the boolean pseudo-functions which work
1106 * by setting boolean flags dependent upon the corresponding character
1107 * Every char which is NOT in that string is not a white char. Therefore,
1108 * all of the array "_wht" is set to FALSE, and then the elements
1109 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1110 * of a char is TRUE if it is the string "white", else FALSE.
1111 */
1112 void
1113 init ()
1114 {
1115 register char *sp;
1116 register int i;
1117
1118 for (i = 0; i < 0177; i++)
1119 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
1120 for (sp = white; *sp; sp++)
1121 _wht[*sp] = TRUE;
1122 for (sp = endtk; *sp; sp++)
1123 _etk[*sp] = TRUE;
1124 for (sp = intk; *sp; sp++)
1125 _itk[*sp] = TRUE;
1126 for (sp = begtk; *sp; sp++)
1127 _btk[*sp] = TRUE;
1128 _wht[0] = _wht['\n'];
1129 _etk[0] = _etk['\n'];
1130 _btk[0] = _btk['\n'];
1131 _itk[0] = _itk['\n'];
1132 }
1133
1134 /*
1135 * This routine opens the specified file and calls the function
1136 * which finds the function and type definitions.
1137 */
1138 void
1139 find_entries (file, inf)
1140 char *file;
1141 FILE *inf;
1142 {
1143 char *cp;
1144 Lang_function *function;
1145 NODE *old_last_node;
1146 extern NODE *last_node;
1147
1148 /* Memory leakage here: the memory block pointed by curfile is never
1149 released. The amount of memory leaked here is the sum of the
1150 lengths of the input file names. */
1151 curfile = savestr (file);
1152
1153 /* If user specified a language, use it. */
1154 function = lang_func;
1155 if (function != NULL)
1156 {
1157 function (inf);
1158 fclose (inf);
1159 return;
1160 }
1161
1162 cp = etags_strrchr (file, '.');
1163 if (cp != NULL)
1164 {
1165 cp += 1;
1166 function = get_language_from_suffix (cp);
1167 if (function != NULL)
1168 {
1169 function (inf);
1170 fclose (inf);
1171 return;
1172 }
1173 }
1174
1175 /* Look for sharp-bang as the first two characters. */
1176 if (readline_internal (&lb, inf) > 2
1177 && lb.buffer[0] == '#'
1178 && lb.buffer[1] == '!')
1179 {
1180 char *lp;
1181
1182 /* Set lp to point at the first char after the last slash in the
1183 line or, if no slashes, at the first nonblank. Then set cp to
1184 the first successive blank and terminate the string. */
1185 lp = etags_strrchr (lb.buffer+2, '/');
1186 if (lp != NULL)
1187 lp += 1;
1188 else
1189 for (lp = lb.buffer+2; *lp != '\0' && isspace (*lp); lp++)
1190 continue;
1191 for (cp = lp; *cp != '\0' && !isspace (*cp); cp++)
1192 continue;
1193 *cp = '\0';
1194
1195 if (strlen (lp) > 0)
1196 {
1197 function = get_language_from_interpreter (lp);
1198 if (function != NULL)
1199 {
1200 function (inf);
1201 fclose (inf);
1202 return;
1203 }
1204 }
1205 }
1206 rewind (inf);
1207
1208 /* Try Fortran. */
1209 old_last_node = last_node;
1210 Fortran_functions (inf);
1211
1212 /* No Fortran entries found. Try C. */
1213 if (old_last_node == last_node)
1214 {
1215 rewind (inf);
1216 default_C_entries (inf);
1217 }
1218 fclose (inf);
1219 return;
1220 }
1221 \f
1222 /* Record a tag. */
1223 void
1224 pfnote (name, is_func, linestart, linelen, lno, cno)
1225 char *name; /* tag name, if different from definition */
1226 logical is_func; /* tag is a function */
1227 char *linestart; /* start of the line where tag is */
1228 int linelen; /* length of the line where tag is */
1229 int lno; /* line number */
1230 long cno; /* character number */
1231 {
1232 register NODE *np = xnew (1, NODE);
1233
1234 /* If ctags mode, change name "main" to M<thisfilename>. */
1235 if (CTAGS && !cxref_style && streq (name, "main"))
1236 {
1237 register char *fp = etags_strrchr (curfile, '/');
1238 np->name = concat ("M", fp == 0 ? curfile : fp + 1, "");
1239 fp = etags_strrchr (np->name, '.');
1240 if (fp && fp[1] != '\0' && fp[2] == '\0')
1241 fp[0] = 0;
1242 }
1243 else
1244 np->name = name;
1245 np->been_warned = FALSE;
1246 np->file = curfile;
1247 np->is_func = is_func;
1248 np->lno = lno;
1249 /* Our char numbers are 0-base, because of C language tradition?
1250 ctags compatibility? old versions compatibility? I don't know.
1251 Anyway, since emacs's are 1-base we espect etags.el to take care
1252 of the difference. If we wanted to have 1-based numbers, we would
1253 uncomment the +1 below. */
1254 np->cno = cno /* + 1 */ ;
1255 np->left = np->right = NULL;
1256 np->pat = savenstr (linestart, ((CTAGS && !cxref_style) ? 50 : linelen));
1257
1258 add_node (np, &head);
1259 }
1260
1261 /*
1262 * free_tree ()
1263 * recurse on left children, iterate on right children.
1264 */
1265 void
1266 free_tree (node)
1267 register NODE *node;
1268 {
1269 while (node)
1270 {
1271 register NODE *node_right = node->right;
1272 free_tree (node->left);
1273 if (node->name != NULL)
1274 free (node->name);
1275 free (node->pat);
1276 free ((char *) node);
1277 node = node_right;
1278 }
1279 }
1280
1281 /*
1282 * add_node ()
1283 * Adds a node to the tree of nodes. In etags mode, we don't keep
1284 * it sorted; we just keep a linear list. In ctags mode, maintain
1285 * an ordered tree, with no attempt at balancing.
1286 *
1287 * add_node is the only function allowed to add nodes, so it can
1288 * maintain state.
1289 */
1290 NODE *last_node = NULL;
1291 void
1292 add_node (node, cur_node_p)
1293 NODE *node, **cur_node_p;
1294 {
1295 register int dif;
1296 register NODE *cur_node = *cur_node_p;
1297
1298 if (cur_node == NULL)
1299 {
1300 *cur_node_p = node;
1301 last_node = node;
1302 return;
1303 }
1304
1305 if (!CTAGS)
1306 {
1307 /* Etags Mode */
1308 if (last_node == NULL)
1309 fatal ("internal error in add_node", 0);
1310 last_node->right = node;
1311 last_node = node;
1312 }
1313 else
1314 {
1315 /* Ctags Mode */
1316 dif = strcmp (node->name, cur_node->name);
1317
1318 /*
1319 * If this tag name matches an existing one, then
1320 * do not add the node, but maybe print a warning.
1321 */
1322 if (!dif)
1323 {
1324 if (streq (node->file, cur_node->file))
1325 {
1326 if (!no_warnings)
1327 {
1328 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1329 node->file, lineno, node->name);
1330 fprintf (stderr, "Second entry ignored\n");
1331 }
1332 }
1333 else if (!cur_node->been_warned && !no_warnings)
1334 {
1335 fprintf
1336 (stderr,
1337 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1338 node->file, cur_node->file, node->name);
1339 cur_node->been_warned = TRUE;
1340 }
1341 return;
1342 }
1343
1344 /* Actually add the node */
1345 add_node (node, dif < 0 ? &cur_node->left : &cur_node->right);
1346 }
1347 }
1348 \f
1349 void
1350 put_entries (node)
1351 register NODE *node;
1352 {
1353 register char *sp;
1354
1355 if (node == NULL)
1356 return;
1357
1358 /* Output subentries that precede this one */
1359 put_entries (node->left);
1360
1361 /* Output this entry */
1362
1363 if (!CTAGS)
1364 {
1365 if (node->name != NULL)
1366 fprintf (tagf, "%s\177%s\001%d,%d\n",
1367 node->pat, node->name, node->lno, node->cno);
1368 else
1369 fprintf (tagf, "%s\177%d,%d\n",
1370 node->pat, node->lno, node->cno);
1371 }
1372 else if (!cxref_style)
1373 {
1374 fprintf (tagf, "%s\t%s\t",
1375 node->name, node->file);
1376
1377 if (node->is_func)
1378 { /* a function */
1379 putc (searchar, tagf);
1380 putc ('^', tagf);
1381
1382 for (sp = node->pat; *sp; sp++)
1383 {
1384 if (*sp == '\\' || *sp == searchar)
1385 putc ('\\', tagf);
1386 putc (*sp, tagf);
1387 }
1388 putc (searchar, tagf);
1389 }
1390 else
1391 { /* a typedef; text pattern inadequate */
1392 fprintf (tagf, "%d", node->lno);
1393 }
1394 putc ('\n', tagf);
1395 }
1396 else if (vgrind_style)
1397 fprintf (stdout, "%s %s %d\n",
1398 node->name, node->file, (node->lno + 63) / 64);
1399 else
1400 fprintf (stdout, "%-16s %3d %-16s %s\n",
1401 node->name, node->lno, node->file, node->pat);
1402
1403 /* Output subentries that follow this one */
1404 put_entries (node->right);
1405 }
1406
/*
 * number_len ()
 *	Return the number of characters in the decimal representation
 *	of NUM, as printf would write it: one per digit, plus one for
 *	the '-' sign when NUM is negative.  Zero has length 1.
 */
int
number_len (num)
     long num;
{
  int len = 1;                  /* there is always at least one digit */

  if (num < 0)
    len++;                      /* room for the sign */
  while ((num /= 10) != 0)
    len++;
  return len;
}
1419
1420 /*
1421 * Return total number of characters that put_entries will output for
1422 * the nodes in the subtree of the specified node. Works only if
1423 * we are not ctags, but called only in that case. This count
1424 * is irrelevant with the new tags.el, but is still supplied for
1425 * backward compatibility.
1426 */
1427 int
1428 total_size_of_entries (node)
1429 register NODE *node;
1430 {
1431 register int total;
1432
1433 if (node == NULL)
1434 return 0;
1435
1436 total = 0;
1437 for (; node; node = node->right)
1438 {
1439 /* Count left subentries. */
1440 total += total_size_of_entries (node->left);
1441
1442 /* Count this entry */
1443 total += strlen (node->pat) + 1;
1444 total += number_len ((long) node->lno) + 1 + number_len (node->cno) + 1;
1445 if (node->name != NULL)
1446 total += 1 + strlen (node->name); /* \001name */
1447 }
1448
1449 return total;
1450 }
1451 \f
1452 /*
1453 * The C symbol tables.
 *
 * sym_type classifies a token as seen by C_symtype:
 *   st_none        not a keyword of the table below
 *   st_C_struct    class, domain, union, struct
 *   st_C_enum      enum
 *   st_C_define    define
 *   st_C_typedef   typedef
 *   st_C_typespec  type specifiers, storage classes and qualifiers
 *                  (long, int, static, const, ...)
1454 */
1455 enum sym_type
1456 {
1457 st_none, st_C_struct, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
1458 };
1459
1460 /* Feed stuff between (but not including) %[ and %] lines to:
1461 gperf -c -k1,3 -o -p -r -t
1462 %[
1463 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1464 %%
1465 class, C_PLPL, st_C_struct
1466 domain, C_STAR, st_C_struct
1467 union, 0, st_C_struct
1468 struct, 0, st_C_struct
1469 enum, 0, st_C_enum
1470 typedef, 0, st_C_typedef
1471 define, 0, st_C_define
1472 long, 0, st_C_typespec
1473 short, 0, st_C_typespec
1474 int, 0, st_C_typespec
1475 char, 0, st_C_typespec
1476 float, 0, st_C_typespec
1477 double, 0, st_C_typespec
1478 signed, 0, st_C_typespec
1479 unsigned, 0, st_C_typespec
1480 auto, 0, st_C_typespec
1481 void, 0, st_C_typespec
1482 extern, 0, st_C_typespec
1483 static, 0, st_C_typespec
1484 const, 0, st_C_typespec
1485 volatile, 0, st_C_typespec
1486 %]
1487 and replace lines between %< and %> with its output. */
1488 /*%<*/
1489 /* C code produced by gperf version 1.8.1 (K&R C version) */
1490 /* Command-line: gperf -c -k1,3 -o -p -r -t */
1491
1492
/* One keyword of the table: its spelling, the language extension bit that
   must be enabled for it to count as a keyword (0 when it always counts;
   see C_symtype), and its classification.  */
1493 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
1494
/* Bounds checked by in_word_set before it trusts a hash value:
   shortest and longest keyword, lowest and highest slot in use.  */
1495 #define MIN_WORD_LENGTH 3
1496 #define MAX_WORD_LENGTH 8
1497 #define MIN_HASH_VALUE 10
1498 #define MAX_HASH_VALUE 62
1499 /*
1500 21 keywords
1501 53 is the maximum key range
1502 */
1503
/*
 * hash ()
 *	Hash function for the C keyword table: LEN plus the table values
 *	of the first and of the third character of STR.  STR must hold at
 *	least three characters.
 *
 *	The table has one entry per 7-bit character code.  The characters
 *	are taken as unsigned and any code beyond the table counts as 62,
 *	the value given to every character that occurs in no keyword at
 *	the hashed positions; this way a byte with the high bit set can no
 *	longer index outside the table.
 */
static int
hash (str, len)
     register char *str;
     register int len;
{
  static unsigned char hash_table[] =
    {
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62,  2, 62,  7,
      6,  9, 15, 30, 62, 24, 62, 62,  1, 24,
      7, 27, 13, 62, 19, 26, 18, 27,  1, 62,
     62, 62, 62, 62, 62, 62, 62, 62,
    };
  register unsigned int first = (unsigned char) str[0];
  register unsigned int third = (unsigned char) str[2];
  register int value = len;

  value += third < sizeof hash_table ? hash_table[third] : 62;
  value += first < sizeof hash_table ? hash_table[first] : 62;
  return value;
}
1527
/*
 * in_word_set ()
 *	Look the token of LEN characters starting at STR up in the C
 *	keyword table.  Return a pointer to its entry, or 0 when the
 *	token is no keyword.
 *
 *	The table is indexed by the value of hash (): the position of
 *	each keyword in the initializer below is significant, and the
 *	slots holding "" are unused.  Only the first LEN characters of
 *	the token are compared with the keyword found in the slot.
 */
1528 struct C_stab_entry *
1529 in_word_set (str, len)
1530 register char *str;
1531 register int len;
1532 {
1533
1534 static struct C_stab_entry wordlist[] =
1535 {
1536 {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
1537 {"",},
1538 {"volatile", 0, st_C_typespec},
1539 {"",},
1540 {"long", 0, st_C_typespec},
1541 {"char", 0, st_C_typespec},
1542 {"class", C_PLPL, st_C_struct},
1543 {"",}, {"",}, {"",}, {"",},
1544 {"const", 0, st_C_typespec},
1545 {"",}, {"",}, {"",}, {"",},
1546 {"auto", 0, st_C_typespec},
1547 {"",}, {"",},
1548 {"define", 0, st_C_define},
1549 {"",},
1550 {"void", 0, st_C_typespec},
1551 {"",}, {"",}, {"",},
1552 {"extern", 0, st_C_typespec},
1553 {"static", 0, st_C_typespec},
1554 {"",},
1555 {"domain", C_STAR, st_C_struct},
1556 {"",},
1557 {"typedef", 0, st_C_typedef},
1558 {"double", 0, st_C_typespec},
1559 {"enum", 0, st_C_enum},
1560 {"",}, {"",}, {"",}, {"",},
1561 {"int", 0, st_C_typespec},
1562 {"",},
1563 {"float", 0, st_C_typespec},
1564 {"",}, {"",}, {"",},
1565 {"struct", 0, st_C_struct},
1566 {"",}, {"",}, {"",}, {"",},
1567 {"union", 0, st_C_struct},
1568 {"",},
1569 {"short", 0, st_C_typespec},
1570 {"",}, {"",},
1571 {"unsigned", 0, st_C_typespec},
1572 {"signed", 0, st_C_typespec},
1573 };
1574
/* Tokens shorter or longer than any keyword are rejected before hashing;
   this also guarantees that hash () may look at the third character.  */
1575 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
1576 {
1577 register int key = hash (str, len);
1578
1579 if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
1580 {
1581 register char *s = wordlist[key].name;
1582
/* Compare the first character directly, the remaining len-1 with strneq.  */
1583 if (*s == *str && strneq (str + 1, s + 1, len - 1))
1584 return &wordlist[key];
1585 }
1586 }
1587 return 0;
1588 }
1589 /*%>*/
1589 /*%>*/
1590
1591 enum sym_type
1592 C_symtype(str, len, c_ext)
1593 char *str;
1594 int len;
1595 int c_ext;
1596 {
1597 register struct C_stab_entry *se = in_word_set(str, len);
1598
1599 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
1600 return st_none;
1601 return se->type;
1602 }
1603 \f
1604 /*
1605 * C functions are recognized using a simple finite automaton.
1606 * funcdef is its state variable.
 *
 * Transitions made by consider_token and C_entries:
 *   fnone -> ftagseen        a token that is no keyword is met
 *   ftagseen -> fstartlist   '(' follows the tag
 *   fstartlist -> finlist    a token begins inside the parentheses
 *   flistseen -> finlist     another '(' follows the list
 *   fstartlist, finlist -> flistseen
 *                            ')' brings the parenthesis level back to 0
 *   any -> fignore           a tag has just been made, or the token
 *                            followed a GNU function-defining macro
 *   flistseen, fignore -> fnone
 *                            '{' is met
1607 */
1608 typedef enum
1609 {
1610 fnone, /* nothing seen */
1611 ftagseen, /* function-like tag seen */
1612 fstartlist, /* just after open parenthesis */
1613 finlist, /* in parameter list */
1614 flistseen, /* after parameter list */
1615 fignore /* before open brace */
1616 } FUNCST;
1617 FUNCST funcdef;
1618
1619
1620 /*
1621 * typedefs are recognized using a simple finite automaton.
1622 * typdef is its state variable.
 *
 * Transitions made by consider_token and C_entries:
 *   tnone -> ttypedseen      `typedef' is met and typedefs is set
 *   ttypedseen -> tend       a type specifier or a non-keyword follows
 *   ttypedseen -> tinbody    '{' is met
 *   tinbody -> tend          '}' brings the curly brace level back to 0
 *   tend -> tignore          the tag is made at '[', '(' or ')'
 *   any -> tnone             ';' is met at curly brace level 0
1623 */
1624 typedef enum
1625 {
1626 tnone, /* nothing seen */
1627 ttypedseen, /* typedef keyword seen */
1628 tinbody, /* inside typedef body */
1629 tend, /* just before typedef tag */
1630 tignore /* junk after typedef tag */
1631 } TYPEDST;
1632 TYPEDST typdef;
1633
1634
1635 /*
1636 * struct-like structures (enum, struct and union) are recognized
1637 * using another simple finite automaton. `structdef' is its state
1638 * variable.
 *
 * Transitions made by consider_token and C_entries:
 *   snone -> skeyseen        a struct-like keyword is accepted
 *   skeyseen -> stagseen     the next token is taken as the tag
 *   stagseen -> scolonseen   ':' follows the tag
 *   skeyseen, stagseen, scolonseen -> sinbody
 *                            '{' is met
 *   stagseen -> snone        ';', ',', '[' or a new token is met
 *   any -> snone             '}' brings the curly brace level back to 0
1639 */
1640 typedef enum
1641 {
1642 snone, /* nothing seen yet */
1643 skeyseen, /* struct-like keyword seen */
1644 stagseen, /* struct-like tag seen */
1645 scolonseen, /* colon seen after struct-like tag */
1646 sinbody /* in struct body: recognize member func defs*/
1647 } STRUCTST;
1648 STRUCTST structdef;
1649
1650 /*
1651 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
1652 * struct tag, and structtype is the type of the preceding struct-like
1653 * keyword.
 * structtag points either to a string literal ("<enum>", "_anonymous_",
 * "<error>", "<uninited>") or to a copy made with savenstr.
1654 */
1655 char *structtag = "<uninited>";
1656 enum sym_type structtype;
1657
1658 /*
1659 * Yet another little state machine to deal with preprocessor lines.
 *
 * Transitions made by consider_token and C_entries:
 *   dnone -> dsharpseen        '#' is the first thing on the line
 *   dsharpseen -> ddefineseen  the token after '#' is `define'
 *   dsharpseen -> dignorerest  the token after '#' is anything else
 *   ddefineseen -> dignorerest the macro name has been considered
 *   any -> dnone               a new line is read through CNL
1660 */
1661 typedef enum
1662 {
1663 dnone, /* nothing seen */
1664 dsharpseen, /* '#' seen as first char on line */
1665 ddefineseen, /* '#' and 'define' seen */
1666 dignorerest /* ignore rest of line */
1667 } DEFINEST;
1668 DEFINEST definedef;
1669
1670 /*
1671 * Set this to TRUE, and the next token considered is called a function.
1672 * Used only for GNU emacs's function-defining macros.
 * consider_token sets it back to FALSE as soon as that token is met.
1673 */
1674 logical next_token_is_func;
1675
1676 /*
1677 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
 * C_entries toggles it at each "%%" when the YACC extension is enabled.
1678 */
1679 logical yacc_rules;
1680
1681 /*
1682 * consider_token ()
1683 * checks to see if the current token is at the start of a
1684 * function, or corresponds to a typedef, or is a struct/union/enum
1685 * tag.
1686 *
1687 * *IS_FUNC gets TRUE iff the token is a function or macro with args.
1688 * C_EXT is which language we are looking at.
1689 *
1690 * In the future we will need some way to adjust where the end of
1691 * the token is; for instance, implementing the C++ keyword
1692 * `operator' properly will adjust the end of the token to be after
1693 * whatever follows `operator'.
1694 *
1695 * Globals
1696 * funcdef IN OUT
1697 * structdef IN OUT
1698 * definedef IN OUT
1699 * typdef IN OUT
1700 * next_token_is_func IN OUT
1701 */
1702
1703 logical
1704 consider_token (str, len, c, c_ext, cblev, is_func)
1705 register char *str; /* IN: token pointer */
1706 register int len; /* IN: token length */
1707 register char c; /* IN: first char after the token */
1708 int c_ext; /* IN: C extensions mask */
1709 int cblev; /* IN: curly brace level */
1710 logical *is_func; /* OUT: function found */
1711 {
1712 enum sym_type toktype = C_symtype (str, len, c_ext);
1713
1714 /*
1715 * Advance the definedef state machine.
1716 */
1717 switch (definedef)
1718 {
1719 case dnone:
1720 /* We're not on a preprocessor line. */
1721 break;
1722 case dsharpseen:
1723 if (toktype == st_C_define)
1724 {
1725 definedef = ddefineseen;
1726 }
1727 else
1728 {
1729 definedef = dignorerest;
1730 }
1731 return FALSE;
1732 case ddefineseen:
1733 /*
1734 * Make a tag for any macro, unless it is a constant
1735 * and constantypedefs is FALSE.
1736 */
1737 definedef = dignorerest;
1738 *is_func = (c == '(');
1739 if (!*is_func && !constantypedefs)
1740 return FALSE;
1741 else
1742 return TRUE;
1743 case dignorerest:
1744 return FALSE;
1745 default:
1746 error ("internal error: definedef value.", 0);
1747 }
1748
1749 /*
1750 * Now typedefs
1751 */
1752 switch (typdef)
1753 {
1754 case tnone:
1755 if (toktype == st_C_typedef)
1756 {
1757 if (typedefs)
1758 typdef = ttypedseen;
1759 funcdef = fnone;
1760 return FALSE;
1761 }
1762 break;
1763 case ttypedseen:
1764 switch (toktype)
1765 {
1766 case st_none:
1767 case st_C_typespec:
1768 typdef = tend;
1769 break;
1770 case st_C_struct:
1771 case st_C_enum:
1772 break;
1773 }
1774 /* Do not return here, so the structdef stuff has a chance. */
1775 break;
1776 case tend:
1777 switch (toktype)
1778 {
1779 case st_C_typespec:
1780 case st_C_struct:
1781 case st_C_enum:
1782 return FALSE;
1783 }
1784 return TRUE;
1785 }
1786
1787 /*
1788 * This structdef business is currently only invoked when cblev==0.
1789 * It should be recursively invoked whatever the curly brace level,
1790 * and a stack of states kept, to allow for definitions of structs
1791 * within structs.
1792 *
1793 * This structdef business is NOT invoked when we are ctags and the
1794 * file is plain C. This is because a struct tag may have the same
1795 * name as another tag, and this loses with ctags.
1796 *
1797 * This if statement deals with the typdef state machine as
1798 * follows: if typdef==ttypedseen and token is struct/union/class/enum,
1799 * return FALSE. All the other code here is for the structdef
1800 * state machine.
1801 */
1802 switch (toktype)
1803 {
1804 case st_C_struct:
1805 case st_C_enum:
1806 if (typdef == ttypedseen
1807 || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
1808 {
1809 structdef = skeyseen;
1810 structtype = toktype;
1811 }
1812 return FALSE;
1813 }
1814 if (structdef == skeyseen)
1815 {
1816 /* Save the tag for struct/union/class, for functions that may be
1817 defined inside. */
1818 if (structtype == st_C_struct)
1819 structtag = savenstr (str, len);
1820 else
1821 structtag = "<enum>";
1822 structdef = stagseen;
1823 return TRUE;
1824 }
1825
1826 /* Avoid entering funcdef stuff if typdef is going on. */
1827 if (typdef != tnone)
1828 {
1829 definedef = dnone;
1830 return FALSE;
1831 }
1832
1833 /* Detect GNU macros. */
1834 if (definedef == dnone)
1835 if (strneq (str, "DEFUN", len) /* Used in emacs */
1836 #if FALSE
1837 These are defined inside C functions, so currently they
1838 are not met anyway.
1839 || strneq (str, "EXFUN", len) /* Used in glibc */
1840 || strneq (str, "DEFVAR_", 7) /* Used in emacs */
1841 #endif
1842 || strneq (str, "SYSCALL", len) /* Used in glibc (mach) */
1843 || strneq (str, "ENTRY", len) /* Used in glibc */
1844 || strneq (str, "PSEUDO", len)) /* Used in glibc */
1845
1846 {
1847 next_token_is_func = TRUE;
1848 return FALSE;
1849 }
1850 if (next_token_is_func)
1851 {
1852 next_token_is_func = FALSE;
1853 funcdef = fignore;
1854 *is_func = TRUE;
1855 return TRUE;
1856 }
1857
1858 /* A function? */
1859 switch (toktype)
1860 {
1861 case st_C_typespec:
1862 if (funcdef != finlist && funcdef != fignore)
1863 funcdef = fnone; /* should be useless */
1864 return FALSE;
1865 default:
1866 if (funcdef == fnone)
1867 {
1868 funcdef = ftagseen;
1869 *is_func = TRUE;
1870 return TRUE;
1871 }
1872 }
1873
1874 return FALSE;
1875 }
1876
1877 /*
1878 * C_entries ()
1879 * This routine finds functions, typedefs, #define's and
1880 * struct/union/enum definitions in C syntax and adds them
1881 * to the list.
 *
 * TOKEN describes the latest token that may still become a tag.
1882 */
1883 typedef struct
1884 {
1885 logical valid; /* TRUE while the token may still be tagged; cleared by make_tag */
1886 char *str; /* not referenced in this part of the file */
1887 logical named; /* TRUE if token_name.buffer is to be saved as the tag name */
1888 int linelen; /* offset of the token in its line + token length + 1 */
1889 int lineno; /* number of the line holding the token */
1890 long linepos; /* char number recorded for the start of that line */
1891 char *buffer; /* text of that line (one of the two line buffers) */
1892 } TOKEN;
1893
/* The macros below are meant for C_entries only: they use its locals
   (lbs, curndx, newndx, lp, quotednl, tok, savetok, inf).

   Two line buffers are kept.  `cur' is the one that gets the next line
   read, `new' the one holding the line read last, `oth' the other one.  */
1894 #define current_lb_is_new (newndx == curndx)
1895 #define switch_line_buffers() (curndx = 1 - curndx)
1896
1897 #define curlb (lbs[curndx].lb)
1898 #define othlb (lbs[1-curndx].lb)
1899 #define newlb (lbs[newndx].lb)
1900 #define curlinepos (lbs[curndx].linepos)
1901 #define othlinepos (lbs[1-curndx].linepos)
1902 #define newlinepos (lbs[newndx].linepos)
1903
/* Read the next line into the current buffer and restart scanning at its
   beginning.  The state of the preprocessor machine (definedef) is left
   alone, hence the name.  */
1904 #define CNL_SAVE_DEFINEDEF \
1905 do { \
1906 curlinepos = charno; \
1907 lineno++; \
1908 charno += readline (&curlb, inf); \
1909 lp = curlb.buffer; \
1910 quotednl = FALSE; \
1911 newndx = curndx; \
1912 } while (0)
1913
/* Like CNL_SAVE_DEFINEDEF, but a preprocessor line ends here: the token
   put aside in savetok, if any, is restored and definedef is reset.  */
1914 #define CNL \
1915 do { \
1916 CNL_SAVE_DEFINEDEF; \
1917 if (savetok.valid) \
1918 { \
1919 tok = savetok; \
1920 savetok.valid = FALSE; \
1921 } \
1922 definedef = dnone; \
1923 } while (0)
1924
/* Record the token in `tok' as a tag through pfnote, with a copy of
   token_name.buffer as its name when the token is a named one.  When
   there is no valid token nothing is recorded (abort if DEBUG).  In both
   cases the token is invalidated.  */
1925 #define make_tag(isfun) do \
1926 { \
1927 if (tok.valid) \
1928 { \
1929 char *name = NULL; \
1930 if (tok.named) \
1931 name = savestr (token_name.buffer); \
1932 pfnote (name, isfun, tok.buffer, tok.linelen, tok.lineno, tok.linepos); \
1933 } \
1934 else if (DEBUG) abort (); \
1935 tok.valid = FALSE; \
1936 } while (0)
1937
/*
 * C_entries ()
 *	Scan the stream INF, one character at a time, and make tags for
 *	the definitions found in it.
 *
 *	C_EXT is a mask of language extensions: C_PLPL enables the C++
 *	constructs ("//" comments, "::" inside names, member functions
 *	tagged as "tag::name"), YACC enables the "%%" section switch.
 *
 *	Each line is read into one of two line buffers.  Comments, strings
 *	and character constants are skipped first; then tokens are
 *	delimited and submitted to consider_token; finally punctuation
 *	drives the funcdef, typdef and structdef state machines and decides
 *	when a remembered token is turned into a tag with make_tag.
 *	The character '\0' stands for the end of the line and triggers the
 *	reading of the next one.
 */
1938 void
1939 C_entries (c_ext, inf)
1940 int c_ext; /* extension of C */
1941 FILE *inf; /* input file */
1942 {
1943 register char c; /* latest char read; '\0' for end of line */
1944 register char *lp; /* pointer one beyond the character `c' */
1945 int curndx, newndx; /* indices for current and new lb */
1946 TOKEN tok; /* latest token read */
1947 register int tokoff; /* offset in line of start of current token */
1948 register int toklen; /* length of current token */
1949 int cblev; /* current curly brace level */
1950 int parlev; /* current parenthesis level */
1951 logical incomm, inquote, inchar, quotednl, midtoken;
1952 logical cplpl;
1953 TOKEN savetok; /* token saved during preprocessor handling */
1954
1955
/* Start on an empty line, so that the first pass through the loop reads
   the first line of the file.  */
1956 curndx = newndx = 0;
1957 lineno = 0;
1958 charno = 0;
1959 lp = curlb.buffer;
1960 *lp = 0;
1961
1962 definedef = dnone; funcdef = fnone; typdef = tnone; structdef = snone;
1963 next_token_is_func = yacc_rules = FALSE;
1964 midtoken = inquote = inchar = incomm = quotednl = FALSE;
1965 tok.valid = savetok.valid = FALSE;
1966 cblev = 0;
1967 parlev = 0;
1968 cplpl = c_ext & C_PLPL;
1969
1970 while (!feof (inf))
1971 {
1972 c = *lp++;
1973 if (c == '\\')
1974 {
1975 /* If we're at the end of the line, the next character is a
1976 '\0'; don't skip it, because it's the thing that tells us
1977 to read the next line. */
1978 if (*lp == '\0')
1979 {
1980 quotednl = TRUE;
1981 continue;
1982 }
1983 lp++;
1984 c = ' ';
1985 }
1986 else if (incomm)
1987 {
1988 switch (c)
1989 {
1990 case '*':
1991 if (*lp == '/')
1992 {
1993 c = *lp++;
1994 incomm = FALSE;
1995 }
1996 break;
1997 case '\0':
1998 /* Newlines inside comments do not end macro definitions in
1999 traditional cpp. */
2000 CNL_SAVE_DEFINEDEF;
2001 break;
2002 }
2003 continue;
2004 }
2005 else if (inquote)
2006 {
2007 switch (c)
2008 {
2009 case '"':
2010 inquote = FALSE;
2011 break;
2012 case '\0':
2013 /* Newlines inside strings do not end macro definitions
2014 in traditional cpp, even though compilers don't
2015 usually accept them. */
2016 CNL_SAVE_DEFINEDEF;
2017 break;
2018 }
2019 continue;
2020 }
2021 else if (inchar)
2022 {
2023 switch (c)
2024 {
2025 case '\0':
2026 /* Hmmm, something went wrong. */
2027 CNL;
2028 /* FALLTHRU */
2029 case '\'':
2030 inchar = FALSE;
2031 break;
2032 }
2033 continue;
2034 }
2035 else
2036 switch (c)
2037 {
2038 case '"':
2039 inquote = TRUE;
2040 if (funcdef != finlist && funcdef != fignore)
2041 funcdef = fnone;
2042 continue;
2043 case '\'':
2044 inchar = TRUE;
2045 if (funcdef != finlist && funcdef != fignore)
2046 funcdef = fnone;
2047 continue;
2048 case '/':
2049 if (*lp == '*')
2050 {
2051 lp++;
2052 incomm = TRUE;
2053 continue;
2054 }
2055 else if (cplpl && *lp == '/')
2056 {
/* C++ line comment: go on as if the line ended here.  */
2057 c = 0;
2058 break;
2059 }
2060 else
2061 break;
2062 case '%':
2063 if ((c_ext & YACC) && *lp == '%')
2064 {
2065 /* entering or exiting rules section in yacc file */
2066 lp++;
2067 definedef = dnone; funcdef = fnone;
2068 typdef = tnone; structdef = snone;
2069 next_token_is_func = FALSE;
2070 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2071 cblev = 0;
2072 yacc_rules = !yacc_rules;
2073 continue;
2074 }
2075 else
2076 break;
2077 case '#':
2078 if (definedef == dnone)
2079 {
2080 char *cp;
2081 logical cpptoken = TRUE;
2082
2083 /* Look back on this line. If all blanks, or nonblanks
2084 followed by an end of comment, this is a preprocessor
2085 token. */
2086 for (cp = newlb.buffer; cp < lp-1; cp++)
2087 if (!iswhite (*cp))
2088 {
2089 if (*cp == '*' && *(cp+1) == '/')
2090 {
2091 cp++;
2092 cpptoken = TRUE;
2093 }
2094 else
2095 cpptoken = FALSE;
2096 }
2097 if (cpptoken)
2098 definedef = dsharpseen;
2099 } /* if (definedef == dnone) */
2100
2101 continue;
2102 } /* switch (c) */
2103
2104
2105 /* Consider token only if some complicated conditions are satisfied. */
2106 if ((definedef != dnone
2107 || (cblev == 0 && structdef != scolonseen)
2108 || (cblev == 1 && cplpl && structdef == sinbody))
2109 && typdef != tignore
2110 && definedef != dignorerest
2111 && funcdef != finlist)
2112 {
2113 if (midtoken)
2114 {
2115 if (endtoken (c))
2116 {
2117 if (cplpl && c == ':' && *lp == ':' && begtoken(*(lp + 1)))
2118 {
2119 /*
2120 * This handles :: in the middle, but not at the
2121 * beginning of an identifier.
2122 */
2123 lp += 2;
2124 toklen += 3;
2125 }
2126 else
2127 {
2128 logical is_func = FALSE;
2129
/* In the rules part of a yacc file every token is accepted without
   asking consider_token.  */
2130 if (yacc_rules
2131 || consider_token (newlb.buffer + tokoff, toklen,
2132 c, c_ext, cblev, &is_func))
2133 {
2134 if (structdef == sinbody
2135 && definedef == dnone
2136 && is_func)
2137 /* function defined in C++ class body */
2138 {
/* The name becomes "structtag::token"; the buffer is doubled until
   the name fits.  */
2139 int strsize = strlen(structtag) + 2 + toklen + 1;
2140 while (token_name.size < strsize)
2141 {
2142 token_name.size *= 2;
2143 token_name.buffer
2144 = (char *) xrealloc (token_name.buffer,
2145 token_name.size);
2146 }
2147 strcpy (token_name.buffer, structtag);
2148 strcat (token_name.buffer, "::");
2149 strncat (token_name.buffer,
2150 newlb.buffer+tokoff, toklen);
2151 tok.named = TRUE;
2152 }
2153 else
2154 {
2155 while (token_name.size < toklen + 1)
2156 {
2157 token_name.size *= 2;
2158 token_name.buffer
2159 = (char *) xrealloc (token_name.buffer,
2160 token_name.size);
2161 }
2162 strncpy (token_name.buffer,
2163 newlb.buffer+tokoff, toklen);
2164 token_name.buffer[toklen] = '\0';
2165 if (structdef == stagseen
2166 || typdef == tend
2167 || (is_func
2168 && definedef == dignorerest)) /* macro */
2169 tok.named = TRUE;
2170 else
2171 tok.named = FALSE;
2172 }
2173 tok.lineno = lineno;
2174 tok.linelen = tokoff + toklen + 1;
2175 tok.buffer = newlb.buffer;
2176 tok.linepos = newlinepos;
2177 tok.valid = TRUE;
2178
2179 if (definedef == dnone
2180 && (funcdef == ftagseen
2181 || structdef == stagseen
2182 || typdef == tend))
2183 {
/* Too early to make the tag.  The token refers to the text of this line,
   so have the following lines read into the other buffer.  */
2184 if (current_lb_is_new)
2185 switch_line_buffers ();
2186 }
2187 else
2188 make_tag (is_func);
2189 }
2190 midtoken = FALSE;
2191 }
2192 } /* if (endtoken (c)) */
2193 else if (intoken (c))
2194 {
2195 toklen++;
2196 continue;
2197 }
2198 } /* if (midtoken) */
2199 else if (begtoken (c))
2200 {
2201 switch (definedef)
2202 {
2203 case dnone:
2204 switch (funcdef)
2205 {
2206 case fstartlist:
2207 funcdef = finlist;
2208 continue;
2209 case flistseen:
2210 make_tag (TRUE);
2211 funcdef = fignore;
2212 break;
2213 case ftagseen:
2214 funcdef = fnone;
2215 break;
2216 }
2217 if (structdef == stagseen)
2218 structdef = snone;
2219 break;
2220 case dsharpseen:
/* Put the pending token aside while the preprocessor line is handled;
   CNL restores it.  */
2221 savetok = tok;
2222 }
2223 if (!yacc_rules || lp == newlb.buffer + 1)
2224 {
2225 tokoff = lp - 1 - newlb.buffer;
2226 toklen = 1;
2227 midtoken = TRUE;
2228 }
2229 continue;
2230 } /* if (begtoken) */
2231 } /* if must look at token */
2232
2233
2234 /* Detect end of line, colon, comma, semicolon and various braces
2235 after having handled a token.*/
2236 switch (c)
2237 {
2238 case ':':
2239 if (definedef != dnone)
2240 break;
2241 if (structdef == stagseen)
2242 structdef = scolonseen;
2243 else
2244 switch (funcdef)
2245 {
2246 case ftagseen:
2247 if (yacc_rules)
2248 {
2249 make_tag (FALSE);
2250 funcdef = fignore;
2251 }
2252 break;
2253 case fstartlist:
2254 funcdef = fnone;
2255 break;
2256 }
2257 break;
2258 case ';':
2259 if (definedef != dnone)
2260 break;
2261 if (cblev == 0)
2262 switch (typdef)
2263 {
2264 case tend:
2265 make_tag (FALSE);
2266 /* FALLTHRU */
2267 default:
2268 typdef = tnone;
2269 }
2270 if (funcdef != fignore)
2271 {
2272 funcdef = fnone;
2273 /* The following instruction invalidates the token.
2274 Probably the token should be invalidated in all
2275 other cases where some state machine is reset. */
2276 tok.valid = FALSE;
2277 }
2278 if (structdef == stagseen)
2279 structdef = snone;
2280 break;
2281 case ',':
2282 if (definedef != dnone)
2283 break;
2284 if (funcdef != finlist && funcdef != fignore)
2285 funcdef = fnone;
2286 if (structdef == stagseen)
2287 structdef = snone;
2288 break;
2289 case '[':
2290 if (definedef != dnone)
2291 break;
2292 if (cblev == 0 && typdef == tend)
2293 {
2294 typdef = tignore;
2295 make_tag (FALSE);
2296 break;
2297 }
2298 if (funcdef != finlist && funcdef != fignore)
2299 funcdef = fnone;
2300 if (structdef == stagseen)
2301 structdef = snone;
2302 break;
2303 case '(':
2304 if (definedef != dnone)
2305 break;
2306 switch (funcdef)
2307 {
2308 case fnone:
2309 switch (typdef)
2310 {
2311 case ttypedseen:
2312 case tend:
2313 /* Make sure that the next char is not a '*'.
2314 This handles constructs like:
2315 typedef void OperatorFun (int fun); */
2316 if (*lp != '*')
2317 {
2318 typdef = tignore;
2319 make_tag (FALSE);
2320 }
2321 break;
2322 } /* switch (typdef) */
2323 break;
2324 case ftagseen:
2325 funcdef = fstartlist;
2326 break;
2327 case flistseen:
2328 funcdef = finlist;
2329 break;
2330 }
2331 parlev++;
2332 break;
2333 case ')':
2334 if (definedef != dnone)
2335 break;
2336 if (--parlev == 0)
2337 {
2338 switch (funcdef)
2339 {
2340 case fstartlist:
2341 case finlist:
2342 funcdef = flistseen;
2343 break;
2344 }
2345 if (cblev == 0 && typdef == tend)
2346 {
2347 typdef = tignore;
2348 make_tag (FALSE);
2349 }
2350 }
2351 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
2352 parlev = 0;
2353 break;
2354 case '{':
2355 if (definedef != dnone)
2356 break;
2357 if (typdef == ttypedseen)
2358 typdef = tinbody;
2359 switch (structdef)
2360 {
2361 case skeyseen: /* unnamed struct */
2362 structtag = "_anonymous_";
2363 structdef = sinbody;
2364 break;
2365 case stagseen:
2366 case scolonseen: /* named struct */
2367 structdef = sinbody;
2368 make_tag (FALSE);
2369 break;
2370 }
2371 switch (funcdef)
2372 {
2373 case flistseen:
2374 make_tag (TRUE);
2375 /* FALLTHRU */
2376 case fignore:
2377 funcdef = fnone;
2378 break;
2379 case fnone:
2380 /* Neutralize `extern "C" {' grot and look inside structs. */
2381 if (cblev == 0 && structdef == snone && typdef == tnone)
2382 cblev = -1;
2383 }
2384 cblev++;
2385 break;
2386 case '*':
2387 if (definedef != dnone)
2388 break;
2389 if (funcdef == fstartlist)
2390 funcdef = fnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
2391 break;
2392 case '}':
2393 if (definedef != dnone)
2394 break;
2395 if (!noindentypedefs && lp == newlb.buffer + 1)
2396 {
2397 cblev = 0; /* reset curly brace level if first column */
2398 parlev = 0; /* also reset paren level, just in case... */
2399 }
2400 else if (cblev > 0)
2401 cblev--;
2402 if (cblev == 0)
2403 {
2404 if (typdef == tinbody)
2405 typdef = tend;
2406 /* Memory leakage here: the string pointed by structtag is
2407 never released, because I fear to miss something and
2408 break things while freeing the area. The amount of
2409 memory leaked here is the sum of the lengths of the
2410 struct tags.
2411 if (structdef == sinbody)
2412 free (structtag); */
2413
2414 structdef = snone;
2415 structtag = "<error>";
2416 }
2417 break;
2418 case '=':
2419 case '#': case '+': case '-': case '~': case '&': case '%': case '/':
2420 case '|': case '^': case '!': case '<': case '>': case '.': case '?':
2421 if (definedef != dnone)
2422 break;
2423 /* These surely cannot follow a function tag. */
2424 if (funcdef != finlist && funcdef != fignore)
2425 funcdef = fnone;
2426 break;
2427 case '\0':
2428 /* If a macro spans multiple lines don't reset its state. */
2429 if (quotednl)
2430 CNL_SAVE_DEFINEDEF;
2431 else
2432 CNL;
2433 break;
2434 } /* switch (c) */
2435
2436 } /* while not eof */
2437 }
2438
2439 /*
2440 * Process either a C++ file or a C file depending on the setting
2441 * of a global flag.
2442 */
2443 void
2444 default_C_entries (inf)
2445 FILE *inf;
2446 {
2447 C_entries (cplusplus ? C_PLPL : 0, inf);
2448 }
2449
/* Tag INF as plain ANSI C, with no language extension enabled. */
void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extension = 0;

  C_entries (no_extension, inf);
}
2457
2458 /* Always do C++. */
2459 void
2460 Cplusplus_entries (inf)
2461 FILE *inf;
2462 {
2463 C_entries (C_PLPL, inf);
2464 }
2465
2466 /* Always do C*. */
2467 void
2468 Cstar_entries (inf)
2469 FILE *inf;
2470 {
2471 C_entries (C_STAR, inf);
2472 }
2473
2474 /* Always do Yacc. */
2475 void
2476 Yacc_entries (inf)
2477 FILE *inf;
2478 {
2479 C_entries (YACC, inf);
2480 }
2481 \f
2482 /* Fortran parsing */
2483
2484 char *dbp;
2485
2486 logical
2487 tail (cp)
2488 char *cp;
2489 {
2490 register int len = 0;
2491
2492 while (*cp && lowcase(*cp) == lowcase(dbp[len]))
2493 cp++, len++;
2494 if (*cp == '\0' && !intoken(dbp[len]))
2495 {
2496 dbp += len;
2497 return TRUE;
2498 }
2499 return FALSE;
2500 }
2501
2502 void
2503 takeprec ()
2504 {
2505 while (isspace (*dbp))
2506 dbp++;
2507 if (*dbp != '*')
2508 return;
2509 dbp++;
2510 while (isspace (*dbp))
2511 dbp++;
2512 if (strneq (dbp, "(*)", 3))
2513 {
2514 dbp += 3;
2515 return;
2516 }
2517 if (!isdigit (*dbp))
2518 {
2519 --dbp; /* force failure */
2520 return;
2521 }
2522 do
2523 dbp++;
2524 while (isdigit (*dbp));
2525 }
2526
2527 void
2528 getit (inf)
2529 FILE *inf;
2530 {
2531 register char *cp;
2532
2533 while (isspace (*dbp))
2534 dbp++;
2535 if (*dbp == '\0')
2536 {
2537 lineno++;
2538 linecharno = charno;
2539 charno += readline (&lb, inf);
2540 dbp = lb.buffer;
2541 if (dbp[5] != '&')
2542 return;
2543 dbp += 6;
2544 while (isspace (*dbp))
2545 dbp++;
2546 }
2547 if (!isalpha (*dbp)
2548 && *dbp != '_'
2549 && *dbp != '$')
2550 return;
2551 for (cp = dbp + 1;
2552 (*cp
2553 && (isalpha (*cp) || isdigit (*cp) || (*cp == '_') || (*cp == '$')));
2554 cp++)
2555 continue;
2556 pfnote (NULL, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2557 }
2558
2559 void
2560 Fortran_functions (inf)
2561 FILE *inf;
2562 {
2563 lineno = 0;
2564 charno = 0;
2565
2566 while (!feof (inf))
2567 {
2568 lineno++;
2569 linecharno = charno;
2570 charno += readline (&lb, inf);
2571 dbp = lb.buffer;
2572 if (*dbp == '%')
2573 dbp++; /* Ratfor escape to fortran */
2574 while (isspace (*dbp))
2575 dbp++;
2576 if (*dbp == '\0')
2577 continue;
2578 switch (lowcase (*dbp))
2579 {
2580 case 'i':
2581 if (tail ("integer"))
2582 takeprec ();
2583 break;
2584 case 'r':
2585 if (tail ("real"))
2586 takeprec ();
2587 break;
2588 case 'l':
2589 if (tail ("logical"))
2590 takeprec ();
2591 break;
2592 case 'c':
2593 if (tail ("complex") || tail ("character"))
2594 takeprec ();
2595 break;
2596 case 'd':
2597 if (tail ("double"))
2598 {
2599 while (isspace (*dbp))
2600 dbp++;
2601 if (*dbp == '\0')
2602 continue;
2603 if (tail ("precision"))
2604 break;
2605 continue;
2606 }
2607 break;
2608 }
2609 while (isspace (*dbp))
2610 dbp++;
2611 if (*dbp == '\0')
2612 continue;
2613 switch (lowcase (*dbp))
2614 {
2615 case 'f':
2616 if (tail ("function"))
2617 getit (inf);
2618 continue;
2619 case 's':
2620 if (tail ("subroutine"))
2621 getit (inf);
2622 continue;
2623 case 'e':
2624 if (tail ("entry"))
2625 getit (inf);
2626 continue;
2627 case 'p':
2628 if (tail ("program"))
2629 {
2630 getit (inf);
2631 continue;
2632 }
2633 if (tail ("procedure"))
2634 getit (inf);
2635 continue;
2636 }
2637 }
2638 }
2639 \f
2640 /*
2641 * Bob Weiner, Motorola Inc., 4/3/94
2642 * Unix and microcontroller assembly tag handling
2643 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
2644 */
2645 void
2646 Asm_labels (inf)
2647 FILE *inf;
2648 {
2649 register char *cp;
2650
2651 lineno = 0;
2652 charno = 0;
2653
2654 while (!feof (inf))
2655 {
2656 lineno++;
2657 linecharno = charno;
2658 charno += readline (&lb, inf);
2659 cp = lb.buffer;
2660
2661 /* If first char is alphabetic or one of [_.$], test for colon
2662 following identifier. */
2663 if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2664 {
2665 /* Read past label. */
2666 cp++;
2667 while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2668 cp++;
2669 if (*cp == ':' || isspace (*cp))
2670 {
2671 /* Found end of label, so copy it and add it to the table. */
2672 pfnote (NULL, TRUE,
2673 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2674 }
2675 }
2676 }
2677 }
2678 \f
2679 /*
2680 * Perl support by Bart Robinson <lomew@cs.utah.edu>
2681 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
2682 */
2683 void
2684 Perl_functions (inf)
2685 FILE *inf;
2686 {
2687 register char *cp;
2688
2689 lineno = 0;
2690 charno = 0;
2691
2692 while (!feof (inf))
2693 {
2694 lineno++;
2695 linecharno = charno;
2696 charno += readline (&lb, inf);
2697 cp = lb.buffer;
2698
2699 if (*cp++ == 's' && *cp++ == 'u' && *cp++ == 'b' && isspace(*cp++))
2700 {
2701 while (*cp && isspace(*cp))
2702 cp++;
2703 while (*cp && ! isspace(*cp) && *cp != '{')
2704 cp++;
2705 pfnote (NULL, TRUE,
2706 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2707 }
2708 }
2709 }
2710 \f
2711 /* Added by Mosur Mohan, 4/22/88 */
2712 /* Pascal parsing */
2713
2714 #define GET_NEW_LINE \
2715 { \
2716 linecharno = charno; lineno++; \
2717 charno += 1 + readline (&lb, inf); \
2718 dbp = lb.buffer; \
2719 }
2720
2721 /*
2722 * Locates tags for procedures & functions. Doesn't do any type- or
2723 * var-definitions. It does look for the keyword "extern" or
2724 * "forward" immediately following the procedure statement; if found,
2725 * the tag is skipped.
2726 */
2727 void
2728 Pascal_functions (inf)
2729 FILE *inf;
2730 {
2731 struct linebuffer tline; /* mostly copied from C_entries */
2732 long save_lcno;
2733 int save_lineno, save_len;
2734 char c;
2735
2736 logical /* each of these flags is TRUE iff: */
2737 incomment, /* point is inside a comment */
2738 inquote, /* point is inside '..' string */
2739 get_tagname, /* point is after PROCEDURE/FUNCTION
2740 keyword, so next item = potential tag */
2741 found_tag, /* point is after a potential tag */
2742 inparms, /* point is within parameter-list */
2743 verify_tag; /* point has passed the parm-list, so the
2744 next token will determine whether this
2745 is a FORWARD/EXTERN to be ignored, or
2746 whether it is a real tag */
2747
2748 lineno = 0;
2749 charno = 0;
2750 dbp = lb.buffer;
2751 *dbp = '\0';
2752 save_len = 0;
2753 initbuffer (&tline);
2754
2755 incomment = inquote = FALSE;
2756 found_tag = FALSE; /* have a proc name; check if extern */
2757 get_tagname = FALSE; /* have found "procedure" keyword */
2758 inparms = FALSE; /* found '(' after "proc" */
2759 verify_tag = FALSE; /* check if "extern" is ahead */
2760
2761 /* long main loop to get next char */
2762 while (!feof (inf))
2763 {
2764 c = *dbp++;
2765 if (c == '\0') /* if end of line */
2766 {
2767 GET_NEW_LINE;
2768 if (*dbp == '\0')
2769 continue;
2770 if (!((found_tag && verify_tag) ||
2771 get_tagname))
2772 c = *dbp++; /* only if don't need *dbp pointing
2773 to the beginning of the name of
2774 the procedure or function */
2775 }
2776 if (incomment)
2777 {
2778 if (c == '}') /* within { } comments */
2779 incomment = FALSE;
2780 else if (c == '*' && *dbp == ')') /* within (* *) comments */
2781 {
2782 dbp++;
2783 incomment = FALSE;
2784 }
2785 continue;
2786 }
2787 else if (inquote)
2788 {
2789 if (c == '\'')
2790 inquote = FALSE;
2791 continue;
2792 }
2793 else
2794 switch (c)
2795 {
2796 case '\'':
2797 inquote = TRUE; /* found first quote */
2798 continue;
2799 case '{': /* found open { comment */
2800 incomment = TRUE;
2801 continue;
2802 case '(':
2803 if (*dbp == '*') /* found open (* comment */
2804 {
2805 incomment = TRUE;
2806 dbp++;
2807 }
2808 else if (found_tag) /* found '(' after tag, i.e., parm-list */
2809 inparms = TRUE;
2810 continue;
2811 case ')': /* end of parms list */
2812 if (inparms)
2813 inparms = FALSE;
2814 continue;
2815 case ';':
2816 if (found_tag && !inparms) /* end of proc or fn stmt */
2817 {
2818 verify_tag = TRUE;
2819 break;
2820 }
2821 continue;
2822 }
2823 if (found_tag && verify_tag && (*dbp != ' '))
2824 {
2825 /* check if this is an "extern" declaration */
2826 if (*dbp == '\0')
2827 continue;
2828 if (lowcase (*dbp == 'e'))
2829 {
2830 if (tail ("extern")) /* superfluous, really! */
2831 {
2832 found_tag = FALSE;
2833 verify_tag = FALSE;
2834 }
2835 }
2836 else if (lowcase (*dbp) == 'f')
2837 {
2838 if (tail ("forward")) /* check for forward reference */
2839 {
2840 found_tag = FALSE;
2841 verify_tag = FALSE;
2842 }
2843 }
2844 if (found_tag && verify_tag) /* not external proc, so make tag */
2845 {
2846 found_tag = FALSE;
2847 verify_tag = FALSE;
2848 pfnote (NULL, TRUE,
2849 tline.buffer, save_len, save_lineno, save_lcno);
2850 continue;
2851 }
2852 }
2853 if (get_tagname) /* grab name of proc or fn */
2854 {
2855 int size;
2856
2857 if (*dbp == '\0')
2858 continue;
2859
2860 /* save all values for later tagging */
2861 size = strlen (lb.buffer) + 1;
2862 while (size > tline.size)
2863 {
2864 tline.size *= 2;
2865 tline.buffer = (char *) xrealloc (tline.buffer, tline.size);
2866 }
2867 strcpy (tline.buffer, lb.buffer);
2868 save_lineno = lineno;
2869 save_lcno = linecharno;
2870
2871 /* grab block name */
2872 for (dbp++; *dbp && (!endtoken (*dbp)); dbp++)
2873 continue;
2874 save_len = dbp - lb.buffer + 1;
2875 get_tagname = FALSE;
2876 found_tag = TRUE;
2877 continue;
2878
2879 /* and proceed to check for "extern" */
2880 }
2881 else if (!incomment && !inquote && !found_tag)
2882 {
2883 /* check for proc/fn keywords */
2884 switch (lowcase (c))
2885 {
2886 case 'p':
2887 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
2888 get_tagname = TRUE;
2889 continue;
2890 case 'f':
2891 if (tail ("unction"))
2892 get_tagname = TRUE;
2893 continue;
2894 }
2895 }
2896 } /* while not eof */
2897
2898 free (tline.buffer);
2899 }
2900 \f
2901 /*
2902 * lisp tag functions
2903 * look for (def or (DEF, quote or QUOTE
2904 */
/* Return 1 if STRP[1], STRP[2] and STRP[3] spell "def" in any mix of
   upper and lower case, and 0 otherwise.  STRP[0] is not examined. */
int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
2913
/* Return 1 if the five characters after STRP[0] spell "quote" in any
   mix of upper and lower case and are followed by a whitespace
   character; return 0 otherwise.  STRP[0] is not examined. */
int
L_isquote (strp)
     register char *strp;
{
  static char lower[] = "quote";
  static char upper[] = "QUOTE";
  register int i;

  for (i = 0; lower[i] != '\0'; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;

  return isspace (strp[6]) != 0;
}
2925
2926 void
2927 L_getit ()
2928 {
2929 register char *cp;
2930
2931 if (*dbp == '\'') /* Skip prefix quote */
2932 dbp++;
2933 else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */
2934 {
2935 dbp += 7;
2936 while (isspace(*dbp))
2937 dbp++;
2938 }
2939 for (cp = dbp /*+1*/;
2940 *cp && *cp != '(' && *cp != ' ' && *cp != ')';
2941 cp++)
2942 continue;
2943 if (cp == dbp)
2944 return;
2945
2946 pfnote (NULL, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2947 }
2948
2949 void
2950 Lisp_functions (inf)
2951 FILE *inf;
2952 {
2953 lineno = 0;
2954 charno = 0;
2955
2956 while (!feof (inf))
2957 {
2958 lineno++;
2959 linecharno = charno;
2960 charno += readline (&lb, inf);
2961 dbp = lb.buffer;
2962 if (dbp[0] == '(')
2963 {
2964 if (L_isdef (dbp))
2965 {
2966 while (!isspace (*dbp))
2967 dbp++;
2968 while (isspace (*dbp))
2969 dbp++;
2970 L_getit ();
2971 }
2972 else
2973 {
2974 /* Check for (foo::defmumble name-defined ... */
2975 do
2976 dbp++;
2977 while (*dbp && !isspace (*dbp)
2978 && *dbp != ':' && *dbp != '(' && *dbp != ')');
2979 if (*dbp == ':')
2980 {
2981 do
2982 dbp++;
2983 while (*dbp == ':');
2984
2985 if (L_isdef (dbp - 1))
2986 {
2987 while (!isspace (*dbp))
2988 dbp++;
2989 while (isspace (*dbp))
2990 dbp++;
2991 L_getit ();
2992 }
2993 }
2994 }
2995 }
2996 }
2997 }
2998 \f
2999 /*
3000 * Scheme tag functions
3001 * look for (def... xyzzy
3002 * look for (def... (xyzzy
3003 * look for (def ... ((...(xyzzy ....
3004 * look for (set! xyzzy
3005 */
3006
3007 void get_scheme ();
3008
3009 void
3010 Scheme_functions (inf)
3011 FILE *inf;
3012 {
3013 lineno = 0;
3014 charno = 0;
3015
3016 while (!feof (inf))
3017 {
3018 lineno++;
3019 linecharno = charno;
3020 charno += readline (&lb, inf);
3021 dbp = lb.buffer;
3022 if (dbp[0] == '(' &&
3023 (dbp[1] == 'D' || dbp[1] == 'd') &&
3024 (dbp[2] == 'E' || dbp[2] == 'e') &&
3025 (dbp[3] == 'F' || dbp[3] == 'f'))
3026 {
3027 while (!isspace (*dbp))
3028 dbp++;
3029 /* Skip over open parens and white space */
3030 while (*dbp && (isspace (*dbp) || *dbp == '('))
3031 dbp++;
3032 get_scheme ();
3033 }
3034 if (dbp[0] == '(' &&
3035 (dbp[1] == 'S' || dbp[1] == 's') &&
3036 (dbp[2] == 'E' || dbp[2] == 'e') &&
3037 (dbp[3] == 'T' || dbp[3] == 't') &&
3038 (dbp[4] == '!' || dbp[4] == '!') &&
3039 (isspace (dbp[5])))
3040 {
3041 while (!isspace (*dbp))
3042 dbp++;
3043 /* Skip over white space */
3044 while (isspace (*dbp))
3045 dbp++;
3046 get_scheme ();
3047 }
3048 }
3049 }
3050
3051 void
3052 get_scheme ()
3053 {
3054 register char *cp;
3055
3056 if (*dbp == '\0')
3057 return;
3058 /* Go till you get to white space or a syntactic break */
3059 for (cp = dbp + 1;
3060 *cp && *cp != '(' && *cp != ')' && !isspace (*cp);
3061 cp++)
3062 continue;
3063 pfnote (NULL, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3064 }
3065 \f
/* Find tags in TeX and LaTeX input files.  */

/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;			/* control sequence, without the escape char */
  int len;			/* length of name; 0 in the entry that ends
				   the table */
};

/* Built on the first call to TeX_functions and kept afterwards. */
struct TEX_tabent *TEX_toktab = NULL;	/* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by colons. */

char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index";

void TEX_mode ();
struct TEX_tabent *TEX_decode_env ();
int TEX_Token ();
#if TeX_named_tokens
void TEX_getit ();
#endif

/* Escape and grouping characters in effect for the current file;
   TEX_mode sets all three. */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
3096
3097 /*
3098 * TeX/LaTeX scanning loop.
3099 */
3100 void
3101 TeX_functions (inf)
3102 FILE *inf;
3103 {
3104 char *lasthit;
3105
3106 lineno = 0;
3107 charno = 0;
3108
3109 /* Select either \ or ! as escape character. */
3110 TEX_mode (inf);
3111
3112 /* Initialize token table once from environment. */
3113 if (!TEX_toktab)
3114 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
3115
3116 while (!feof (inf))
3117 { /* Scan each line in file */
3118 lineno++;
3119 linecharno = charno;
3120 charno += readline (&lb, inf);
3121 dbp = lb.buffer;
3122 lasthit = dbp;
3123 while (dbp = etags_strchr (dbp, TEX_esc)) /* Look at each esc in line */
3124 {
3125 register int i;
3126
3127 if (!*(++dbp))
3128 break;
3129 linecharno += dbp - lasthit;
3130 lasthit = dbp;
3131 i = TEX_Token (lasthit);
3132 if (0 <= i)
3133 {
3134 pfnote (NULL, TRUE,
3135 lb.buffer, strlen (lb.buffer), lineno, linecharno);
3136 #if TeX_named_tokens
3137 TEX_getit (lasthit, TEX_toktab[i].len);
3138 #endif
3139 break; /* We only save a line once */
3140 }
3141 }
3142 }
3143 }
3144
3145 #define TEX_LESC '\\'
3146 #define TEX_SESC '!'
3147 #define TEX_cmt '%'
3148
3149 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
3150 chars accordingly. */
3151 void
3152 TEX_mode (inf)
3153 FILE *inf;
3154 {
3155 int c;
3156
3157 while ((c = getc (inf)) != EOF)
3158 {
3159 /* Skip to next line if we hit the TeX comment char. */
3160 if (c == TEX_cmt)
3161 while (c != '\n')
3162 c = getc (inf);
3163 else if (c == TEX_LESC || c == TEX_SESC )
3164 break;
3165 }
3166
3167 if (c == TEX_LESC)
3168 {
3169 TEX_esc = TEX_LESC;
3170 TEX_opgrp = '{';
3171 TEX_clgrp = '}';
3172 }
3173 else
3174 {
3175 TEX_esc = TEX_SESC;
3176 TEX_opgrp = '<';
3177 TEX_clgrp = '>';
3178 }
3179 rewind (inf);
3180 }
3181
3182 /* Read environment and prepend it to the default string.
3183 Build token table. */
3184 struct TEX_tabent *
3185 TEX_decode_env (evarname, defenv)
3186 char *evarname;
3187 char *defenv;
3188 {
3189 register char *env, *p;
3190
3191 struct TEX_tabent *tab;
3192 int size, i;
3193
3194 /* Append default string to environment. */
3195 env = getenv (evarname);
3196 if (!env)
3197 env = defenv;
3198 else
3199 env = concat (env, defenv, "");
3200
3201 /* Allocate a token table */
3202 for (size = 1, p = env; p;)
3203 if ((p = etags_strchr (p, ':')) && *(++p))
3204 size++;
3205 /* Add 1 to leave room for null terminator. */
3206 tab = xnew (size + 1, struct TEX_tabent);
3207
3208 /* Unpack environment string into token table. Be careful about */
3209 /* zero-length strings (leading ':', "::" and trailing ':') */
3210 for (i = 0; *env;)
3211 {
3212 p = etags_strchr (env, ':');
3213 if (!p) /* End of environment string. */
3214 p = env + strlen (env);
3215 if (p - env > 0)
3216 { /* Only non-zero strings. */
3217 tab[i].name = savenstr (env, p - env);
3218 tab[i].len = strlen (tab[i].name);
3219 i++;
3220 }
3221 if (*p)
3222 env = p + 1;
3223 else
3224 {
3225 tab[i].name = NULL; /* Mark end of table. */
3226 tab[i].len = 0;
3227 break;
3228 }
3229 }
3230 return tab;
3231 }
3232
#if TeX_named_tokens
/* Record a named tag for the TeX command that starts at NAME and is
   LEN characters long.  The tag name is a copy of the text from NAME
   up to, but not including, the next group-closing character or the
   end of the line, so it includes the command itself.  Nothing is
   recorded when NAME is empty. */
void
TEX_getit (name, len)
     char *name;
     int len;
{
  char *p;

  if (*name == '\0')
    return;

  /* Let tag name extend to next group close (or end of line) */
  for (p = name + len; *p != '\0' && *p != TEX_clgrp; p++)
    continue;

  pfnote (savenstr (name, p-name), TRUE,
	  lb.buffer, strlen (lb.buffer), lineno, linecharno);
}
#endif
3254
3255 /* If the text at CP matches one of the tag-defining TeX command names,
3256 return the pointer to the first occurrence of that command in TEX_toktab.
3257 Otherwise return -1.
3258 Keep the capital `T' in `Token' for dumb truncating compilers
3259 (this distinguishes it from `TEX_toktab' */
3260 int
3261 TEX_Token (cp)
3262 char *cp;
3263 {
3264 int i;
3265
3266 for (i = 0; TEX_toktab[i].len > 0; i++)
3267 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
3268 return i;
3269 return -1;
3270 }
3271 \f
3272 /* Support for Prolog. */
3273
3274 /* Whole head (not only functor, but also arguments)
3275 is gotten in compound term. */
3276 void
3277 prolog_getit (s)
3278 char *s;
3279 {
3280 char *save_s;
3281 int insquote, npar;
3282
3283 save_s = s;
3284 insquote = FALSE;
3285 npar = 0;
3286 while (1)
3287 {
3288 if (s[0] == '\0') /* syntax error. */
3289 return;
3290 else if (insquote && s[0] == '\'' && s[1] == '\'')
3291 s += 2;
3292 else if (s[0] == '\'')
3293 {
3294 insquote = !insquote;
3295 s++;
3296 }
3297 else if (!insquote && s[0] == '(')
3298 {
3299 npar++;
3300 s++;
3301 }
3302 else if (!insquote && s[0] == ')')
3303 {
3304 npar--;
3305 s++;
3306 if (npar == 0)
3307 break;
3308 else if (npar < 0) /* syntax error. */
3309 return;
3310 }
3311 else if (!insquote && s[0] == '.'
3312 && (isspace (s[1]) || s[1] == '\0'))
3313 { /* fullstop. */
3314 if (npar != 0) /* syntax error. */
3315 return;
3316 s++;
3317 break;
3318 }
3319 else
3320 s++;
3321 }
3322 pfnote (NULL, TRUE, save_s, s-save_s, lineno, linecharno);
3323 }
3324
3325 /* It is assumed that prolog predicate starts from column 0. */
3326 void
3327 Prolog_functions (inf)
3328 FILE *inf;
3329 {
3330 void skip_comment (), prolog_getit ();
3331
3332 lineno = linecharno = charno = 0;
3333 while (!feof (inf))
3334 {
3335 lineno++;
3336 linecharno += charno;
3337 charno = readline (&lb, inf) + 1; /* 1 for newline. */
3338 dbp = lb.buffer;
3339 if (isspace (dbp[0])) /* not predicate header. */
3340 continue;
3341 else if (dbp[0] == '%') /* comment. */
3342 continue;
3343 else if (dbp[0] == '/' && dbp[1] == '*') /* comment. */
3344 skip_comment (&lb, inf, &lineno, &linecharno);
3345 else /* found. */
3346 prolog_getit (dbp);
3347 }
3348 }
3349
3350 void
3351 skip_comment (plb, inf, plineno, plinecharno)
3352 struct linebuffer *plb;
3353 FILE *inf;
3354 int *plineno; /* result */
3355 long *plinecharno; /* result */
3356 {
3357 char *cp;
3358
3359 do
3360 {
3361 for (cp = plb->buffer; *cp != '\0'; cp++)
3362 if (cp[0] == '*' && cp[1] == '/')
3363 return;
3364 (*plineno)++;
3365 *plinecharno += readline (plb, inf) + 1; /* 1 for newline. */
3366 }
3367 while (!feof(inf));
3368 }
3369 \f
3370 #ifdef ETAGS_REGEXPS
/* Take a string like "/blah/" and turn it into "blah", in place.
   NAME[0] is the separator.  The text after it is copied down to the
   start of NAME until an unquoted separator or the end of the string
   is met, and the copy is null terminated.  Within the text, a
   backslash followed by `t' gives a Tab, a backslash followed by the
   separator gives the separator, and any other backslash pair is
   kept as is.
   Returns a pointer to the character where scanning stopped: the
   terminating separator, or the original terminating null if there
   was none. */
char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;
  int after_backslash = 0;

  for (++name; *name != '\0'; ++name)
    {
      if (!after_backslash)
	{
	  if (*name == '\\')
	    {
	      after_backslash = 1;
	      continue;
	    }
	  if (*name == sep)
	    break;
	  *copyto++ = *name;
	  continue;
	}

      /* Character that follows a backslash. */
      after_backslash = 0;
      if (*name == 't')
	*copyto++ = '\t';
      else if (*name == sep)
	*copyto++ = sep;
      else
	{
	  /* Something else is quoted, so preserve the quote. */
	  *copyto++ = '\\';
	  *copyto++ = *name;
	}
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
3413
3414 /* Turn a name, which is an ed-style (but Emacs syntax) regular
3415 expression, into a real regular expression by compiling it. */
3416 void
3417 add_regex (regexp_pattern)
3418 char *regexp_pattern;
3419 {
3420 char *name;
3421 const char *err;
3422 struct re_pattern_buffer *patbuf;
3423
3424 if (regexp_pattern == NULL)
3425 {
3426 /* Remove existing regexps. */
3427 num_patterns = 0;
3428 patterns = NULL;
3429 return;
3430 }
3431
3432 if (regexp_pattern[0] == '\0')
3433 {
3434 error ("missing regexp", 0);
3435 return;
3436 }
3437 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
3438 {
3439 error ("%s: unterminated regexp", regexp_pattern);
3440 return;
3441 }
3442 name = scan_separators (regexp_pattern);
3443 if (regexp_pattern[0] == '\0')
3444 {
3445 error ("null regexp", 0);
3446 return;
3447 }
3448 (void) scan_separators (name);
3449
3450 patbuf = xnew (1, struct re_pattern_buffer);
3451 patbuf->translate = NULL;
3452 patbuf->fastmap = NULL;
3453 patbuf->buffer = NULL;
3454 patbuf->allocated = 0;
3455
3456 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
3457 if (err != NULL)
3458 {
3459 error ("%s while compiling pattern", err);
3460 return;
3461 }
3462
3463 num_patterns += 1;
3464 if (num_patterns == 1)
3465 patterns = xnew (1, struct pattern);
3466 else
3467 patterns = ((struct pattern *)
3468 xrealloc (patterns,
3469 (num_patterns * sizeof (struct pattern))));
3470 patterns[num_patterns - 1].pattern = patbuf;
3471 patterns[num_patterns - 1].name_pattern = savestr (name);
3472 patterns[num_patterns - 1].error_signaled = FALSE;
3473 }
3474
3475 /*
3476 * Do the substitutions indicated by the regular expression and
3477 * arguments.
3478 */
3479 char *
3480 substitute (in, out, regs)
3481 char *in, *out;
3482 struct re_registers *regs;
3483 {
3484 char *result = NULL, *t;
3485 int size = 0;
3486
3487 /* Pass 1: figure out how much size to allocate. */
3488 for (t = out; *t; ++t)
3489 {
3490 if (*t == '\\')
3491 {
3492 ++t;
3493 if (!*t)
3494 {
3495 fprintf (stderr, "%s: pattern subtitution ends prematurely\n",
3496 progname);
3497 return NULL;
3498 }
3499 if (isdigit (*t))
3500 {
3501 int dig = *t - '0';
3502 size += regs->end[dig] - regs->start[dig];
3503 }
3504 }
3505 }
3506
3507 /* Allocate space and do the substitutions. */
3508 result = xnew (size + 1, char);
3509 size = 0;
3510 for (; *out; ++out)
3511 {
3512 if (*out == '\\')
3513 {
3514 ++out;
3515 if (isdigit (*out))
3516 {
3517 /* Using "dig2" satisfies my debugger. Bleah. */
3518 int dig2 = *out - '0';
3519 strncpy (result + size, in + regs->start[dig2],
3520 regs->end[dig2] - regs->start[dig2]);
3521 size += regs->end[dig2] - regs->start[dig2];
3522 }
3523 else
3524 result[size++] = *out;
3525 }
3526 else
3527 result[size++] = *out;
3528 }
3529 result[size] = '\0';
3530
3531 return result;
3532 }
3533 \f
3534 #endif /* ETAGS_REGEXPS */
3535 /* Initialize a linebuffer for use */
3536 void
3537 initbuffer (linebuffer)
3538 struct linebuffer *linebuffer;
3539 {
3540 linebuffer->size = 200;
3541 linebuffer->buffer = xnew (200, char);
3542 }
3543
3544 /*
3545 * Read a line of text from `stream' into `linebuffer'.
3546 * Return the number of characters read from `stream',
3547 * which is the length of the line including the newline, if any.
3548 */
3549 long
3550 readline_internal (linebuffer, stream)
3551 struct linebuffer *linebuffer;
3552 register FILE *stream;
3553 {
3554 char *buffer = linebuffer->buffer;
3555 register char *p = linebuffer->buffer;
3556 register char *pend;
3557 int chars_deleted;
3558
3559 pend = p + linebuffer->size; /* Separate to avoid 386/IX compiler bug. */
3560
3561 while (1)
3562 {
3563 register int c = getc (stream);
3564 if (p == pend)
3565 {
3566 linebuffer->size *= 2;
3567 buffer = (char *) xrealloc (buffer, linebuffer->size);
3568 p += buffer - linebuffer->buffer;
3569 pend = buffer + linebuffer->size;
3570 linebuffer->buffer = buffer;
3571 }
3572 if (c == EOF)
3573 {
3574 chars_deleted = 0;
3575 break;
3576 }
3577 if (c == '\n')
3578 {
3579 if (p > buffer && p[-1] == '\r')
3580 {
3581 *--p = '\0';
3582 chars_deleted = 2;
3583 }
3584 else
3585 {
3586 *p = '\0';
3587 chars_deleted = 1;
3588 }
3589 break;
3590 }
3591 *p++ = c;
3592 }
3593
3594 return p - buffer + chars_deleted;
3595 }
3596
/*
 * Like readline_internal, above, but try to match the input
 * line against any existing regular expressions.
 *
 * When regexp support is compiled in, every pattern in `patterns' is
 * tried against the line.  A match makes a tag, named by the
 * pattern's name template when that is not empty.  A matching error
 * is reported once per pattern.
 */
long
readline (linebuffer, stream)
     struct linebuffer *linebuffer;
     FILE *stream;
{
  /* Read new line. */
  int i;
  long result = readline_internal (linebuffer, stream);

#ifdef ETAGS_REGEXPS
  /* Match against all listed patterns. */
  for (i = 0; i < num_patterns; ++i)
    {
      char *name;
      int match = re_match (patterns[i].pattern, linebuffer->buffer,
			    (int)result, 0, &patterns[i].regs);

      if (match == -1)		/* No match. */
	continue;

      if (match == -2)		/* Some error. */
	{
	  if (!patterns[i].error_signaled)
	    {
	      error ("error while matching pattern %d", i);
	      patterns[i].error_signaled = TRUE;
	    }
	  continue;
	}

      /* Match occurred.  Construct a tag. */
      if (patterns[i].name_pattern[0] == '\0')
	{
	  /* Make an unnamed tag. */
	  pfnote (NULL, TRUE,
		  linebuffer->buffer, match, lineno, linecharno);
	  continue;
	}

      /* Make a named tag. */
      name = substitute (linebuffer->buffer,
			 patterns[i].name_pattern,
			 &patterns[i].regs);
      if (name != NULL)
	pfnote (name, TRUE,
		linebuffer->buffer, match, lineno, linecharno);
    }
#endif /* ETAGS_REGEXPS */

  return result;
}
3654
3655 /*
3656 * Read a file, but do no processing. This is used to do regexp
3657 * matching on files that have no language defined.
3658 */
3659 void
3660 just_read_file (inf)
3661 FILE *inf;
3662 {
3663 while (!feof (inf))
3664 {
3665 ++lineno;
3666 linecharno = charno;
3667 charno += readline (&lb, inf) + 1;
3668 }
3669 }
3670
3671 \f
/*
 * Return a newly allocated copy of the whole string CP, obtained
 * through savenstr.
 */
char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
3682
3683 /*
3684 * Return a pointer to a space of size LEN+1 allocated with xnew where
3685 * the string CP has been copied for at most the first LEN characters.
3686 */
3687 char *
3688 savenstr (cp, len)
3689 char *cp;
3690 int len;
3691 {
3692 register char *dp;
3693
3694 dp = xnew (len + 1, char);
3695 strncpy (dp, cp, len);
3696 dp[len] = '\0';
3697 return dp;
3698 }
3699
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null counts as part of
 * the string, so looking for '\0' finds the terminator.
 *
 * Identical to System V strrchr, included for portability.
 */
char *
etags_strrchr (sp, c)
     register char *sp, c;
{
  register char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
	last = sp;
      if (*sp == '\0')
	break;
    }
  return last;
}
3720
3721
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null counts as part of
 * the string, so looking for '\0' finds the terminator.
 *
 * Identical to System V strchr, included for portability.
 */
char *
etags_strchr (sp, c)
     register char *sp, c;
{
  for (;; sp++)
    {
      if (*sp == c)
	return sp;
      if (*sp == '\0')
	return NULL;
    }
}
3739
3740 /* Print error message and exit. */
3741 void
3742 fatal (s1, s2)
3743 char *s1, *s2;
3744 {
3745 error (s1, s2);
3746 exit (BAD);
3747 }
3748
3749 void
3750 pfatal (s1)
3751 char *s1;
3752 {
3753 perror (s1);
3754 exit (BAD);
3755 }
3756
3757 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
3758 void
3759 error (s1, s2)
3760 char *s1, *s2;
3761 {
3762 fprintf (stderr, "%s: ", progname);
3763 fprintf (stderr, s1, s2);
3764 fprintf (stderr, "\n");
3765 }
3766
3767 /* Return a newly-allocated string whose contents
3768 concatenate those of s1, s2, s3. */
3769 char *
3770 concat (s1, s2, s3)
3771 char *s1, *s2, *s3;
3772 {
3773 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
3774 char *result = xnew (len1 + len2 + len3 + 1, char);
3775
3776 strcpy (result, s1);
3777 strcpy (result + len1, s2);
3778 strcpy (result + len1 + len2, s3);
3779 result[len1 + len2 + len3] = '\0';
3780
3781 return result;
3782 }
3783 \f
3784 /* Does the same work as the system V getcwd, but does not need to
3785 guess the buffer size in advance. */
3786 char *
3787 etags_getcwd ()
3788 {
3789 #ifdef DOS_NT
3790 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
3791
3792 getwd (path);
3793 p = path;
3794 while (*p)
3795 if (*p == '\\')
3796 *p++ = '/';
3797 else
3798 *p++ = lowcase (*p);
3799
3800 return strdup (path);
3801 #else /* not DOS_NT */
3802 #if HAVE_GETCWD
3803 int bufsize = 200;
3804 char *path = xnew (bufsize, char);
3805
3806 while (getcwd (path, bufsize) == NULL)
3807 {
3808 if (errno != ERANGE)
3809 pfatal ("getcwd");
3810 bufsize *= 2;
3811 path = xnew (bufsize, char);
3812 }
3813
3814 return path;
3815 #else /* not DOS_NT and not HAVE_GETCWD */
3816 struct linebuffer path;
3817 FILE *pipe;
3818
3819 initbuffer (&path);
3820 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
3821 if (pipe == NULL || readline_internal (&path, pipe) == 0)
3822 pfatal ("pwd");
3823 pclose (pipe);
3824
3825 return path.buffer;
3826 #endif /* not HAVE_GETCWD */
3827 #endif /* not DOS_NT */
3828 }
3829
3830 /* Return a newly allocated string containing the filename
3831 of FILE relative to the absolute directory DIR (which
3832 should end with a slash). */
3833 char *
3834 relative_filename (file, dir)
3835 char *file, *dir;
3836 {
3837 char *fp, *dp, *abs, *res;
3838
3839 /* Find the common root of file and dir. */
3840 abs = absolute_filename (file, cwd);
3841 fp = abs;
3842 dp = dir;
3843 while (*fp++ == *dp++)
3844 continue;
3845 do
3846 {
3847 fp--;
3848 dp--;
3849 }
3850 while (*fp != '/');
3851
3852 /* Build a sequence of "../" strings for the resulting relative filename. */
3853 for (dp = etags_strchr (dp + 1, '/'), res = "";
3854 dp != NULL;
3855 dp = etags_strchr (dp + 1, '/'))
3856 {
3857 res = concat (res, "../", "");
3858 }
3859
3860 /* Add the filename relative to the common root of file and dir. */
3861 res = concat (res, fp + 1, "");
3862 free (abs);
3863
3864 return res;
3865 }
3866
/* Copy the null-terminated string at SRC down to DST.  Both point
   into the same buffer, with DST before SRC; copying one character
   at a time from the front is safe for such overlapping areas, which
   strcpy does not guarantee. */
static void
absfn_move_down (dst, src)
     register char *dst, *src;
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the
   absolute filename of FILE given CWD (which should
   end with a slash).

   FILE is taken as is when absolutefn accepts it, and is appended to
   CWD otherwise.  "/." components are removed, and "/dirname/.." is
   removed together with the directory it cancels.  A "/.." that has
   no directory before it is simply dropped. */
char *
absolute_filename (file, cwd)
     char *file, *cwd;
{
  char *slashp, *cp, *res;

  if (absolutefn (file))
    res = concat (file, "", "");
  else
    res = concat (cwd, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Look for the slash that starts the previous
		 component, without ever stepping before RES. */
	      for (cp = slashp; cp > res; )
		if (*--cp == '/')
		  break;
	      if (*cp == '/')
		{
		  /* CP is the previous slash, or SLASHP itself when
		     the name starts with "/..". */
		  absfn_move_down (cp, slashp + 3);
		  slashp = cp;
		}
	      else		/* else (cp == res), no slash before */
		{
		  if (slashp[3] == '\0')
		    {
		      /* Nothing is left; keep the result in the
			 allocated area. */
		      strcpy (res, ".");
		      return res;
		    }
		  absfn_move_down (res, slashp + 4);
		  slashp = etags_strchr (res, '/');
		}
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      absfn_move_down (slashp, slashp + 2);
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  return res;
}
3920
/* Return a newly allocated string containing the absolute
   filename of dir where FILE resides given CWD (which should
   end with a slash).  The result is always a fresh allocation,
   also when FILE has no directory part and the answer is CWD.  */
char *
absolute_dirname (file, cwd)
     char *file, *cwd;
{
  char *slashp, *res;
  char save;

  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    /* Hand out a copy rather than CWD itself, so that the caller may
       free the result like in every other case.  */
    return concat (cwd, "", "");

  /* Temporarily cut FILE right after its last slash, so that only the
     directory part is made absolute; then restore the saved character.  */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, cwd);
  slashp[1] = save;

  return res;
}
3941
/* Allocate SIZE bytes with malloc and return them; when malloc
   returns NULL, report "virtual memory exhausted" through fatal.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *mem;

  mem = (long *) malloc (size);
  if (mem != NULL)
    return mem;

  fatal ("virtual memory exhausted", 0);
  return mem;
}
3952
/* Like realloc but get fatal error if memory is exhausted.
   PTR is the block to resize and SIZE the new size in bytes.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *result = (long *) realloc (ptr, size);
  if (result == NULL)
    /* Same two-argument call as xmalloc makes for this message; the
       second argument was missing here.  */
    fatal ("virtual memory exhausted", 0);
  return result;
}