Wed Nov 13 17:51:29 1996 Francesco Potorti` <F.Potorti@cnuce.cnr.it>
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * Sam Kendall added C++.
28 * Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
29 * Regexp tags by Tom Tromey.
30 *
31 * Francesco Potorti` (F.Potorti@cnuce.cnr.it) is the current maintainer.
32 */
33
34 char pot_etags_version[] = "@(#) pot revision number is 11.77";
35
36 #define TRUE 1
37 #define FALSE 0
38
39 #ifndef DEBUG
40 # define DEBUG FALSE
41 #endif
42
43 #ifdef MSDOS
44 # include <string.h>
45 # include <fcntl.h>
46 # include <sys/param.h>
47 #endif /* MSDOS */
48
49 #ifdef WINDOWSNT
50 # include <stdlib.h>
51 # include <fcntl.h>
52 # include <string.h>
53 # include <io.h>
54 # define MAXPATHLEN _MAX_PATH
55 #endif
56
57 #if !defined (MSDOS) && !defined (WINDOWSNT) && defined (STDC_HEADERS)
58 #include <stdlib.h>
59 #include <string.h>
60 #endif
61
62 #ifdef HAVE_CONFIG_H
63 # include <config.h>
64 /* On some systems, Emacs defines static as nothing for the sake
65 of unexec. We don't want that here since we don't use unexec. */
66 # undef static
67 #endif
68
69 #include <stdio.h>
70 #include <ctype.h>
71 #include <errno.h>
72 #ifndef errno
73 extern int errno;
74 #endif
75 #include <sys/types.h>
76 #include <sys/stat.h>
77
78 #if !defined (S_ISREG) && defined (S_IFREG)
79 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
80 #endif
81
82 #include <getopt.h>
83
84 #ifdef ETAGS_REGEXPS
85 # include <regex.h>
86 #endif /* ETAGS_REGEXPS */
87
/* Define CTAGS to make the program "ctags" compatible with the usual one.
   Leave it undefined to make the program "etags", which makes Emacs-style
   tag tables and tags typedefs, #defines and struct/union/enum by default. */
91 #ifdef CTAGS
92 # undef CTAGS
93 # define CTAGS TRUE
94 #else
95 # define CTAGS FALSE
96 #endif
97
98 /* Exit codes for success and failure. */
99 #ifdef VMS
100 # define GOOD 1
101 # define BAD 0
102 #else
103 # define GOOD 0
104 # define BAD 1
105 #endif
106
107 /* C extensions. */
108 #define C_PLPL 0x00001 /* C++ */
109 #define C_STAR 0x00003 /* C* */
110 #define YACC 0x10000 /* yacc file */
111
112 #define streq(s,t) ((DEBUG &&!(s)&&!(t)&&(abort(),1)) || !strcmp(s,t))
113 #define strneq(s,t,n) ((DEBUG &&!(s)&&!(t)&&(abort(),1)) || !strncmp(s,t,n))
114
/* Lower-case the character C.  C is converted to unsigned char before
   being handed to tolower, because passing tolower a negative value
   (which a plain char can hold where char is signed) is undefined.  */
#define lowcase(c)	tolower ((unsigned char) (c))
116
117 #define iswhite(arg) (_wht[arg]) /* T if char is white */
118 #define begtoken(arg) (_btk[arg]) /* T if char can start token */
119 #define intoken(arg) (_itk[arg]) /* T if char can be in token */
120 #define endtoken(arg) (_etk[arg]) /* T if char ends tokens */
121
/* Nonzero if the file name FN is absolute: it starts with a slash or,
   under DOS_NT, it has a colon and a slash as its second and third
   characters (as in "c:/").  FN is parenthesised so that any pointer
   expression can be passed, and the DOS_NT form stops at the end of an
   empty string instead of reading past its terminator.  Note that the
   DOS_NT form evaluates FN more than once.  */
#ifdef DOS_NT
# define absolutefn(fn)	((fn)[0] == '/' \
			 || ((fn)[0] != '\0' \
			     && (fn)[1] == ':' && (fn)[2] == '/'))
#else
# define absolutefn(fn)	((fn)[0] == '/')
#endif
128
129
130 /*
131 * xnew -- allocate storage
132 *
133 * SYNOPSIS: Type *xnew (int n, Type);
134 */
135 #define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
136
/* Truth values are kept in plain ints holding TRUE or FALSE.  */
typedef int logical;

/* Sorting structure: one tag, kept as a node of a binary tree.  */
typedef struct nd_st
{
  char *name;			/* function or type name */
  char *file;			/* file name */
  logical is_func;		/* use pattern or line no */
  logical been_warned;		/* set if a duplicate was noticed */
  int lno;			/* number of the line the tag is on */
  long cno;			/* character number that line starts on */
  char *pat;			/* search pattern */
  struct nd_st *left;		/* left son */
  struct nd_st *right;		/* right son */
} NODE;
150
151 extern char *getenv ();
152
153 char *concat ();
154 char *savenstr (), *savestr ();
155 char *etags_strchr (), *etags_strrchr ();
156 char *etags_getcwd ();
157 char *relative_filename (), *absolute_filename (), *absolute_dirname ();
158 void grow_linebuffer ();
159 long *xmalloc (), *xrealloc ();
160
161 typedef void Lang_function ();
162 #if FALSE /* many compilers barf on this */
163 Lang_function Asm_labels;
164 Lang_function default_C_entries;
165 Lang_function C_entries;
166 Lang_function Cplusplus_entries;
167 Lang_function Cstar_entries;
168 Lang_function Erlang_functions;
169 Lang_function Fortran_functions;
170 Lang_function Yacc_entries;
171 Lang_function Lisp_functions;
172 Lang_function Pascal_functions;
173 Lang_function Perl_functions;
174 Lang_function Prolog_functions;
175 Lang_function Scheme_functions;
176 Lang_function TeX_functions;
177 Lang_function just_read_file;
178 #else /* so let's write it this way */
179 void Asm_labels ();
180 void C_entries ();
181 void default_C_entries ();
182 void plain_C_entries ();
183 void Cplusplus_entries ();
184 void Cstar_entries ();
185 void Erlang_functions ();
186 void Fortran_functions ();
187 void Yacc_entries ();
188 void Lisp_functions ();
189 void Pascal_functions ();
190 void Perl_functions ();
191 void Prolog_functions ();
192 void Scheme_functions ();
193 void TeX_functions ();
194 void just_read_file ();
195 #endif
196
197 Lang_function *get_language_from_name ();
198 Lang_function *get_language_from_interpreter ();
199 Lang_function *get_language_from_suffix ();
200 int total_size_of_entries ();
201 long readline ();
202 long readline_internal ();
203 #ifdef ETAGS_REGEXPS
204 void add_regex ();
205 #endif
206 void add_node ();
207 void error ();
208 void suggest_asking_for_help ();
209 void fatal (), pfatal ();
210 void find_entries ();
211 void free_tree ();
212 void getit ();
213 void init ();
214 void initbuffer ();
215 void pfnote ();
216 void process_file ();
217 void put_entries ();
218 void takeprec ();
219
220 \f
221 char searchar = '/'; /* use /.../ searches */
222
223 int lineno; /* line number of current line */
224 long charno; /* current character number */
225 long linecharno; /* charno of start of line */
226
227 char *curfile; /* current input file name */
228 char *tagfile; /* output file */
229 char *progname; /* name this program was invoked with */
230 char *cwd; /* current working directory */
231 char *tagfiledir; /* directory of tagfile */
232
233 FILE *tagf; /* ioptr for tags file */
234 NODE *head; /* the head of the binary tree of tags */
235
/*
 * A `struct linebuffer' holds a line of text.  `readline' reads a line
 * from a stream into a linebuffer, whatever the length of the line.
 */
struct linebuffer
{
  long size;
  char *buffer;
};

/* The current line.  */
struct linebuffer lb;

/* Temporary area used by C_entries.  */
struct linebuffer token_name;

/* Two line buffers, each paired with a line position, used by
   C_entries instead of lb.  */
struct
{
  long linepos;
  struct linebuffer lb;
} lbs[2];
254
255 /* boolean "functions" (see init) */
256 logical _wht[0177], _etk[0177], _itk[0177], _btk[0177];
257 char
258 /* white chars */
259 *white = " \f\t\n\013",
260 /* token ending chars */
261 *endtk = " \t\n\013\"'#()[]{}=-+%*/&|^~!<>;,.:?",
262 /* token starting chars */
263 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
264 /* valid in-token chars */
265 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
266
267 logical append_to_tagfile; /* -a: append to tags */
268 /* The following three default to TRUE for etags, but to FALSE for ctags. */
269 logical typedefs; /* -t: create tags for typedefs */
270 logical typedefs_and_cplusplus; /* -T: create tags for typedefs, level */
271 /* 0 struct/enum/union decls, and C++ */
272 /* member functions. */
273 logical constantypedefs; /* -d: create tags for C #define and enum */
274 /* constants. */
275 /* -D: opposite of -d. Default under ctags. */
276 logical update; /* -u: update tags */
277 logical vgrind_style; /* -v: create vgrind style index output */
278 logical no_warnings; /* -w: suppress warnings */
279 logical cxref_style; /* -x: create cxref style output */
280 logical cplusplus; /* .[hc] means C++, not C */
281 logical noindentypedefs; /* -I: ignore indentation in C */
282
/* Long options understood by getopt_long, each mapped to the short
   option letter that main's option switch handles.  */
struct option longopts[] =
{
  { "append",             no_argument,       NULL, 'a' },
  { "backward-search",    no_argument,       NULL, 'B' },
  { "c++",                no_argument,       NULL, 'C' },
  { "cxref",              no_argument,       NULL, 'x' },
  { "defines",            no_argument,       NULL, 'd' },
  { "help",               no_argument,       NULL, 'h' },
  { "help",               no_argument,       NULL, 'H' },
  { "ignore-indentation", no_argument,       NULL, 'I' },
  { "include",            required_argument, NULL, 'i' },
  { "language",           required_argument, NULL, 'l' },
  { "no-defines",         no_argument,       NULL, 'D' },
  { "no-regex",           no_argument,       NULL, 'R' },
  { "no-warn",            no_argument,       NULL, 'w' },
  { "output",             required_argument, NULL, 'o' },
  { "regex",              required_argument, NULL, 'r' },
  { "typedefs",           no_argument,       NULL, 't' },
  { "typedefs-and-c++",   no_argument,       NULL, 'T' },
  { "update",             no_argument,       NULL, 'u' },
  { "version",            no_argument,       NULL, 'V' },
  { "vgrind",             no_argument,       NULL, 'v' },
  { NULL,                 0,                 NULL, 0 }	/* end of table */
};
307
308 #ifdef ETAGS_REGEXPS
309 /* Structure defining a regular expression. Elements are
310 the compiled pattern, and the name string. */
311 struct pattern
312 {
313 struct re_pattern_buffer *pattern;
314 struct re_registers regs;
315 char *name_pattern;
316 logical error_signaled;
317 };
318
319 /* Number of regexps found. */
320 int num_patterns = 0;
321
322 /* Array of all regexps. */
323 struct pattern *patterns = NULL;
324 #endif /* ETAGS_REGEXPS */
325
326 /*
327 * Language stuff.
328 */
329
330 /* Non-NULL if language fixed. */
331 Lang_function *lang_func = NULL;
332
333 /* Assembly code */
334 char *Asm_suffixes [] = { "a", /* Unix assembler */
335 "asm", /* Microcontroller assembly */
336 "def", /* BSO/Tasking definition includes */
337 "inc", /* Microcontroller include files */
338 "ins", /* Microcontroller include files */
339 "s", "sa", /* Unix assembler */
340 "src", /* BSO/Tasking C compiler output */
341 NULL
342 };
343
344 /* Note that .c and .h can be considered C++, if the --c++ flag was
345 given. That is why default_C_entries is called here. */
346 char *default_C_suffixes [] =
347 { "c", "h", NULL };
348
349 /* .M is for Objective C++ files. */
350 char *Cplusplus_suffixes [] =
351 { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx", "M", NULL};
352
353 char *Cstar_suffixes [] =
354 { "cs", "hs", NULL };
355
356 char *Erlang_suffixes [] =
357 { "erl", "hrl", NULL };
358
359 char *Fortran_suffixes [] =
360 { "F", "f", "f90", "for", NULL };
361
362 char *Lisp_suffixes [] =
363 { "cl", "clisp", "el", "l", "lisp", "lsp", "ml", NULL };
364
365 char *Pascal_suffixes [] =
366 { "p", "pas", NULL };
367
368 char *Perl_suffixes [] =
369 { "pl", "pm", NULL };
370 char *Perl_interpreters [] =
371 { "perl", "@PERL@", NULL };
372
373 char *plain_C_suffixes [] =
374 { "pc", /* Pro*C file */
375 "m", /* Objective C file */
376 "lm", /* Objective lex file */
377 NULL };
378
379 char *Prolog_suffixes [] =
380 { "prolog", NULL };
381
382 /* Can't do the `SCM' or `scm' prefix with a version number. */
383 char *Scheme_suffixes [] =
384 { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "t", NULL };
385
386 char *TeX_suffixes [] =
387 { "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL };
388
389 char *Yacc_suffixes [] =
390 { "y", "ym", NULL }; /* .ym is Objective yacc file */
391
392 /* Table of language names and corresponding functions, file suffixes
393 and interpreter names.
394 It is ok for a given function to be listed under more than one
395 name. I just didn't. */
396 struct lang_entry
397 {
398 char *name;
399 Lang_function *function;
400 char **suffixes;
401 char **interpreters;
402 };
403
404 struct lang_entry lang_names [] =
405 {
406 { "asm", Asm_labels, Asm_suffixes, NULL },
407 { "c", default_C_entries, default_C_suffixes, NULL },
408 { "c++", Cplusplus_entries, Cplusplus_suffixes, NULL },
409 { "c*", Cstar_entries, Cstar_suffixes, NULL },
410 { "erlang", Erlang_functions, Erlang_suffixes, NULL },
411 { "fortran", Fortran_functions, Fortran_suffixes, NULL },
412 { "lisp", Lisp_functions, Lisp_suffixes, NULL },
413 { "pascal", Pascal_functions, Pascal_suffixes, NULL },
414 { "perl", Perl_functions, Perl_suffixes, Perl_interpreters },
415 { "proc", plain_C_entries, plain_C_suffixes, NULL },
416 { "prolog", Prolog_functions, Prolog_suffixes, NULL },
417 { "scheme", Scheme_functions, Scheme_suffixes, NULL },
418 { "tex", TeX_functions, TeX_suffixes, NULL },
419 { "yacc", Yacc_entries, Yacc_suffixes, NULL },
420 { "auto", NULL }, /* default guessing scheme */
421 { "none", just_read_file }, /* regexp matching only */
422 { NULL, NULL } /* end of list */
423 };
424
425 \f
426 void
427 print_language_names ()
428 {
429 struct lang_entry *lang;
430 char **ext;
431
432 puts ("\nThese are the currently supported languages, along with the\n\
433 default file name suffixes:");
434 for (lang = lang_names; lang->name != NULL; lang++)
435 {
436 printf ("\t%s\t", lang->name);
437 if (lang->suffixes != NULL)
438 for (ext = lang->suffixes; *ext != NULL; ext++)
439 printf (" .%s", *ext);
440 puts ("");
441 }
442 puts ("Where `auto' means use default language for files based on file\n\
443 name suffix, and `none' means only do regexp processing on files.\n\
444 If no language is specified and no matching suffix is found,\n\
445 the first line of the file is read for a sharp-bang (#!) sequence\n\
446 followed by the name of an interpreter. If no such sequence is found,\n\
447 Fortran is tried first; if no tags are found, C is tried next.");
448 }
449
450 #ifndef VERSION
451 # define VERSION "19"
452 #endif
453 void
454 print_version ()
455 {
456 printf ("%s (GNU Emacs %s)\n", (CTAGS) ? "ctags" : "etags", VERSION);
457 puts ("Copyright (C) 1996 Free Software Foundation, Inc. and Ken Arnold");
458 puts ("This program is distributed under the same terms as Emacs");
459
460 exit (GOOD);
461 }
462
463 void
464 print_help ()
465 {
466 printf ("These are the options accepted by %s. You may use unambiguous\n\
467 abbreviations for the long option names. A - as file name means read\n\
468 names from stdin.", progname);
469 if (!CTAGS)
470 printf (" Absolute names are stored in the output file as they\n\
471 are. Relative ones are stored relative to the output file's directory.");
472 puts ("\n");
473
474 puts ("-a, --append\n\
475 Append tag entries to existing tags file.");
476
477 if (CTAGS)
478 puts ("-B, --backward-search\n\
479 Write the search commands for the tag entries using '?', the\n\
480 backward-search command instead of '/', the forward-search command.");
481
482 puts ("-C, --c++\n\
483 Treat files whose name suffix defaults to C language as C++ files.");
484
485 if (CTAGS)
486 puts ("-d, --defines\n\
487 Create tag entries for C #define constants and enum constants, too.");
488 else
489 puts ("-D, --no-defines\n\
490 Don't create tag entries for C #define constants and enum constants.\n\
491 This makes the tags file smaller.");
492
493 if (!CTAGS)
494 {
495 puts ("-i FILE, --include=FILE\n\
496 Include a note in tag file indicating that, when searching for\n\
497 a tag, one should also consult the tags file FILE after\n\
498 checking the current file.");
499 puts ("-l LANG, --language=LANG\n\
500 Force the following files to be considered as written in the\n\
501 named language up to the next --language=LANG option.");
502 }
503
504 #ifdef ETAGS_REGEXPS
505 puts ("-r /REGEXP/, --regex=/REGEXP/\n\
506 Make a tag for each line matching pattern REGEXP in the\n\
507 following files. REGEXP is anchored (as if preceded by ^).\n\
508 The form /REGEXP/NAME/ creates a named tag. For example Tcl\n\
509 named tags can be created with:\n\
510 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
511 puts ("-R, --no-regex\n\
512 Don't create tags from regexps for the following files.");
513 #endif /* ETAGS_REGEXPS */
514 puts ("-o FILE, --output=FILE\n\
515 Write the tags to FILE.");
516 puts ("-I, --ignore-indentation\n\
517 Don't rely on indentation quite as much as normal. Currently,\n\
518 this means not to assume that a closing brace in the first\n\
519 column is the final brace of a function or structure\n\
520 definition in C and C++.");
521
522 if (CTAGS)
523 {
524 puts ("-t, --typedefs\n\
525 Generate tag entries for C typedefs.");
526 puts ("-T, --typedefs-and-c++\n\
527 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
528 and C++ member functions.");
529 puts ("-u, --update\n\
530 Update the tag entries for the given files, leaving tag\n\
531 entries for other files in place. Currently, this is\n\
532 implemented by deleting the existing entries for the given\n\
533 files and then rewriting the new entries at the end of the\n\
534 tags file. It is often faster to simply rebuild the entire\n\
535 tag file than to use this.");
536 puts ("-v, --vgrind\n\
537 Generates an index of items intended for human consumption,\n\
538 similar to the output of vgrind. The index is sorted, and\n\
539 gives the page number of each item.");
540 puts ("-w, --no-warn\n\
541 Suppress warning messages about entries defined in multiple\n\
542 files.");
543 puts ("-x, --cxref\n\
544 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
545 The output uses line numbers instead of page numbers, but\n\
546 beyond that the differences are cosmetic; try both to see\n\
547 which you like.");
548 }
549
550 puts ("-V, --version\n\
551 Print the version of the program.\n\
552 -h, --help\n\
553 Print this help message.");
554
555 print_language_names ();
556
557 puts ("");
558 puts ("Report bugs to bug-gnu-emacs@prep.ai.mit.edu");
559
560 exit (GOOD);
561 }
562
563 \f
564 enum argument_type
565 {
566 at_language,
567 at_regexp,
568 at_filename
569 };
570
571 /* This structure helps us allow mixing of --lang and filenames. */
572 typedef struct
573 {
574 enum argument_type arg_type;
575 char *what;
576 Lang_function *function;
577 } argument;
578
579 #ifdef VMS /* VMS specific functions */
580
581 #define EOS '\0'
582
583 /* This is a BUG! ANY arbitrary limit is a BUG!
584 Won't someone please fix this? */
585 #define MAX_FILE_SPEC_LEN 255
586 typedef struct {
587 short curlen;
588 char body[MAX_FILE_SPEC_LEN + 1];
589 } vspec;
590
591 /*
592 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
593 returning in each successive call the next filename matching the input
594 spec. The function expects that each in_spec passed
595 to it will be processed to completion; in particular, up to and
596 including the call following that in which the last matching name
597 is returned, the function ignores the value of in_spec, and will
598 only start processing a new spec with the following call.
599 If an error occurs, on return out_spec contains the value
600 of in_spec when the error occurred.
601
602 With each successive filename returned in out_spec, the
603 function's return value is one. When there are no more matching
604 names the function returns zero. If on the first call no file
605 matches in_spec, or there is any other error, -1 is returned.
606 */
607
608 #include <rmsdef.h>
609 #include <descrip.h>
610 #define OUTSIZE MAX_FILE_SPEC_LEN
611 short
612 fn_exp (out, in)
613 vspec *out;
614 char *in;
615 {
616 static long context = 0;
617 static struct dsc$descriptor_s o;
618 static struct dsc$descriptor_s i;
619 static logical pass1 = TRUE;
620 long status;
621 short retval;
622
623 if (pass1)
624 {
625 pass1 = FALSE;
626 o.dsc$a_pointer = (char *) out;
627 o.dsc$w_length = (short)OUTSIZE;
628 i.dsc$a_pointer = in;
629 i.dsc$w_length = (short)strlen(in);
630 i.dsc$b_dtype = DSC$K_DTYPE_T;
631 i.dsc$b_class = DSC$K_CLASS_S;
632 o.dsc$b_dtype = DSC$K_DTYPE_VT;
633 o.dsc$b_class = DSC$K_CLASS_VS;
634 }
635 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
636 {
637 out->body[out->curlen] = EOS;
638 return 1;
639 }
640 else if (status == RMS$_NMF)
641 retval = 0;
642 else
643 {
644 strcpy(out->body, in);
645 retval = -1;
646 }
647 lib$find_file_end(&context);
648 pass1 = TRUE;
649 return retval;
650 }
651
652 /*
653 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
654 name of each file specified by the provided arg expanding wildcards.
655 */
656 char *
657 gfnames (arg, p_error)
658 char *arg;
659 logical *p_error;
660 {
661 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
662
663 switch (fn_exp (&filename, arg))
664 {
665 case 1:
666 *p_error = FALSE;
667 return filename.body;
668 case 0:
669 *p_error = FALSE;
670 return NULL;
671 default:
672 *p_error = TRUE;
673 return filename.body;
674 }
675 }
676
677 #ifndef OLD /* Newer versions of VMS do provide `system'. */
678 system (cmd)
679 char *cmd;
680 {
681 fprintf (stderr, "system() function not implemented under VMS\n");
682 }
683 #endif
684
685 #define VERSION_DELIM ';'
686 char *massage_name (s)
687 char *s;
688 {
689 char *start = s;
690
691 for ( ; *s; s++)
692 if (*s == VERSION_DELIM)
693 {
694 *s = EOS;
695 break;
696 }
697 else
698 *s = lowcase (*s);
699 return start;
700 }
701 #endif /* VMS */
702
703 \f
704 int
705 main (argc, argv)
706 int argc;
707 char *argv[];
708 {
709 int i;
710 unsigned int nincluded_files = 0;
711 char **included_files = xnew (argc, char *);
712 char *this_file;
713 argument *argbuffer;
714 int current_arg = 0, file_count = 0;
715 struct linebuffer filename_lb;
716 #ifdef VMS
717 logical got_err;
718 #endif
719
720 #ifdef DOS_NT
721 _fmode = O_BINARY; /* all of files are treated as binary files */
722 #endif /* DOS_NT */
723
724 progname = argv[0];
725
726 /* Allocate enough no matter what happens. Overkill, but each one
727 is small. */
728 argbuffer = xnew (argc, argument);
729
730 #ifdef ETAGS_REGEXPS
731 /* Set syntax for regular expression routines. */
732 re_set_syntax (RE_SYNTAX_EMACS);
733 #endif /* ETAGS_REGEXPS */
734
735 /*
736 * If etags, always find typedefs and structure tags. Why not?
737 * Also default is to find macro constants and enum constants.
738 */
739 if (!CTAGS)
740 typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
741
742 while (1)
743 {
744 int opt = getopt_long (argc, argv,
745 "-aCdDf:Il:o:r:RStTi:BuvxwVhH", longopts, 0);
746
747 if (opt == EOF)
748 break;
749
750 switch (opt)
751 {
752 case 0:
753 /* If getopt returns 0, then it has already processed a
754 long-named option. We should do nothing. */
755 break;
756
757 case 1:
758 /* This means that a filename has been seen. Record it. */
759 argbuffer[current_arg].arg_type = at_filename;
760 argbuffer[current_arg].what = optarg;
761 ++current_arg;
762 ++file_count;
763 break;
764
765 /* Common options. */
766 case 'a':
767 append_to_tagfile = TRUE;
768 break;
769 case 'C':
770 cplusplus = TRUE;
771 break;
772 case 'd':
773 constantypedefs = TRUE;
774 break;
775 case 'D':
776 constantypedefs = FALSE;
777 break;
778 case 'f': /* for compatibility with old makefiles */
779 case 'o':
780 if (tagfile)
781 {
782 fprintf (stderr, "%s: -%c option may only be given once.\n",
783 progname, opt);
784 suggest_asking_for_help ();
785 }
786 tagfile = optarg;
787 break;
788 case 'I':
789 case 'S': /* for backward compatibility */
790 noindentypedefs = TRUE;
791 break;
792 case 'l':
793 argbuffer[current_arg].function = get_language_from_name (optarg);
794 argbuffer[current_arg].arg_type = at_language;
795 ++current_arg;
796 break;
797 #ifdef ETAGS_REGEXPS
798 case 'r':
799 argbuffer[current_arg].arg_type = at_regexp;
800 argbuffer[current_arg].what = optarg;
801 ++current_arg;
802 break;
803 case 'R':
804 argbuffer[current_arg].arg_type = at_regexp;
805 argbuffer[current_arg].what = NULL;
806 ++current_arg;
807 break;
808 #endif /* ETAGS_REGEXPS */
809 case 'V':
810 print_version ();
811 break;
812 case 'h':
813 case 'H':
814 print_help ();
815 break;
816 case 't':
817 typedefs = TRUE;
818 break;
819 case 'T':
820 typedefs = typedefs_and_cplusplus = TRUE;
821 break;
822 #if (!CTAGS)
823 /* Etags options */
824 case 'i':
825 included_files[nincluded_files++] = optarg;
826 break;
827 #else /* CTAGS */
828 /* Ctags options. */
829 case 'B':
830 searchar = '?';
831 break;
832 case 'u':
833 update = TRUE;
834 break;
835 case 'v':
836 vgrind_style = TRUE;
837 /*FALLTHRU*/
838 case 'x':
839 cxref_style = TRUE;
840 break;
841 case 'w':
842 no_warnings = TRUE;
843 break;
844 #endif /* CTAGS */
845 default:
846 suggest_asking_for_help ();
847 }
848 }
849
850 for (; optind < argc; ++optind)
851 {
852 argbuffer[current_arg].arg_type = at_filename;
853 argbuffer[current_arg].what = argv[optind];
854 ++current_arg;
855 ++file_count;
856 }
857
858 if (nincluded_files == 0 && file_count == 0)
859 {
860 fprintf (stderr, "%s: No input files specified.\n", progname);
861 suggest_asking_for_help ();
862 }
863
864 if (tagfile == NULL)
865 tagfile = CTAGS ? "tags" : "TAGS";
866 cwd = etags_getcwd (); /* the current working directory */
867 if (cwd[strlen (cwd) - 1] != '/')
868 cwd = concat (cwd, "/", "");
869 if (streq (tagfile, "-"))
870 tagfiledir = cwd;
871 else
872 tagfiledir = absolute_dirname (tagfile, cwd);
873
874 init (); /* set up boolean "functions" */
875
876 initbuffer (&lb);
877 initbuffer (&token_name);
878 initbuffer (&lbs[0].lb);
879 initbuffer (&lbs[1].lb);
880 initbuffer (&filename_lb);
881
882 if (!CTAGS)
883 {
884 if (streq (tagfile, "-"))
885 {
886 tagf = stdout;
887 #ifdef DOS_NT
888 /* Switch redirected `stdout' to binary mode (setting `_fmode'
889 doesn't take effect until after `stdout' is already open). */
890 if (!isatty (fileno (stdout)))
891 setmode (fileno (stdout), O_BINARY);
892 #endif /* DOS_NT */
893 }
894 else
895 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
896 if (tagf == NULL)
897 pfatal (tagfile);
898 }
899
900 /*
901 * Loop through files finding functions.
902 */
903 for (i = 0; i < current_arg; ++i)
904 {
905 switch (argbuffer[i].arg_type)
906 {
907 case at_language:
908 lang_func = argbuffer[i].function;
909 break;
910 #ifdef ETAGS_REGEXPS
911 case at_regexp:
912 add_regex (argbuffer[i].what);
913 break;
914 #endif
915 case at_filename:
916 #ifdef VMS
917 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
918 {
919 if (got_err)
920 {
921 error ("Can't find file %s\n", this_file);
922 argc--, argv++;
923 }
924 else
925 {
926 this_file = massage_name (this_file);
927 }
928 #else
929 this_file = argbuffer[i].what;
930 #endif
931 /* Input file named "-" means read file names from stdin
932 and use them. */
933 if (streq (this_file, "-"))
934 while (readline_internal (&filename_lb, stdin) > 0)
935 process_file (filename_lb.buffer);
936 else
937 process_file (this_file);
938 #ifdef VMS
939 }
940 #endif
941 break;
942 }
943 }
944
945 if (!CTAGS)
946 {
947 while (nincluded_files-- > 0)
948 fprintf (tagf, "\f\n%s,include\n", *included_files++);
949
950 fclose (tagf);
951 exit (GOOD);
952 }
953
954 /* If CTAGS, we are here. process_file did not write the tags yet,
955 because we want them ordered. Let's do it now. */
956 if (cxref_style)
957 {
958 put_entries (head);
959 exit (GOOD);
960 }
961
962 if (update)
963 {
964 char cmd[BUFSIZ];
965 for (i = 0; i < current_arg; ++i)
966 {
967 if (argbuffer[i].arg_type != at_filename)
968 continue;
969 sprintf (cmd,
970 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
971 tagfile, argbuffer[i].what, tagfile);
972 if (system (cmd) != GOOD)
973 fatal ("failed to execute shell command", NULL);
974 }
975 append_to_tagfile = TRUE;
976 }
977
978 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
979 if (tagf == NULL)
980 pfatal (tagfile);
981 put_entries (head);
982 fclose (tagf);
983
984 if (update)
985 {
986 char cmd[BUFSIZ];
987 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
988 exit (system (cmd));
989 }
990 return GOOD;
991 }
992
993
994 /*
995 * Return a Lang_function given the name.
996 */
997 Lang_function *
998 get_language_from_name (name)
999 char *name;
1000 {
1001 struct lang_entry *lang;
1002
1003 if (name != NULL)
1004 for (lang = lang_names; lang->name != NULL; lang++)
1005 {
1006 if (streq (name, lang->name))
1007 return lang->function;
1008 }
1009
1010 fprintf (stderr, "%s: language \"%s\" not recognized.\n",
1011 progname, optarg);
1012 suggest_asking_for_help ();
1013
1014 /* This point should never be reached. The function should either
1015 return a function pointer or never return. Note that a NULL
1016 pointer cannot be considered as an error, as it means that the
1017 language has not been explicitely imposed by the user ("auto"). */
1018 return NULL; /* avoid warnings from compiler */
1019 }
1020
1021
1022 /*
1023 * Return a Lang_function given the interpreter name.
1024 */
1025 Lang_function *
1026 get_language_from_interpreter (interpreter)
1027 char *interpreter;
1028 {
1029 struct lang_entry *lang;
1030 char **iname;
1031
1032 if (interpreter == NULL)
1033 return NULL;
1034 for (lang = lang_names; lang->name != NULL; lang++)
1035 if (lang->interpreters != NULL)
1036 for (iname = lang->interpreters; *iname != NULL; iname++)
1037 if (streq (*iname, interpreter))
1038 return lang->function;
1039
1040 return NULL;
1041 }
1042
1043
1044
1045 /*
1046 * Return a Lang_function given the file suffix.
1047 */
1048 Lang_function *
1049 get_language_from_suffix (suffix)
1050 char *suffix;
1051 {
1052 struct lang_entry *lang;
1053 char **ext;
1054
1055 if (suffix == NULL)
1056 return NULL;
1057 for (lang = lang_names; lang->name != NULL; lang++)
1058 if (lang->suffixes != NULL)
1059 for (ext = lang->suffixes; *ext != NULL; ext++)
1060 if (streq (*ext, suffix))
1061 return lang->function;
1062
1063 return NULL;
1064 }
1065
1066
1067 /*
1068 * This routine is called on each file argument.
1069 */
1070 void
1071 process_file (file)
1072 char *file;
1073 {
1074 struct stat stat_buf;
1075 FILE *inf;
1076 #ifdef DOS_NT
1077 char *p;
1078
1079 for (p = file; *p != '\0'; p++)
1080 if (*p == '\\')
1081 *p = '/';
1082 #endif
1083
1084 if (stat (file, &stat_buf) == 0 && !S_ISREG (stat_buf.st_mode))
1085 {
1086 fprintf (stderr, "Skipping %s: it is not a regular file.\n", file);
1087 return;
1088 }
1089 if (streq (file, tagfile) && !streq (tagfile, "-"))
1090 {
1091 fprintf (stderr, "Skipping inclusion of %s in self.\n", file);
1092 return;
1093 }
1094 inf = fopen (file, "r");
1095 if (inf == NULL)
1096 {
1097 perror (file);
1098 return;
1099 }
1100
1101 find_entries (file, inf);
1102
1103 if (!CTAGS)
1104 {
1105 char *filename;
1106
1107 if (absolutefn (file))
1108 {
1109 /* file is an absolute filename. Canonicalise it. */
1110 filename = absolute_filename (file, cwd);
1111 }
1112 else
1113 {
1114 /* file is a filename relative to cwd. Make it relative
1115 to the directory of the tags file. */
1116 filename = relative_filename (file, tagfiledir);
1117 }
1118 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1119 free (filename);
1120 put_entries (head);
1121 free_tree (head);
1122 head = NULL;
1123 }
1124 }
1125
1126 /*
1127 * This routine sets up the boolean pseudo-functions which work
1128 * by setting boolean flags dependent upon the corresponding character
1129 * Every char which is NOT in that string is not a white char. Therefore,
1130 * all of the array "_wht" is set to FALSE, and then the elements
1131 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1132 * of a char is TRUE if it is the string "white", else FALSE.
1133 */
1134 void
1135 init ()
1136 {
1137 register char *sp;
1138 register int i;
1139
1140 for (i = 0; i < 0177; i++)
1141 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
1142 for (sp = white; *sp; sp++)
1143 _wht[*sp] = TRUE;
1144 for (sp = endtk; *sp; sp++)
1145 _etk[*sp] = TRUE;
1146 for (sp = intk; *sp; sp++)
1147 _itk[*sp] = TRUE;
1148 for (sp = begtk; *sp; sp++)
1149 _btk[*sp] = TRUE;
1150 _wht[0] = _wht['\n'];
1151 _etk[0] = _etk['\n'];
1152 _btk[0] = _btk['\n'];
1153 _itk[0] = _itk['\n'];
1154 }
1155
1156 /*
1157 * This routine opens the specified file and calls the function
1158 * which finds the function and type definitions.
1159 */
1160 void
1161 find_entries (file, inf)
1162 char *file;
1163 FILE *inf;
1164 {
1165 char *cp;
1166 Lang_function *function;
1167 NODE *old_last_node;
1168 extern NODE *last_node;
1169
1170
1171 /* Memory leakage here: the memory block pointed by curfile is never
1172 released. The amount of memory leaked here is the sum of the
1173 lengths of the input file names. */
1174 curfile = savestr (file);
1175
1176 /* If user specified a language, use it. */
1177 function = lang_func;
1178 if (function != NULL)
1179 {
1180 function (inf);
1181 fclose (inf);
1182 return;
1183 }
1184
1185 cp = etags_strrchr (file, '.');
1186 if (cp != NULL)
1187 {
1188 cp += 1;
1189 function = get_language_from_suffix (cp);
1190 if (function != NULL)
1191 {
1192 function (inf);
1193 fclose (inf);
1194 return;
1195 }
1196 }
1197
1198 /* Look for sharp-bang as the first two characters. */
1199 if (readline_internal (&lb, inf) > 2
1200 && lb.buffer[0] == '#'
1201 && lb.buffer[1] == '!')
1202 {
1203 char *lp;
1204
1205 /* Set lp to point at the first char after the last slash in the
1206 line or, if no slashes, at the first nonblank. Then set cp to
1207 the first successive blank and terminate the string. */
1208 lp = etags_strrchr (lb.buffer+2, '/');
1209 if (lp != NULL)
1210 lp += 1;
1211 else
1212 for (lp = lb.buffer+2; *lp != '\0' && isspace (*lp); lp++)
1213 continue;
1214 for (cp = lp; *cp != '\0' && !isspace (*cp); cp++)
1215 continue;
1216 *cp = '\0';
1217
1218 if (strlen (lp) > 0)
1219 {
1220 function = get_language_from_interpreter (lp);
1221 if (function != NULL)
1222 {
1223 function (inf);
1224 fclose (inf);
1225 return;
1226 }
1227 }
1228 }
1229 rewind (inf);
1230
1231 /* Try Fortran. */
1232 old_last_node = last_node;
1233 Fortran_functions (inf);
1234
1235 /* No Fortran entries found. Try C. */
1236 if (old_last_node == last_node)
1237 {
1238 rewind (inf);
1239 default_C_entries (inf);
1240 }
1241 fclose (inf);
1242 return;
1243 }
1244 \f
1245 /* Record a tag. */
1246 void
1247 pfnote (name, is_func, linestart, linelen, lno, cno)
1248 char *name; /* tag name, or NULL if unnamed */
1249 logical is_func; /* tag is a function */
1250 char *linestart; /* start of the line where tag is */
1251 int linelen; /* length of the line where tag is */
1252 int lno; /* line number */
1253 long cno; /* character number */
1254 {
1255 register NODE *np;
1256
1257 if (CTAGS && name == NULL)
1258 return;
1259
1260 np = xnew (1, NODE);
1261
1262 /* If ctags mode, change name "main" to M<thisfilename>. */
1263 if (CTAGS && !cxref_style && streq (name, "main"))
1264 {
1265 register char *fp = etags_strrchr (curfile, '/');
1266 np->name = concat ("M", fp == 0 ? curfile : fp + 1, "");
1267 fp = etags_strrchr (np->name, '.');
1268 if (fp && fp[1] != '\0' && fp[2] == '\0')
1269 fp[0] = 0;
1270 }
1271 else
1272 np->name = name;
1273 np->been_warned = FALSE;
1274 np->file = curfile;
1275 np->is_func = is_func;
1276 np->lno = lno;
1277 /* Our char numbers are 0-base, because of C language tradition?
1278 ctags compatibility? old versions compatibility? I don't know.
1279 Anyway, since emacs's are 1-base we expect etags.el to take care
1280 of the difference. If we wanted to have 1-based numbers, we would
1281 uncomment the +1 below. */
1282 np->cno = cno /* + 1 */ ;
1283 np->left = np->right = NULL;
1284 if (CTAGS && !cxref_style)
1285 {
1286 if (strlen (linestart) < 50)
1287 np->pat = concat (linestart, "$", "");
1288 else
1289 np->pat = savenstr (linestart, 50);
1290 }
1291 else
1292 np->pat = savenstr (linestart, linelen);
1293
1294 add_node (np, &head);
1295 }
1296
1297 /*
1298 * free_tree ()
1299 * recurse on left children, iterate on right children.
1300 */
1301 void
1302 free_tree (node)
1303 register NODE *node;
1304 {
1305 while (node)
1306 {
1307 register NODE *node_right = node->right;
1308 free_tree (node->left);
1309 if (node->name != NULL)
1310 free (node->name);
1311 free (node->pat);
1312 free ((char *) node);
1313 node = node_right;
1314 }
1315 }
1316
1317 /*
1318 * add_node ()
1319 * Adds a node to the tree of nodes. In etags mode, we don't keep
1320 * it sorted; we just keep a linear list. In ctags mode, maintain
1321 * an ordered tree, with no attempt at balancing.
1322 *
1323 * add_node is the only function allowed to add nodes, so it can
1324 * maintain state.
1325 */
1326 NODE *last_node = NULL;
1327 void
1328 add_node (node, cur_node_p)
1329 NODE *node, **cur_node_p;
1330 {
1331 register int dif;
1332 register NODE *cur_node = *cur_node_p;
1333
1334 if (cur_node == NULL)
1335 {
1336 *cur_node_p = node;
1337 last_node = node;
1338 return;
1339 }
1340
1341 if (!CTAGS)
1342 {
1343 /* Etags Mode */
1344 if (last_node == NULL)
1345 fatal ("internal error in add_node", NULL);
1346 last_node->right = node;
1347 last_node = node;
1348 }
1349 else
1350 {
1351 /* Ctags Mode */
1352 dif = strcmp (node->name, cur_node->name);
1353
1354 /*
1355 * If this tag name matches an existing one, then
1356 * do not add the node, but maybe print a warning.
1357 */
1358 if (!dif)
1359 {
1360 if (streq (node->file, cur_node->file))
1361 {
1362 if (!no_warnings)
1363 {
1364 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1365 node->file, lineno, node->name);
1366 fprintf (stderr, "Second entry ignored\n");
1367 }
1368 }
1369 else if (!cur_node->been_warned && !no_warnings)
1370 {
1371 fprintf
1372 (stderr,
1373 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1374 node->file, cur_node->file, node->name);
1375 cur_node->been_warned = TRUE;
1376 }
1377 return;
1378 }
1379
1380 /* Actually add the node */
1381 add_node (node, dif < 0 ? &cur_node->left : &cur_node->right);
1382 }
1383 }
1384 \f
1385 void
1386 put_entries (node)
1387 register NODE *node;
1388 {
1389 register char *sp;
1390
1391 if (node == NULL)
1392 return;
1393
1394 /* Output subentries that precede this one */
1395 put_entries (node->left);
1396
1397 /* Output this entry */
1398
1399 if (!CTAGS)
1400 {
1401 if (node->name != NULL)
1402 fprintf (tagf, "%s\177%s\001%d,%d\n",
1403 node->pat, node->name, node->lno, node->cno);
1404 else
1405 fprintf (tagf, "%s\177%d,%d\n",
1406 node->pat, node->lno, node->cno);
1407 }
1408 else
1409 {
1410 if (node->name == NULL)
1411 error ("internal error: NULL name in ctags mode.", NULL);
1412
1413 if (cxref_style)
1414 {
1415 if (vgrind_style)
1416 fprintf (stdout, "%s %s %d\n",
1417 node->name, node->file, (node->lno + 63) / 64);
1418 else
1419 fprintf (stdout, "%-16s %3d %-16s %s\n",
1420 node->name, node->lno, node->file, node->pat);
1421 }
1422 else
1423 {
1424 fprintf (tagf, "%s\t%s\t", node->name, node->file);
1425
1426 if (node->is_func)
1427 { /* a function */
1428 putc (searchar, tagf);
1429 putc ('^', tagf);
1430
1431 for (sp = node->pat; *sp; sp++)
1432 {
1433 if (*sp == '\\' || *sp == searchar)
1434 putc ('\\', tagf);
1435 putc (*sp, tagf);
1436 }
1437 putc (searchar, tagf);
1438 }
1439 else
1440 { /* a typedef; text pattern inadequate */
1441 fprintf (tagf, "%d", node->lno);
1442 }
1443 putc ('\n', tagf);
1444 }
1445 }
1446
1447 /* Output subentries that follow this one */
1448 put_entries (node->right);
1449 }
1450
/* Return how many decimal digits are needed to write NUM.  Zero takes
   one digit; a minus sign is not counted.  */
int
number_len (num)
     long num;
{
  int digits = 0;

  /* Counting before testing makes zero come out as one digit.  */
  do
    {
      digits++;
      num /= 10;
    }
  while (num != 0);

  return digits;
}
1463
1464 /*
1465 * Return total number of characters that put_entries will output for
1466 * the nodes in the subtree of the specified node. Works only if
1467 * we are not ctags, but called only in that case. This count
1468 * is irrelevant with the new tags.el, but is still supplied for
1469 * backward compatibility.
1470 */
1471 int
1472 total_size_of_entries (node)
1473 register NODE *node;
1474 {
1475 register int total;
1476
1477 if (node == NULL)
1478 return 0;
1479
1480 total = 0;
1481 for (; node; node = node->right)
1482 {
1483 /* Count left subentries. */
1484 total += total_size_of_entries (node->left);
1485
1486 /* Count this entry */
1487 total += strlen (node->pat) + 1;
1488 total += number_len ((long) node->lno) + 1 + number_len (node->cno) + 1;
1489 if (node->name != NULL)
1490 total += 1 + strlen (node->name); /* \001name */
1491 }
1492
1493 return total;
1494 }
1495 \f
/*
 * The C symbol tables.
 *
 * sym_type is the class that the keyword table below assigns to each
 * word it knows; st_none is what C_symtype returns for any other word.
 */
enum sym_type
{
  st_none,                      /* not a known keyword */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_struct,                  /* struct, union, class, namespace, domain */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define */
  st_C_typedef,                 /* typedef */
  st_C_typespec                 /* type specifiers and qualifiers */
};
1504
1505 /* Feed stuff between (but not including) %[ and %] lines to:
1506 gperf -c -k 1,3 -o -p -r -t
1507 %[
1508 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1509 %%
1510 @interface, 0, st_C_objprot
1511 @protocol, 0, st_C_objprot
1512 @implementation,0, st_C_objimpl
1513 @end, 0, st_C_objend
1514 class, C_PLPL, st_C_struct
1515 namespace, C_PLPL, st_C_struct
1516 domain, C_STAR, st_C_struct
1517 union, 0, st_C_struct
1518 struct, 0, st_C_struct
1519 enum, 0, st_C_enum
1520 typedef, 0, st_C_typedef
1521 define, 0, st_C_define
1522 bool, C_PLPL, st_C_typespec
1523 long, 0, st_C_typespec
1524 short, 0, st_C_typespec
1525 int, 0, st_C_typespec
1526 char, 0, st_C_typespec
1527 float, 0, st_C_typespec
1528 double, 0, st_C_typespec
1529 signed, 0, st_C_typespec
1530 unsigned, 0, st_C_typespec
1531 auto, 0, st_C_typespec
1532 void, 0, st_C_typespec
1533 extern, 0, st_C_typespec
1534 static, 0, st_C_typespec
1535 const, 0, st_C_typespec
1536 volatile, 0, st_C_typespec
1537 explicit, C_PLPL, st_C_typespec
1538 mutable, C_PLPL, st_C_typespec
1539 typename, C_PLPL, st_C_typespec
1540 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1541 DEFUN, 0, st_C_gnumacro
1542 SYSCALL, 0, st_C_gnumacro
1543 ENTRY, 0, st_C_gnumacro
1544 PSEUDO, 0, st_C_gnumacro
1545 # These are defined inside C functions, so currently they are not met.
1546 # EXFUN used in glibc, DEFVAR_* in emacs.
1547 #EXFUN, 0, st_C_gnumacro
1548 #DEFVAR_, 0, st_C_gnumacro
1549 %]
1550 and replace lines between %< and %> with its output. */
1551 /*%<*/
1552 /* C code produced by gperf version 2.1 (K&R C version) */
1553 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
1554
1555
/* One entry of the keyword table: the keyword itself, the mask of C
   extensions (C_PLPL, C_STAR, or 0 for none) under which it counts as
   a keyword, and its class.  This whole section is gperf output: do
   not edit it by hand, regenerate it as explained above.  */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };

/* Bounds used by in_word_set to reject a word before looking it up.  */
#define MIN_WORD_LENGTH 3
#define MAX_WORD_LENGTH 15
#define MIN_HASH_VALUE 34
#define MAX_HASH_VALUE 121
/*
   34 keywords
   88 is the maximum key range
*/
1566
/* Hash function generated by gperf for the keyword table.  The value
   is LEN plus the table entries selected by the first and the third
   character of STR.  STR[2] is read unconditionally: in_word_set only
   calls this function when LEN is at least MIN_WORD_LENGTH.  The
   table has 128 entries and is subscripted by plain chars.  */
static int
hash (str, len)
     register char *str;
     register unsigned int len;
{
  static unsigned char hash_table[] =
    {
     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
     121, 121, 121, 121,  45, 121, 121, 121,  16,  19,
      61, 121, 121, 121, 121, 121, 121, 121, 121, 121,
      10, 121, 121,  20,  53, 121, 121, 121, 121, 121,
     121, 121, 121, 121, 121, 121, 121,  41,  45,  22,
      60,  47,  37,  28, 121,  55, 121, 121,  20,  14,
      29,  30,   5, 121,  50,  59,  30,  54,   6, 121,
     121, 121, 121, 121, 121, 121, 121, 121,
    };
  return len + hash_table[str[2]] + hash_table[str[0]];
}
1590
/* Keyword lookup generated by gperf.  Return the table entry for the
   word made of the first LEN characters of STR, or 0 when the word is
   not in the table.  The table is indexed by hash value; unused slots
   hold an empty name.  Only LEN characters are compared, so STR need
   not be terminated right after the word.  */
struct C_stab_entry *
in_word_set (str, len)
     register char *str;
     register unsigned int len;
{

  static struct C_stab_entry wordlist[] =
    {
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"volatile", 0, st_C_typespec},
      {"PSEUDO", 0, st_C_gnumacro},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"typedef", 0, st_C_typedef},
      {"typename", C_PLPL, st_C_typespec},
      {"",}, {"",}, {"",},
      {"SYSCALL", 0, st_C_gnumacro},
      {"",}, {"",}, {"",},
      {"mutable", C_PLPL, st_C_typespec},
      {"namespace", C_PLPL, st_C_struct},
      {"long", 0, st_C_typespec},
      {"",}, {"",},
      {"const", 0, st_C_typespec},
      {"",}, {"",}, {"",},
      {"explicit", C_PLPL, st_C_typespec},
      {"",}, {"",}, {"",}, {"",},
      {"void", 0, st_C_typespec},
      {"",},
      {"char", 0, st_C_typespec},
      {"class", C_PLPL, st_C_struct},
      {"",}, {"",}, {"",},
      {"float", 0, st_C_typespec},
      {"",},
      {"@implementation", 0, st_C_objimpl},
      {"auto", 0, st_C_typespec},
      {"",},
      {"ENTRY", 0, st_C_gnumacro},
      {"@end", 0, st_C_objend},
      {"bool", C_PLPL, st_C_typespec},
      {"domain", C_STAR, st_C_struct},
      {"",},
      {"DEFUN", 0, st_C_gnumacro},
      {"extern", 0, st_C_typespec},
      {"@interface", 0, st_C_objprot},
      {"",}, {"",}, {"",},
      {"int", 0, st_C_typespec},
      {"",}, {"",}, {"",}, {"",},
      {"signed", 0, st_C_typespec},
      {"short", 0, st_C_typespec},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"define", 0, st_C_define},
      {"@protocol", 0, st_C_objprot},
      {"enum", 0, st_C_enum},
      {"static", 0, st_C_typespec},
      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
      {"union", 0, st_C_struct},
      {"struct", 0, st_C_struct},
      {"",}, {"",}, {"",}, {"",},
      {"double", 0, st_C_typespec},
      {"unsigned", 0, st_C_typespec},
    };

  /* Words shorter or longer than any keyword cannot match; this test
     also guarantees that hash may read str[2].  */
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
        {
          register char *s = wordlist[key].name;

          /* Compare the first character inline, the rest with strncmp.  */
          if (*s == *str && !strncmp (str + 1, s + 1, len - 1))
            return &wordlist[key];
        }
    }
  return 0;
}
1669 /*%>*/
1670
1671 enum sym_type
1672 C_symtype (str, len, c_ext)
1673 char *str;
1674 int len;
1675 int c_ext;
1676 {
1677 register struct C_stab_entry *se = in_word_set (str, len);
1678
1679 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
1680 return st_none;
1681 return se->type;
1682 }
1683 \f
/*
 * C functions are recognized using a simple finite automaton.
 * funcdef is its state variable.  It is advanced both by
 * consider_token, as tokens are classified, and by C_entries, as
 * quotes, parentheses and other punctuation are met.
 */
enum
{
  fnone,                        /* nothing seen */
  ftagseen,                     /* function-like tag seen */
  fstartlist,                   /* just after open parenthesis */
  finlist,                      /* in parameter list */
  flistseen,                    /* after parameter list */
  fignore                       /* before open brace */
} funcdef;
1697
1698
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.  consider_token enters ttypedseen
 * when it meets the typedef keyword (and the `typedefs' option is
 * set) and moves on to tend after a type name or type specifier; the
 * first token seen in tend state that is not a type keyword is the
 * typedef tag.
 */
enum
{
  tnone,                        /* nothing seen */
  ttypedseen,                   /* typedef keyword seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
1711
1712
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.  consider_token enters skeyseen on a struct-like keyword
 * and stagseen on the token that follows it.
 */
enum
{
  snone,                        /* nothing seen yet */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen,                   /* colon seen after struct-like tag */
  sinbody                       /* in struct body: recognize member func defs*/
} structdef;
1726
/*
 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
 * struct tag, and structtype is the type of the preceding struct-like
 * keyword.
 *
 * consider_token sets structtag either to a heap copy of the tag or,
 * for an enum, to the string literal "<enum>"; together with the
 * literal initial value below, this means that structtag must not be
 * passed to free without knowing where it came from.
 */
char *structtag = "<uninited>";
enum sym_type structtype;

/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token sets it to a heap copy of the class name.
 */
char *objtag = "<uninited>";
1739
/*
 * Yet another little state machine to deal with preprocessor lines.
 * C_entries enters dsharpseen when a '#' starts a preprocessor line,
 * consider_token then looks at the tokens that follow, and the CNL
 * macro goes back to dnone at the end of the line.
 */
enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
1750
/*
 * State machine for Objective C protocols and implementations.
 * objdef is its state variable, advanced by consider_token and by
 * C_entries.
 */
enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
1769
/*
 * Set this to TRUE, and the next token considered is called a function.
 * Used only for GNU emacs's function-defining macros: consider_token
 * sets it when it meets one of them (DEFUN and the like) and clears it
 * again when it tags the token that follows.
 */
logical next_token_is_func;

/*
 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
 * C_entries toggles it each time it meets "%%" in a yacc file.
 */
logical yacc_rules;

/*
 * methodlen is the length of the method name stored in token_name.
 * consider_token sets it to the length of the first part of an
 * Objective C method name and increases it for each further part.
 */
int methodlen;
1785
/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function, or corresponds to a typedef, or is a struct/union/enum
 *      tag, or #define, or an enum constant.
 *
 *      Returns TRUE when the token is to be recorded as a tag, FALSE
 *      when it is not.  In both cases the state machines listed below
 *      may have been advanced.
 *
 *      *IS_FUNC gets TRUE when the token is a function or #define
 *      macro with args; it is also set for the function named in a
 *      GNU function-defining macro and for Objective C class and
 *      category names.  It is left untouched on most other paths, so
 *      the caller must initialise it.  C_EXT is which language we are
 *      looking at.
 *
 *      In the future we will need some way to adjust where the end of
 *      the token is; for instance, implementing the C++ keyword
 *      `operator' properly will adjust the end of the token to be after
 *      whatever follows `operator'.
 *
 * Globals
 *      funcdef                 IN OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      next_token_is_func      IN OUT
 *      structtag, structtype   OUT (when a struct-like tag is seen)
 *      objtag                  OUT (when an Objective C class is seen)
 *      methodlen, token_name   OUT (Objective C method names)
 */

logical
consider_token (str, len, c, c_ext, cblev, parlev, is_func)
     register char *str;        /* IN: token pointer */
     register int len;          /* IN: token length */
     register char c;           /* IN: first char after the token */
     int c_ext;                 /* IN: C extensions mask */
     int cblev;                 /* IN: curly brace level */
     int parlev;                /* IN: parenthesis level */
     logical *is_func;          /* OUT: function found */
{
  enum sym_type toktype = C_symtype (str, len, c_ext);

  /*
   * Advance the definedef state machine.
   */
  switch (definedef)
    {
    case dnone:
      /* We're not on a preprocessor line. */
      break;
    case dsharpseen:
      /* First token after '#': only `define' is of interest. */
      if (toktype == st_C_define)
        {
          definedef = ddefineseen;
        }
      else
        {
          definedef = dignorerest;
        }
      return FALSE;
    case ddefineseen:
      /*
       * Make a tag for any macro, unless it is a constant
       * and constantypedefs is FALSE.
       */
      definedef = dignorerest;
      /* A macro takes args iff '(' comes right after its name. */
      *is_func = (c == '(');
      if (!*is_func && !constantypedefs)
        return FALSE;
      else
        return TRUE;
    case dignorerest:
      return FALSE;
    default:
      error ("internal error: definedef value.", NULL);
    }

  /*
   * Now typedefs
   */
  switch (typdef)
    {
    case tnone:
      if (toktype == st_C_typedef)
        {
          /* The keyword is swallowed even when typedefs are not tagged. */
          if (typedefs)
            typdef = ttypedseen;
          funcdef = fnone;
          return FALSE;
        }
      break;
    case ttypedseen:
      switch (toktype)
        {
        case st_none:
        case st_C_typespec:
          /* A type name or specifier: the tag may come next. */
          typdef = tend;
          break;
        case st_C_struct:
        case st_C_enum:
          break;
        }
      /* Do not return here, so the structdef stuff has a chance. */
      break;
    case tend:
      /* Further type keywords are skipped; anything else is the
         typedef tag. */
      switch (toktype)
        {
        case st_C_typespec:
        case st_C_struct:
        case st_C_enum:
          return FALSE;
        }
      return TRUE;
    }

  /*
   * This structdef business is currently only invoked when cblev==0.
   * It should be recursively invoked whatever the curly brace level,
   * and a stack of states kept, to allow for definitions of structs
   * within structs.
   *
   * This structdef business is NOT invoked when we are ctags and the
   * file is plain C.  This is because a struct tag may have the same
   * name as another tag, and this loses with ctags.
   */
  switch (toktype)
    {
    case st_C_struct:
    case st_C_enum:
      if (typdef == ttypedseen
          || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
        {
          structdef = skeyseen;
          structtype = toktype;
        }
      return FALSE;
    }

  if (structdef == skeyseen)
    {
      /* Save the tag for struct/union/class, for functions that may be
         defined inside. */
      if (structtype == st_C_struct)
        structtag = savenstr (str, len);
      else
        structtag = "<enum>";
      structdef = stagseen;
      return TRUE;
    }

  /* Avoid entering funcdef stuff if typdef is going on. */
  if (typdef != tnone)
    {
      definedef = dnone;
      return FALSE;
    }

  /* Detect GNU macros.

     DEFUN note for writers of emacs C code:
      The DEFUN macro, used in emacs C source code, has a first arg
     that is a string (the lisp function name), and a second arg that
     is a C function name.  Since etags skips strings, the second arg
     is tagged.  This is unfortunate, as it would be better to tag the
     first arg.  The simplest way to deal with this problem would be
     to name the tag with a name built from the function name, by
     removing the initial 'F' character and substituting '-' for '_'.
     Anyway, this assumes that the conventions of naming lisp
     functions will never change.  Currently, this method is not
     implemented, so writers of emacs code are recommended to put the
     first two args of a DEFUN on the same line. */
  if (definedef == dnone && toktype == st_C_gnumacro)
    {
      next_token_is_func = TRUE;
      return FALSE;
    }
  if (next_token_is_func)
    {
      /* This is the token following a GNU macro name: tag it as a
         function and ignore what follows up to the open brace. */
      next_token_is_func = FALSE;
      funcdef = fignore;
      *is_func = TRUE;
      return TRUE;
    }

  /* Detect Objective C constructs. */
  switch (objdef)
    {
    case onone:
      switch (toktype)
        {
        case st_C_objprot:
          objdef = oprotocol;
          return FALSE;
        case st_C_objimpl:
          objdef = oimplementation;
          return FALSE;
        }
      break;
    case oimplementation:
      /* Save the class tag for functions that may be defined inside. */
      objtag = savenstr (str, len);
      objdef = oinbody;
      return FALSE;
    case oprotocol:
      /* Save the class tag for categories. */
      objtag = savenstr (str, len);
      objdef = otagseen;
      *is_func = TRUE;
      return TRUE;
    case oparenseen:
      /* The token inside the parentheses is the category name. */
      objdef = ocatseen;
      *is_func = TRUE;
      return TRUE;
    case oinbody:
      break;
    case omethodsign:
      /* First part of a method name: start building the full name in
         token_name.  Tokens inside parentheses are not looked at. */
      if (parlev == 0)
        {
          objdef = omethodtag;
          methodlen = len;
          grow_linebuffer (&token_name, methodlen+1);
          strncpy (token_name.buffer, str, len);
          token_name.buffer[methodlen] = '\0';
          return TRUE;
        }
      return FALSE;
    case omethodcolon:
      /* The token after a colon is a parameter: skip it. */
      if (parlev == 0)
        objdef = omethodparm;
      return FALSE;
    case omethodparm:
      /* A further part of the method name: append it to token_name. */
      if (parlev == 0)
        {
          objdef = omethodtag;
          methodlen += len;
          grow_linebuffer (&token_name, methodlen+1);
          strncat (token_name.buffer, str, len);
          return TRUE;
        }
      return FALSE;
    case oignore:
      if (toktype == st_C_objend)
        {
          /* Memory leakage here: the string pointed by objtag is
             never released, because many tests would be needed to
             avoid breaking on incorrect input code.  The amount of
             memory leaked here is the sum of the lengths of the
             class tags.
          free (objtag); */
          objdef = onone;
        }
      return FALSE;
    }

  /* A function or enum constant? */
  switch (toktype)
    {
    case st_C_typespec:
      if (funcdef != finlist && funcdef != fignore)
        funcdef = fnone;                /* should be useless */
      return FALSE;
    case st_none:
      /* Inside an enum body every plain token is an enum constant. */
      if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
        return TRUE;
      /* Otherwise a plain token met while no function is being
         recognized is a candidate function name. */
      if (funcdef == fnone)
        {
          funcdef = ftagseen;
          *is_func = TRUE;
          return TRUE;
        }
    }

  return FALSE;
}
2053
2054 /*
2055 * C_entries ()
2056 * This routine finds functions, typedefs, #define's, enum
2057 * constants and struct/union/enum definitions in C syntax
2058 * and adds them to the list.
2059 */
/* Description of a token that may become a tag, as filled in by
   C_entries and consumed by the make_C_tag macro.  */
typedef struct
{
  logical valid;                /* TRUE while the token waits to be tagged */
  char *str;                    /* NOTE(review): not used by the code that
                                   fills in the other fields in C_entries;
                                   confirm whether it is still needed */
  logical named;                /* tag gets an explicit name, taken from
                                   token_name */
  int linelen;                  /* length of the line up to and including
                                   the char after the token */
  int lineno;                   /* number of the line the token is on */
  long linepos;                 /* character position of the start of
                                   that line */
  char *buffer;                 /* text of that line */
} TOKEN;
2070
/* Helpers for the pair of line buffers used by C_entries, which keeps
   them in an array named lbs (not defined here) and must have the
   local variables curndx and newndx in scope.  curndx is the index of
   the buffer that the next line is read into, newndx the index of the
   buffer holding the line read most recently.  Switching buffers
   after a token has been found keeps the text of its line available
   while the following lines are read.  */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* The current, the other and the most recently read line buffer, and
   the character position at which the line in each of them starts.  */
#define curlb (lbs[curndx].lb)
#define othlb (lbs[1-curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define othlinepos (lbs[1-curndx].linepos)
#define newlinepos (lbs[newndx].linepos)
2080
/* Read the next line of inf into the current line buffer and make lp
   point at its start, updating the line and character counters on
   the way.  The definedef state is left alone, hence the name: this
   is what is wanted for newlines that do not end a preprocessor line.
   To be used inside C_entries only, as it relies on its locals.  */
#define CNL_SAVE_DEFINEDEF                                              \
do {                                                                    \
  curlinepos = charno;                                                  \
  lineno++;                                                             \
  linecharno = charno;                                                  \
  charno += readline (&curlb, inf);                                     \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Same as above, for a newline that also ends a preprocessor line:
   the token saved when the preprocessor line began, if any, is
   restored, and definedef goes back to dnone.  */
#define CNL                                                             \
do {                                                                    \
  CNL_SAVE_DEFINEDEF;                                                   \
  if (savetok.valid)                                                    \
    {                                                                   \
      tok = savetok;                                                    \
      savetok.valid = FALSE;                                            \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2102
/* Record the pending token tok as a tag by means of pfnote, then mark
   it as used.  ISFUN tells whether the tag is a function.  The tag
   gets an explicit name, a fresh copy of token_name, in ctags mode or
   when the token is marked as named; else it is unnamed.

   This macro should never be called when tok.valid is FALSE, but
   we must protect about both invalid input and internal errors. */
#define make_C_tag(isfun)  do \
if (tok.valid) {                                                        \
  char *name = NULL;                                                    \
  if (CTAGS || tok.named)                                               \
    name = savestr (token_name.buffer);                                 \
  pfnote (name, isfun, tok.buffer, tok.linelen, tok.lineno, tok.linepos); \
  tok.valid = FALSE;                                                    \
} /* else if (DEBUG) abort (); */ while (0)
2113
2114 void
2115 C_entries (c_ext, inf)
2116 int c_ext; /* extension of C */
2117 FILE *inf; /* input file */
2118 {
2119 register char c; /* latest char read; '\0' for end of line */
2120 register char *lp; /* pointer one beyond the character `c' */
2121 int curndx, newndx; /* indices for current and new lb */
2122 TOKEN tok; /* latest token read */
2123 register int tokoff; /* offset in line of start of current token */
2124 register int toklen; /* length of current token */
2125 int cblev; /* current curly brace level */
2126 int parlev; /* current parenthesis level */
2127 logical incomm, inquote, inchar, quotednl, midtoken;
2128 logical cplpl;
2129 TOKEN savetok; /* token saved during preprocessor handling */
2130
2131
2132 curndx = newndx = 0;
2133 lineno = 0;
2134 charno = 0;
2135 lp = curlb.buffer;
2136 *lp = 0;
2137
2138 funcdef = fnone; typdef = tnone; structdef = snone;
2139 definedef = dnone; objdef = onone;
2140 next_token_is_func = yacc_rules = FALSE;
2141 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2142 tok.valid = savetok.valid = FALSE;
2143 cblev = 0;
2144 parlev = 0;
2145 cplpl = c_ext & C_PLPL;
2146
2147 while (!feof (inf))
2148 {
2149 c = *lp++;
2150 if (c == '\\')
2151 {
2152 /* If we're at the end of the line, the next character is a
2153 '\0'; don't skip it, because it's the thing that tells us
2154 to read the next line. */
2155 if (*lp == '\0')
2156 {
2157 quotednl = TRUE;
2158 continue;
2159 }
2160 lp++;
2161 c = ' ';
2162 }
2163 else if (incomm)
2164 {
2165 switch (c)
2166 {
2167 case '*':
2168 if (*lp == '/')
2169 {
2170 c = *lp++;
2171 incomm = FALSE;
2172 }
2173 break;
2174 case '\0':
2175 /* Newlines inside comments do not end macro definitions in
2176 traditional cpp. */
2177 CNL_SAVE_DEFINEDEF;
2178 break;
2179 }
2180 continue;
2181 }
2182 else if (inquote)
2183 {
2184 switch (c)
2185 {
2186 case '"':
2187 inquote = FALSE;
2188 break;
2189 case '\0':
2190 /* Newlines inside strings do not end macro definitions
2191 in traditional cpp, even though compilers don't
2192 usually accept them. */
2193 CNL_SAVE_DEFINEDEF;
2194 break;
2195 }
2196 continue;
2197 }
2198 else if (inchar)
2199 {
2200 switch (c)
2201 {
2202 case '\0':
2203 /* Hmmm, something went wrong. */
2204 CNL;
2205 /* FALLTHRU */
2206 case '\'':
2207 inchar = FALSE;
2208 break;
2209 }
2210 continue;
2211 }
2212 else
2213 switch (c)
2214 {
2215 case '"':
2216 inquote = TRUE;
2217 if (funcdef != finlist && funcdef != fignore)
2218 funcdef = fnone;
2219 continue;
2220 case '\'':
2221 inchar = TRUE;
2222 if (funcdef != finlist && funcdef != fignore)
2223 funcdef = fnone;
2224 continue;
2225 case '/':
2226 if (*lp == '*')
2227 {
2228 lp++;
2229 incomm = TRUE;
2230 continue;
2231 }
2232 else if (/* cplpl && */ *lp == '/')
2233 {
2234 c = '\0';
2235 break;
2236 }
2237 else
2238 break;
2239 case '%':
2240 if ((c_ext & YACC) && *lp == '%')
2241 {
2242 /* entering or exiting rules section in yacc file */
2243 lp++;
2244 definedef = dnone; funcdef = fnone;
2245 typdef = tnone; structdef = snone;
2246 next_token_is_func = FALSE;
2247 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2248 cblev = 0;
2249 yacc_rules = !yacc_rules;
2250 continue;
2251 }
2252 else
2253 break;
2254 case '#':
2255 if (definedef == dnone)
2256 {
2257 char *cp;
2258 logical cpptoken = TRUE;
2259
2260 /* Look back on this line. If all blanks, or nonblanks
2261 followed by an end of comment, this is a preprocessor
2262 token. */
2263 for (cp = newlb.buffer; cp < lp-1; cp++)
2264 if (!iswhite (*cp))
2265 {
2266 if (*cp == '*' && *(cp+1) == '/')
2267 {
2268 cp++;
2269 cpptoken = TRUE;
2270 }
2271 else
2272 cpptoken = FALSE;
2273 }
2274 if (cpptoken)
2275 definedef = dsharpseen;
2276 } /* if (definedef == dnone) */
2277
2278 continue;
2279 } /* switch (c) */
2280
2281
2282 /* Consider token only if some complicated conditions are satisfied. */
2283 if ((definedef != dnone
2284 || (cblev == 0 && structdef != scolonseen)
2285 || (cblev == 1 && cplpl && structdef == sinbody)
2286 || (structdef == sinbody && structtype == st_C_enum))
2287 && typdef != tignore
2288 && definedef != dignorerest
2289 && funcdef != finlist)
2290 {
2291 if (midtoken)
2292 {
2293 if (endtoken (c))
2294 {
2295 if (c == ':' && cplpl && *lp == ':' && begtoken(*(lp + 1)))
2296 {
2297 /*
2298 * This handles :: in the middle, but not at the
2299 * beginning of an identifier.
2300 */
2301 lp += 2;
2302 toklen += 3;
2303 }
2304 else
2305 {
2306 logical is_func = FALSE;
2307
2308 if (yacc_rules
2309 || consider_token (newlb.buffer + tokoff, toklen, c,
2310 c_ext, cblev, parlev, &is_func))
2311 {
2312 if (structdef == sinbody
2313 && definedef == dnone
2314 && is_func)
2315 /* function defined in C++ class body */
2316 {
2317 grow_linebuffer (&token_name,
2318 strlen(structtag)+2+toklen+1);
2319 strcpy (token_name.buffer, structtag);
2320 strcat (token_name.buffer, "::");
2321 strncat (token_name.buffer,
2322 newlb.buffer+tokoff, toklen);
2323 tok.named = TRUE;
2324 }
2325 else if (objdef == ocatseen)
2326 /* Objective C category */
2327 {
2328 grow_linebuffer (&token_name,
2329 strlen(objtag)+2+toklen+1);
2330 strcpy (token_name.buffer, objtag);
2331 strcat (token_name.buffer, "(");
2332 strncat (token_name.buffer,
2333 newlb.buffer+tokoff, toklen);
2334 strcat (token_name.buffer, ")");
2335 tok.named = TRUE;
2336 }
2337 else if (objdef == omethodtag
2338 || objdef == omethodparm)
2339 /* Objective C method */
2340 {
2341 tok.named = TRUE;
2342 }
2343 else
2344 {
2345 grow_linebuffer (&token_name, toklen+1);
2346 strncpy (token_name.buffer,
2347 newlb.buffer+tokoff, toklen);
2348 token_name.buffer[toklen] = '\0';
2349 if (structdef == stagseen
2350 || typdef == tend
2351 || (is_func
2352 && definedef == dignorerest)) /* macro */
2353 tok.named = TRUE;
2354 else
2355 tok.named = FALSE;
2356 }
2357 tok.lineno = lineno;
2358 tok.linelen = tokoff + toklen + 1;
2359 tok.buffer = newlb.buffer;
2360 tok.linepos = newlinepos;
2361 tok.valid = TRUE;
2362
2363 if (definedef == dnone
2364 && (funcdef == ftagseen
2365 || structdef == stagseen
2366 || typdef == tend
2367 || objdef != onone))
2368 {
2369 if (current_lb_is_new)
2370 switch_line_buffers ();
2371 }
2372 else
2373 make_C_tag (is_func);
2374 }
2375 midtoken = FALSE;
2376 }
2377 } /* if (endtoken (c)) */
2378 else if (intoken (c))
2379 {
2380 toklen++;
2381 continue;
2382 }
2383 } /* if (midtoken) */
2384 else if (begtoken (c))
2385 {
2386 switch (definedef)
2387 {
2388 case dnone:
2389 switch (funcdef)
2390 {
2391 case fstartlist:
2392 funcdef = finlist;
2393 continue;
2394 case flistseen:
2395 make_C_tag (TRUE);
2396 funcdef = fignore;
2397 break;
2398 case ftagseen:
2399 funcdef = fnone;
2400 break;
2401 }
2402 if (structdef == stagseen)
2403 structdef = snone;
2404 break;
2405 case dsharpseen:
2406 savetok = tok;
2407 }
2408 if (!yacc_rules || lp == newlb.buffer + 1)
2409 {
2410 tokoff = lp - 1 - newlb.buffer;
2411 toklen = 1;
2412 midtoken = TRUE;
2413 }
2414 continue;
2415 } /* if (begtoken) */
2416 } /* if must look at token */
2417
2418
2419 /* Detect end of line, colon, comma, semicolon and various braces
2420 after having handled a token.*/
2421 switch (c)
2422 {
2423 case ':':
2424 if (definedef != dnone)
2425 break;
2426 switch (objdef)
2427 {
2428 case otagseen:
2429 objdef = oignore;
2430 make_C_tag (TRUE);
2431 break;
2432 case omethodtag:
2433 case omethodparm:
2434 objdef = omethodcolon;
2435 methodlen += 1;
2436 grow_linebuffer (&token_name, methodlen+1);
2437 strcat (token_name.buffer, ":");
2438 break;
2439 }
2440 if (structdef == stagseen)
2441 structdef = scolonseen;
2442 else
2443 switch (funcdef)
2444 {
2445 case ftagseen:
2446 if (yacc_rules)
2447 {
2448 make_C_tag (FALSE);
2449 funcdef = fignore;
2450 }
2451 break;
2452 case fstartlist:
2453 funcdef = fnone;
2454 break;
2455 }
2456 break;
2457 case ';':
2458 if (definedef != dnone)
2459 break;
2460 if (cblev == 0)
2461 switch (typdef)
2462 {
2463 case tend:
2464 make_C_tag (FALSE);
2465 /* FALLTHRU */
2466 default:
2467 typdef = tnone;
2468 }
2469 if (funcdef != fignore)
2470 {
2471 funcdef = fnone;
2472 /* The following instruction invalidates the token.
2473 Probably the token should be invalidated in all
2474 other cases where some state machine is reset. */
2475 tok.valid = FALSE;
2476 }
2477 if (structdef == stagseen)
2478 structdef = snone;
2479 break;
2480 case ',':
2481 if (definedef != dnone)
2482 break;
2483 switch (objdef)
2484 {
2485 case omethodtag:
2486 case omethodparm:
2487 make_C_tag (TRUE);
2488 objdef = oinbody;
2489 break;
2490 }
2491 if (funcdef != finlist && funcdef != fignore)
2492 funcdef = fnone;
2493 if (structdef == stagseen)
2494 structdef = snone;
2495 break;
2496 case '[':
2497 if (definedef != dnone)
2498 break;
2499 if (cblev == 0 && typdef == tend)
2500 {
2501 typdef = tignore;
2502 make_C_tag (FALSE);
2503 break;
2504 }
2505 if (funcdef != finlist && funcdef != fignore)
2506 funcdef = fnone;
2507 if (structdef == stagseen)
2508 structdef = snone;
2509 break;
2510 case '(':
2511 if (definedef != dnone)
2512 break;
2513 if (objdef == otagseen && parlev == 0)
2514 objdef = oparenseen;
2515 switch (funcdef)
2516 {
2517 case fnone:
2518 switch (typdef)
2519 {
2520 case ttypedseen:
2521 case tend:
2522 /* Make sure that the next char is not a '*'.
2523 This handles constructs like:
2524 typedef void OperatorFun (int fun); */
2525 if (*lp != '*')
2526 {
2527 typdef = tignore;
2528 make_C_tag (FALSE);
2529 }
2530 break;
2531 } /* switch (typdef) */
2532 break;
2533 case ftagseen:
2534 funcdef = fstartlist;
2535 break;
2536 case flistseen:
2537 funcdef = finlist;
2538 break;
2539 }
2540 parlev++;
2541 break;
2542 case ')':
2543 if (definedef != dnone)
2544 break;
2545 if (objdef == ocatseen && parlev == 1)
2546 {
2547 make_C_tag (TRUE);
2548 objdef = oignore;
2549 }
2550 if (--parlev == 0)
2551 {
2552 switch (funcdef)
2553 {
2554 case fstartlist:
2555 case finlist:
2556 funcdef = flistseen;
2557 break;
2558 }
2559 if (cblev == 0 && typdef == tend)
2560 {
2561 typdef = tignore;
2562 make_C_tag (FALSE);
2563 }
2564 }
2565 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
2566 parlev = 0;
2567 break;
2568 case '{':
2569 if (definedef != dnone)
2570 break;
2571 if (typdef == ttypedseen)
2572 typdef = tinbody;
2573 switch (structdef)
2574 {
2575 case skeyseen: /* unnamed struct */
2576 structdef = sinbody;
2577 structtag = "_anonymous_";
2578 break;
2579 case stagseen:
2580 case scolonseen: /* named struct */
2581 structdef = sinbody;
2582 make_C_tag (FALSE);
2583 break;
2584 }
2585 switch (funcdef)
2586 {
2587 case flistseen:
2588 make_C_tag (TRUE);
2589 /* FALLTHRU */
2590 case fignore:
2591 funcdef = fnone;
2592 break;
2593 case fnone:
2594 switch (objdef)
2595 {
2596 case otagseen:
2597 make_C_tag (TRUE);
2598 objdef = oignore;
2599 break;
2600 case omethodtag:
2601 case omethodparm:
2602 make_C_tag (TRUE);
2603 objdef = oinbody;
2604 break;
2605 default:
2606 /* Neutralize `extern "C" {' grot. */
2607 if (cblev == 0 && structdef == snone && typdef == tnone)
2608 cblev = -1;
2609 }
2610 }
2611 cblev++;
2612 break;
2613 case '*':
2614 if (definedef != dnone)
2615 break;
2616 if (funcdef == fstartlist)
2617 funcdef = fnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
2618 break;
2619 case '}':
2620 if (definedef != dnone)
2621 break;
2622 if (!noindentypedefs && lp == newlb.buffer + 1)
2623 {
2624 cblev = 0; /* reset curly brace level if first column */
2625 parlev = 0; /* also reset paren level, just in case... */
2626 }
2627 else if (cblev > 0)
2628 cblev--;
2629 if (cblev == 0)
2630 {
2631 if (typdef == tinbody)
2632 typdef = tend;
2633 /* Memory leakage here: the string pointed by structtag is
2634 never released, because I fear to miss something and
2635 break things while freeing the area. The amount of
2636 memory leaked here is the sum of the lengths of the
2637 struct tags.
2638 if (structdef == sinbody)
2639 free (structtag); */
2640
2641 structdef = snone;
2642 structtag = "<error>";
2643 }
2644 break;
2645 case '+':
2646 case '-':
2647 if (objdef == oinbody && cblev == 0)
2648 {
2649 objdef = omethodsign;
2650 break;
2651 }
2652 /* FALLTHRU */
2653 case '=': case '#': case '~': case '&': case '%': case '/':
2654 case '|': case '^': case '!': case '<': case '>': case '.': case '?':
2655 if (definedef != dnone)
2656 break;
2657 /* These surely cannot follow a function tag. */
2658 if (funcdef != finlist && funcdef != fignore)
2659 funcdef = fnone;
2660 break;
2661 case '\0':
2662 if (objdef == otagseen)
2663 {
2664 make_C_tag (TRUE);
2665 objdef = oignore;
2666 }
2667 /* If a macro spans multiple lines don't reset its state. */
2668 if (quotednl)
2669 CNL_SAVE_DEFINEDEF;
2670 else
2671 CNL;
2672 break;
2673 } /* switch (c) */
2674
2675 } /* while not eof */
2676 }
2677
2678 /*
2679 * Process either a C++ file or a C file depending on the setting
2680 * of a global flag.
2681 */
2682 void
2683 default_C_entries (inf)
2684 FILE *inf;
2685 {
2686 C_entries (cplusplus ? C_PLPL : 0, inf);
2687 }
2688
/* Run C_entries with no language extension flags: plain ANSI C.  */
void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;

  C_entries (c_ext, inf);
}
2696
2697 /* Always do C++. */
2698 void
2699 Cplusplus_entries (inf)
2700 FILE *inf;
2701 {
2702 C_entries (C_PLPL, inf);
2703 }
2704
2705 /* Always do C*. */
2706 void
2707 Cstar_entries (inf)
2708 FILE *inf;
2709 {
2710 C_entries (C_STAR, inf);
2711 }
2712
2713 /* Always do Yacc. */
2714 void
2715 Yacc_entries (inf)
2716 FILE *inf;
2717 {
2718 C_entries (YACC, inf);
2719 }
2720 \f
2721 /* Fortran parsing */
2722
/* Scan pointer into the line currently being examined.  It is shared
   by the Fortran helpers below (tail, takeprec, getit) and is also set
   and advanced by the other line-oriented parsers in this file.  */
char *dbp;
2724
2725 logical
2726 tail (cp)
2727 char *cp;
2728 {
2729 register int len = 0;
2730
2731 while (*cp && lowcase(*cp) == lowcase(dbp[len]))
2732 cp++, len++;
2733 if (*cp == '\0' && !intoken(dbp[len]))
2734 {
2735 dbp += len;
2736 return TRUE;
2737 }
2738 return FALSE;
2739 }
2740
2741 void
2742 takeprec ()
2743 {
2744 while (isspace (*dbp))
2745 dbp++;
2746 if (*dbp != '*')
2747 return;
2748 dbp++;
2749 while (isspace (*dbp))
2750 dbp++;
2751 if (strneq (dbp, "(*)", 3))
2752 {
2753 dbp += 3;
2754 return;
2755 }
2756 if (!isdigit (*dbp))
2757 {
2758 --dbp; /* force failure */
2759 return;
2760 }
2761 do
2762 dbp++;
2763 while (isdigit (*dbp));
2764 }
2765
2766 void
2767 getit (inf)
2768 FILE *inf;
2769 {
2770 register char *cp;
2771
2772 while (isspace (*dbp))
2773 dbp++;
2774 if (*dbp == '\0')
2775 {
2776 lineno++;
2777 linecharno = charno;
2778 charno += readline (&lb, inf);
2779 dbp = lb.buffer;
2780 if (dbp[5] != '&')
2781 return;
2782 dbp += 6;
2783 while (isspace (*dbp))
2784 dbp++;
2785 }
2786 if (!isalpha (*dbp)
2787 && *dbp != '_'
2788 && *dbp != '$')
2789 return;
2790 for (cp = dbp + 1;
2791 (*cp
2792 && (isalpha (*cp) || isdigit (*cp) || (*cp == '_') || (*cp == '$')));
2793 cp++)
2794 continue;
2795 pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE,
2796 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2797 }
2798
2799 void
2800 Fortran_functions (inf)
2801 FILE *inf;
2802 {
2803 lineno = 0;
2804 charno = 0;
2805
2806 while (!feof (inf))
2807 {
2808 lineno++;
2809 linecharno = charno;
2810 charno += readline (&lb, inf);
2811 dbp = lb.buffer;
2812 if (*dbp == '%')
2813 dbp++; /* Ratfor escape to fortran */
2814 while (isspace (*dbp))
2815 dbp++;
2816 if (*dbp == '\0')
2817 continue;
2818 switch (lowcase (*dbp))
2819 {
2820 case 'i':
2821 if (tail ("integer"))
2822 takeprec ();
2823 break;
2824 case 'r':
2825 if (tail ("real"))
2826 takeprec ();
2827 break;
2828 case 'l':
2829 if (tail ("logical"))
2830 takeprec ();
2831 break;
2832 case 'c':
2833 if (tail ("complex") || tail ("character"))
2834 takeprec ();
2835 break;
2836 case 'd':
2837 if (tail ("double"))
2838 {
2839 while (isspace (*dbp))
2840 dbp++;
2841 if (*dbp == '\0')
2842 continue;
2843 if (tail ("precision"))
2844 break;
2845 continue;
2846 }
2847 break;
2848 }
2849 while (isspace (*dbp))
2850 dbp++;
2851 if (*dbp == '\0')
2852 continue;
2853 switch (lowcase (*dbp))
2854 {
2855 case 'f':
2856 if (tail ("function"))
2857 getit (inf);
2858 continue;
2859 case 's':
2860 if (tail ("subroutine"))
2861 getit (inf);
2862 continue;
2863 case 'e':
2864 if (tail ("entry"))
2865 getit (inf);
2866 continue;
2867 case 'p':
2868 if (tail ("program"))
2869 {
2870 getit (inf);
2871 continue;
2872 }
2873 if (tail ("procedure"))
2874 getit (inf);
2875 continue;
2876 }
2877 }
2878 }
2879 \f
2880 /*
2881 * Bob Weiner, Motorola Inc., 4/3/94
2882 * Unix and microcontroller assembly tag handling
2883 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
2884 */
2885 void
2886 Asm_labels (inf)
2887 FILE *inf;
2888 {
2889 register char *cp;
2890
2891 lineno = 0;
2892 charno = 0;
2893
2894 while (!feof (inf))
2895 {
2896 lineno++;
2897 linecharno = charno;
2898 charno += readline (&lb, inf);
2899 cp = lb.buffer;
2900
2901 /* If first char is alphabetic or one of [_.$], test for colon
2902 following identifier. */
2903 if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2904 {
2905 /* Read past label. */
2906 cp++;
2907 while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2908 cp++;
2909 if (*cp == ':' || isspace (*cp))
2910 {
2911 /* Found end of label, so copy it and add it to the table. */
2912 pfnote ((CTAGS) ? savenstr(lb.buffer, cp-lb.buffer) : NULL, TRUE,
2913 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2914 }
2915 }
2916 }
2917 }
2918 \f
2919 /*
2920 * Perl support by Bart Robinson <lomew@cs.utah.edu>
2921 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
2922 */
2923 void
2924 Perl_functions (inf)
2925 FILE *inf;
2926 {
2927 register char *cp;
2928
2929 lineno = 0;
2930 charno = 0;
2931
2932 while (!feof (inf))
2933 {
2934 lineno++;
2935 linecharno = charno;
2936 charno += readline (&lb, inf);
2937 cp = lb.buffer;
2938
2939 if (*cp++ == 's' && *cp++ == 'u' && *cp++ == 'b' && isspace(*cp++))
2940 {
2941 while (*cp && isspace(*cp))
2942 cp++;
2943 while (*cp && ! isspace(*cp) && *cp != '{')
2944 cp++;
2945 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : NULL, TRUE,
2946 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2947 }
2948 }
2949 }
2950 \f
2951 /* Added by Mosur Mohan, 4/22/88 */
2952 /* Pascal parsing */
2953
2954 /*
2955 * Locates tags for procedures & functions. Doesn't do any type- or
2956 * var-definitions. It does look for the keyword "extern" or
2957 * "forward" immediately following the procedure statement; if found,
2958 * the tag is skipped.
2959 */
2960 void
2961 Pascal_functions (inf)
2962 FILE *inf;
2963 {
2964 struct linebuffer tline; /* mostly copied from C_entries */
2965 long save_lcno;
2966 int save_lineno, save_len;
2967 char c, *cp, *namebuf;
2968
2969 logical /* each of these flags is TRUE iff: */
2970 incomment, /* point is inside a comment */
2971 inquote, /* point is inside '..' string */
2972 get_tagname, /* point is after PROCEDURE/FUNCTION
2973 keyword, so next item = potential tag */
2974 found_tag, /* point is after a potential tag */
2975 inparms, /* point is within parameter-list */
2976 verify_tag; /* point has passed the parm-list, so the
2977 next token will determine whether this
2978 is a FORWARD/EXTERN to be ignored, or
2979 whether it is a real tag */
2980
2981 lineno = 0;
2982 charno = 0;
2983 dbp = lb.buffer;
2984 *dbp = '\0';
2985 save_len = 0;
2986 initbuffer (&tline);
2987
2988 incomment = inquote = FALSE;
2989 found_tag = FALSE; /* have a proc name; check if extern */
2990 get_tagname = FALSE; /* have found "procedure" keyword */
2991 inparms = FALSE; /* found '(' after "proc" */
2992 verify_tag = FALSE; /* check if "extern" is ahead */
2993
2994 /* long main loop to get next char */
2995 while (!feof (inf))
2996 {
2997 c = *dbp++;
2998 if (c == '\0') /* if end of line */
2999 {
3000 lineno++;
3001 linecharno = charno;
3002 charno += readline (&lb, inf);
3003 dbp = lb.buffer;
3004 if (*dbp == '\0')
3005 continue;
3006 if (!((found_tag && verify_tag) ||
3007 get_tagname))
3008 c = *dbp++; /* only if don't need *dbp pointing
3009 to the beginning of the name of
3010 the procedure or function */
3011 }
3012 if (incomment)
3013 {
3014 if (c == '}') /* within { } comments */
3015 incomment = FALSE;
3016 else if (c == '*' && *dbp == ')') /* within (* *) comments */
3017 {
3018 dbp++;
3019 incomment = FALSE;
3020 }
3021 continue;
3022 }
3023 else if (inquote)
3024 {
3025 if (c == '\'')
3026 inquote = FALSE;
3027 continue;
3028 }
3029 else
3030 switch (c)
3031 {
3032 case '\'':
3033 inquote = TRUE; /* found first quote */
3034 continue;
3035 case '{': /* found open { comment */
3036 incomment = TRUE;
3037 continue;
3038 case '(':
3039 if (*dbp == '*') /* found open (* comment */
3040 {
3041 incomment = TRUE;
3042 dbp++;
3043 }
3044 else if (found_tag) /* found '(' after tag, i.e., parm-list */
3045 inparms = TRUE;
3046 continue;
3047 case ')': /* end of parms list */
3048 if (inparms)
3049 inparms = FALSE;
3050 continue;
3051 case ';':
3052 if (found_tag && !inparms) /* end of proc or fn stmt */
3053 {
3054 verify_tag = TRUE;
3055 break;
3056 }
3057 continue;
3058 }
3059 if (found_tag && verify_tag && (*dbp != ' '))
3060 {
3061 /* check if this is an "extern" declaration */
3062 if (*dbp == '\0')
3063 continue;
3064 if (lowcase (*dbp == 'e'))
3065 {
3066 if (tail ("extern")) /* superfluous, really! */
3067 {
3068 found_tag = FALSE;
3069 verify_tag = FALSE;
3070 }
3071 }
3072 else if (lowcase (*dbp) == 'f')
3073 {
3074 if (tail ("forward")) /* check for forward reference */
3075 {
3076 found_tag = FALSE;
3077 verify_tag = FALSE;
3078 }
3079 }
3080 if (found_tag && verify_tag) /* not external proc, so make tag */
3081 {
3082 found_tag = FALSE;
3083 verify_tag = FALSE;
3084 pfnote (namebuf, TRUE,
3085 tline.buffer, save_len, save_lineno, save_lcno);
3086 continue;
3087 }
3088 }
3089 if (get_tagname) /* grab name of proc or fn */
3090 {
3091 if (*dbp == '\0')
3092 continue;
3093
3094 /* save all values for later tagging */
3095 grow_linebuffer (&tline, strlen (lb.buffer) + 1);
3096 strcpy (tline.buffer, lb.buffer);
3097 save_lineno = lineno;
3098 save_lcno = linecharno;
3099
3100 /* grab block name */
3101 for (cp = dbp + 1; *cp && (!endtoken (*cp)); cp++)
3102 continue;
3103 namebuf = (CTAGS) ? savenstr (dbp, cp-dbp) : NULL;
3104 dbp = cp; /* set dbp to e-o-token */
3105 save_len = dbp - lb.buffer + 1;
3106 get_tagname = FALSE;
3107 found_tag = TRUE;
3108 continue;
3109
3110 /* and proceed to check for "extern" */
3111 }
3112 else if (!incomment && !inquote && !found_tag)
3113 {
3114 /* check for proc/fn keywords */
3115 switch (lowcase (c))
3116 {
3117 case 'p':
3118 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
3119 get_tagname = TRUE;
3120 continue;
3121 case 'f':
3122 if (tail ("unction"))
3123 get_tagname = TRUE;
3124 continue;
3125 }
3126 }
3127 } /* while not eof */
3128
3129 free (tline.buffer);
3130 }
3131 \f
/*
 * lisp tag functions
 *  look for (def or (DEF, quote or QUOTE
 */

/* Return 1 if the three characters after STRP[0] spell "def" in any
   mix of upper and lower case, 0 otherwise.  STRP[0] itself (normally
   the open parenthesis) is not examined.  */
int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
3144
/* Return 1 if the five characters after STRP[0] spell "quote" in any
   mix of upper and lower case and are followed by white space, 0
   otherwise.  STRP[0] itself is not examined.  */
int
L_isquote (strp)
     register char *strp;
{
  static char lower[] = "quote";
  static char upper[] = "QUOTE";
  int i;

  /* Stop at the first mismatch so that nothing past the end of a
     short string is ever looked at.  */
  for (i = 0; i < 5; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;

  return isspace(strp[6]) != 0;
}
3156
3157 void
3158 L_getit ()
3159 {
3160 register char *cp;
3161
3162 if (*dbp == '\'') /* Skip prefix quote */
3163 dbp++;
3164 else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */
3165 {
3166 dbp += 7;
3167 while (isspace(*dbp))
3168 dbp++;
3169 }
3170 for (cp = dbp /*+1*/;
3171 *cp && *cp != '(' && *cp != ' ' && *cp != ')';
3172 cp++)
3173 continue;
3174 if (cp == dbp)
3175 return;
3176
3177 pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE,
3178 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3179 }
3180
3181 void
3182 Lisp_functions (inf)
3183 FILE *inf;
3184 {
3185 lineno = 0;
3186 charno = 0;
3187
3188 while (!feof (inf))
3189 {
3190 lineno++;
3191 linecharno = charno;
3192 charno += readline (&lb, inf);
3193 dbp = lb.buffer;
3194 if (dbp[0] == '(')
3195 {
3196 if (L_isdef (dbp))
3197 {
3198 while (!isspace (*dbp))
3199 dbp++;
3200 while (isspace (*dbp))
3201 dbp++;
3202 L_getit ();
3203 }
3204 else
3205 {
3206 /* Check for (foo::defmumble name-defined ... */
3207 do
3208 dbp++;
3209 while (*dbp && !isspace (*dbp)
3210 && *dbp != ':' && *dbp != '(' && *dbp != ')');
3211 if (*dbp == ':')
3212 {
3213 do
3214 dbp++;
3215 while (*dbp == ':');
3216
3217 if (L_isdef (dbp - 1))
3218 {
3219 while (!isspace (*dbp))
3220 dbp++;
3221 while (isspace (*dbp))
3222 dbp++;
3223 L_getit ();
3224 }
3225 }
3226 }
3227 }
3228 }
3229 }
3230 \f
3231 /*
3232 * Scheme tag functions
3233 * look for (def... xyzzy
3234 * look for (def... (xyzzy
3235 * look for (def ... ((...(xyzzy ....
3236 * look for (set! xyzzy
3237 */
3238
3239 void get_scheme ();
3240
3241 void
3242 Scheme_functions (inf)
3243 FILE *inf;
3244 {
3245 lineno = 0;
3246 charno = 0;
3247
3248 while (!feof (inf))
3249 {
3250 lineno++;
3251 linecharno = charno;
3252 charno += readline (&lb, inf);
3253 dbp = lb.buffer;
3254 if (dbp[0] == '(' &&
3255 (dbp[1] == 'D' || dbp[1] == 'd') &&
3256 (dbp[2] == 'E' || dbp[2] == 'e') &&
3257 (dbp[3] == 'F' || dbp[3] == 'f'))
3258 {
3259 while (!isspace (*dbp))
3260 dbp++;
3261 /* Skip over open parens and white space */
3262 while (*dbp && (isspace (*dbp) || *dbp == '('))
3263 dbp++;
3264 get_scheme ();
3265 }
3266 if (dbp[0] == '(' &&
3267 (dbp[1] == 'S' || dbp[1] == 's') &&
3268 (dbp[2] == 'E' || dbp[2] == 'e') &&
3269 (dbp[3] == 'T' || dbp[3] == 't') &&
3270 (dbp[4] == '!' || dbp[4] == '!') &&
3271 (isspace (dbp[5])))
3272 {
3273 while (!isspace (*dbp))
3274 dbp++;
3275 /* Skip over white space */
3276 while (isspace (*dbp))
3277 dbp++;
3278 get_scheme ();
3279 }
3280 }
3281 }
3282
3283 void
3284 get_scheme ()
3285 {
3286 register char *cp;
3287
3288 if (*dbp == '\0')
3289 return;
3290 /* Go till you get to white space or a syntactic break */
3291 for (cp = dbp + 1;
3292 *cp && *cp != '(' && *cp != ')' && !isspace (*cp);
3293 cp++)
3294 continue;
3295 pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE,
3296 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3297 }
3298 \f
3299 /* Find tags in TeX and LaTeX input files. */
3300
/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.
   CONVERT THIS TO USE THE Stab TYPE!!  */
struct TEX_tabent
{
  char *name;			/* control sequence name; TEX_decode_env
				   stores it without any separator, and
				   sets it to NULL in the end marker */
  int len;			/* length of NAME; 0 in the end marker */
};
3309
/* Built on the first call of TeX_functions and reused afterwards.
   The last entry has a NULL name and a zero length.  */
struct TEX_tabent *TEX_toktab = NULL;	/* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are separated by colons.  */

char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index";

/* Forward declarations of the TeX helpers defined further down.  */
void TEX_mode ();
struct TEX_tabent *TEX_decode_env ();
int TEX_Token ();
#if TeX_named_tokens
void TEX_getit ();
#endif

/* Escape and grouping characters in use; TEX_mode sets them again
   for every file it is called on.  */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
3329
3330 /*
3331 * TeX/LaTeX scanning loop.
3332 */
3333 void
3334 TeX_functions (inf)
3335 FILE *inf;
3336 {
3337 char *lasthit;
3338
3339 lineno = 0;
3340 charno = 0;
3341
3342 /* Select either \ or ! as escape character. */
3343 TEX_mode (inf);
3344
3345 /* Initialize token table once from environment. */
3346 if (!TEX_toktab)
3347 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
3348
3349 while (!feof (inf))
3350 { /* Scan each line in file */
3351 lineno++;
3352 linecharno = charno;
3353 charno += readline (&lb, inf);
3354 dbp = lb.buffer;
3355 lasthit = dbp;
3356 while (dbp = etags_strchr (dbp, TEX_esc)) /* Look at each esc in line */
3357 {
3358 register int i;
3359
3360 if (!*(++dbp))
3361 break;
3362 linecharno += dbp - lasthit;
3363 lasthit = dbp;
3364 i = TEX_Token (lasthit);
3365 if (0 <= i)
3366 {
3367 pfnote (NULL, TRUE,
3368 lb.buffer, strlen (lb.buffer), lineno, linecharno);
3369 #if TeX_named_tokens
3370 TEX_getit (lasthit, TEX_toktab[i].len);
3371 #endif
3372 break; /* We only save a line once */
3373 }
3374 }
3375 }
3376 }
3377
3378 #define TEX_LESC '\\'
3379 #define TEX_SESC '!'
3380 #define TEX_cmt '%'
3381
3382 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
3383 chars accordingly. */
3384 void
3385 TEX_mode (inf)
3386 FILE *inf;
3387 {
3388 int c;
3389
3390 while ((c = getc (inf)) != EOF)
3391 {
3392 /* Skip to next line if we hit the TeX comment char. */
3393 if (c == TEX_cmt)
3394 while (c != '\n')
3395 c = getc (inf);
3396 else if (c == TEX_LESC || c == TEX_SESC )
3397 break;
3398 }
3399
3400 if (c == TEX_LESC)
3401 {
3402 TEX_esc = TEX_LESC;
3403 TEX_opgrp = '{';
3404 TEX_clgrp = '}';
3405 }
3406 else
3407 {
3408 TEX_esc = TEX_SESC;
3409 TEX_opgrp = '<';
3410 TEX_clgrp = '>';
3411 }
3412 rewind (inf);
3413 }
3414
3415 /* Read environment and prepend it to the default string.
3416 Build token table. */
3417 struct TEX_tabent *
3418 TEX_decode_env (evarname, defenv)
3419 char *evarname;
3420 char *defenv;
3421 {
3422 register char *env, *p;
3423
3424 struct TEX_tabent *tab;
3425 int size, i;
3426
3427 /* Append default string to environment. */
3428 env = getenv (evarname);
3429 if (!env)
3430 env = defenv;
3431 else
3432 env = concat (env, defenv, "");
3433
3434 /* Allocate a token table */
3435 for (size = 1, p = env; p;)
3436 if ((p = etags_strchr (p, ':')) && *(++p))
3437 size++;
3438 /* Add 1 to leave room for null terminator. */
3439 tab = xnew (size + 1, struct TEX_tabent);
3440
3441 /* Unpack environment string into token table. Be careful about */
3442 /* zero-length strings (leading ':', "::" and trailing ':') */
3443 for (i = 0; *env;)
3444 {
3445 p = etags_strchr (env, ':');
3446 if (!p) /* End of environment string. */
3447 p = env + strlen (env);
3448 if (p - env > 0)
3449 { /* Only non-zero strings. */
3450 tab[i].name = savenstr (env, p - env);
3451 tab[i].len = strlen (tab[i].name);
3452 i++;
3453 }
3454 if (*p)
3455 env = p + 1;
3456 else
3457 {
3458 tab[i].name = NULL; /* Mark end of table. */
3459 tab[i].len = 0;
3460 break;
3461 }
3462 }
3463 return tab;
3464 }
3465
3466 #if TeX_named_tokens
3467 /* Record a tag defined by a TeX command of length LEN and starting at NAME.
3468 The name being defined actually starts at (NAME + LEN + 1).
3469 But we seem to include the TeX command in the tag name. */
3470 void
3471 TEX_getit (name, len)
3472 char *name;
3473 int len;
3474 {
3475 char *p = name + len;
3476
3477 if (*name == '\0')
3478 return;
3479
3480 /* Let tag name extend to next group close (or end of line) */
3481 while (*p && *p != TEX_clgrp)
3482 p++;
3483 pfnote (savenstr (name, p-name), TRUE,
3484 lb.buffer, strlen (lb.buffer), lineno, linecharno);
3485 }
3486 #endif
3487
3488 /* If the text at CP matches one of the tag-defining TeX command names,
3489 return the pointer to the first occurrence of that command in TEX_toktab.
3490 Otherwise return -1.
3491 Keep the capital `T' in `Token' for dumb truncating compilers
3492 (this distinguishes it from `TEX_toktab' */
3493 int
3494 TEX_Token (cp)
3495 char *cp;
3496 {
3497 int i;
3498
3499 for (i = 0; TEX_toktab[i].len > 0; i++)
3500 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
3501 return i;
3502 return -1;
3503 }
3504 \f
3505 /*
3506 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
3507 *
3508 * Assumes that the predicate starts at column 0.
3509 * Only the first clause of a predicate is added.
3510 */
3511 void
3512 Prolog_functions (inf)
3513 FILE *inf;
3514 {
3515 int prolog_pred ();
3516 void prolog_skip_comment ();
3517
3518 char * last;
3519 int len;
3520 int allocated;
3521
3522 allocated = 0;
3523 len = 0;
3524 last = NULL;
3525
3526 lineno = 0;
3527 linecharno = 0;
3528 charno = 0;
3529
3530 while (!feof (inf))
3531 {
3532 lineno++;
3533 linecharno += charno;
3534 charno = readline (&lb, inf);
3535 dbp = lb.buffer;
3536 if (dbp[0] == '\0') /* Empty line */
3537 continue;
3538 else if (isspace (dbp[0])) /* Not a predicate */
3539 continue;
3540 else if (dbp[0] == '/' && dbp[1] == '*') /* comment. */
3541 prolog_skip_comment (&lb, inf);
3542 else if (len = prolog_pred (dbp, last))
3543 {
3544 /* Predicate. Store the function name so that we only
3545 * generates a tag for the first clause. */
3546 if (last == NULL)
3547 last = xnew(len + 1, char);
3548 else if (len + 1 > allocated)
3549 last = (char *) xrealloc(last, len + 1);
3550 allocated = len + 1;
3551 strncpy (last, dbp, len);
3552 last[len] = '\0';
3553 }
3554 }
3555 }
3556
3557
3558 void
3559 prolog_skip_comment (plb, inf)
3560 struct linebuffer *plb;
3561 FILE *inf;
3562 {
3563 char *cp;
3564
3565 do
3566 {
3567 for (cp = plb->buffer; *cp != '\0'; cp++)
3568 if (cp[0] == '*' && cp[1] == '/')
3569 return;
3570 lineno++;
3571 linecharno += readline (plb, inf);
3572 }
3573 while (!feof(inf));
3574 }
3575
3576 /*
3577 * A predicate definition is added if it matches:
3578 * <beginning of line><Prolog Atom><whitespace>(
3579 *
3580 * It is added to the tags database if it doesn't match the
3581 * name of the previous clause header.
3582 *
3583 * Return the size of the name of the predicate, or 0 if no header
3584 * was found.
3585 */
3586 int
3587 prolog_pred (s, last)
3588 char *s;
3589 char *last; /* Name of last clause. */
3590 {
3591 int prolog_atom();
3592 int prolog_white();
3593
3594 int pos;
3595 int len;
3596
3597 pos = prolog_atom(s, 0);
3598 if (pos < 1)
3599 return 0;
3600
3601 len = pos;
3602 pos += prolog_white(s, pos);
3603
3604 if ((s[pos] == '(') || (s[pos] == '.'))
3605 {
3606 if (s[pos] == '(')
3607 pos++;
3608
3609 /* Save only the first clause. */
3610 if ((last == NULL) ||
3611 (len != strlen(last)) ||
3612 (strncmp(s, last, len) != 0))
3613 {
3614 pfnote ((CTAGS) ? savenstr (s, len) : NULL, TRUE,
3615 s, pos, lineno, linecharno);
3616 return len;
3617 }
3618 }
3619 return 0;
3620 }
3621
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   with an underscore; underscores may also appear inside it.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (islower(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
	p++;
      while (isalnum(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  for (p++;;)
    switch (*p)
      {
      case '\'':
	p++;
	if (*p != '\'')
	  return p - start;	/* that was the closing quote */
	p++;			/* A double quote */
	break;
      case '\0':
	/* Multiline quoted atoms are ignored. */
	return -1;
      case '\\':
	if (p[1] == '\0')
	  return -1;
	p += 2;
	break;
      default:
	p++;
	break;
      }
}
3680
/* Consume whitespace starting at S[POS].  Return the number of bytes
   eaten, which is 0 when S[POS] is not white space.  */
int
prolog_white (s, pos)
     char *s;
     int pos;
{
  int eaten;

  for (eaten = 0; isspace(s[pos + eaten]); eaten++)
    continue;

  return eaten;
}
3696 \f
3697 /*
3698 * Support for Erlang -- Anders Lindgren, Feb 1996.
3699 *
3700 * Generates tags for functions, defines, and records.
3701 *
3702 * Assumes that Erlang functions start at column 0.
3703 */
3704 void
3705 Erlang_functions (inf)
3706 FILE *inf;
3707 {
3708 int erlang_func ();
3709 void erlang_attribute ();
3710
3711 char * last;
3712 int len;
3713 int allocated;
3714
3715 allocated = 0;
3716 len = 0;
3717 last = NULL;
3718
3719 lineno = 0;
3720 linecharno = 0;
3721 charno = 0;
3722
3723 while (!feof (inf))
3724 {
3725 lineno++;
3726 linecharno += charno;
3727 charno = readline (&lb, inf);
3728 dbp = lb.buffer;
3729 if (dbp[0] == '\0') /* Empty line */
3730 continue;
3731 else if (isspace (dbp[0])) /* Not function nor attribute */
3732 continue;
3733 else if (dbp[0] == '%') /* comment */
3734 continue;
3735 else if (dbp[0] == '"') /* Sometimes, strings start in column one */
3736 continue;
3737 else if (dbp[0] == '-') /* attribute, e.g. "-define" */
3738 {
3739 erlang_attribute(dbp);
3740 last = NULL;
3741 }
3742 else if (len = erlang_func (dbp, last))
3743 {
3744 /*
3745 * Function. Store the function name so that we only
3746 * generates a tag for the first clause.
3747 */
3748 if (last == NULL)
3749 last = xnew(len + 1, char);
3750 else if (len + 1 > allocated)
3751 last = (char *) xrealloc(last, len + 1);
3752 allocated = len + 1;
3753 strncpy (last, dbp, len);
3754 last[len] = '\0';
3755 }
3756 }
3757 }
3758
3759
3760 /*
3761 * A function definition is added if it matches:
3762 * <beginning of line><Erlang Atom><whitespace>(
3763 *
3764 * It is added to the tags database if it doesn't match the
3765 * name of the previous clause header.
3766 *
3767 * Return the size of the name of the function, or 0 if no function
3768 * was found.
3769 */
3770 int
3771 erlang_func (s, last)
3772 char *s;
3773 char *last; /* Name of last clause. */
3774 {
3775 int erlang_atom ();
3776 int erlang_white ();
3777
3778 int pos;
3779 int len;
3780
3781 pos = erlang_atom(s, 0);
3782 if (pos < 1)
3783 return 0;
3784
3785 len = pos;
3786 pos += erlang_white(s, pos);
3787
3788 if (s[pos++] == '(')
3789 {
3790 /* Save only the first clause. */
3791 if ((last == NULL) ||
3792 (len != strlen(last)) ||
3793 (strncmp(s, last, len) != 0))
3794 {
3795 pfnote ((CTAGS) ? savenstr (s, len) : NULL, TRUE,
3796 s, pos, lineno, linecharno);
3797 return len;
3798 }
3799 }
3800 return 0;
3801 }
3802
3803
3804 /*
3805 * Handle attributes. Currently, tags are generated for defines
3806 * and records.
3807 *
3808 * They are on the form:
3809 * -define(foo, bar).
3810 * -define(Foo(M, N), M+N).
3811 * -record(graph, {vtab = notable, cyclic = true}).
3812 */
3813 void
3814 erlang_attribute (s)
3815 char *s;
3816 {
3817 int erlang_atom ();
3818 int erlang_white ();
3819
3820 int pos;
3821 int len;
3822
3823 if ((strncmp(s, "-define", 7) == 0) ||
3824 (strncmp(s, "-record", 7) == 0))
3825 {
3826 pos = 7;
3827 pos += erlang_white(s, pos);
3828
3829 if (s[pos++] == '(')
3830 {
3831 pos += erlang_white(s, pos);
3832
3833 if (len = erlang_atom(s, pos))
3834 {
3835 pfnote ((CTAGS) ? savenstr (& s[pos], len) : NULL, TRUE,
3836 s, pos + len, lineno, linecharno);
3837 }
3838 }
3839 }
3840 return;
3841 }
3842
3843
/*
 * Consume an Erlang atom (or variable).
 *
 * S is the NUL-terminated line and POS the index where the atom is
 * expected to start.  An unquoted atom is a letter or `_' followed by
 * letters, digits and `_'.  A quoted atom starts and ends with a
 * single quote; inside it a backslash quotes the next character.
 *
 * Return the number of bytes consumed, or -1 if there was an error
 * (no atom at POS, or a quoted atom that does not end on this line).
 */
int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int origpos;

  origpos = pos;

  /* The <ctype.h> classifiers need an unsigned char value; a plain
     char may be negative.  */
  if (isalpha ((unsigned char) s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      pos++;
      while (isalnum ((unsigned char) s[pos]) || s[pos] == '_')
        pos++;
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      while (1)
        {
          if (s[pos] == '\'')
            {
              pos++;
              break;
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
          else if (s[pos] == '\\')
            {
              /* A backslash at the very end of the line quotes
                 nothing: the atom is unterminated.  */
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return -1;
}
3893
/*
 * Consume whitespace.
 *
 * S is the NUL-terminated line and POS the index at which scanning
 * starts.  Return the number of bytes eaten (0 if S[POS] is not
 * whitespace).
 */
int
erlang_white (s, pos)
     char *s;
     int pos;
{
  int origpos;

  origpos = pos;

  /* Convert to unsigned char first: isspace is not defined for the
     negative values a plain char can hold.  */
  while (isspace ((unsigned char) s[pos]))
    pos++;

  return pos - origpos;
}
3909 \f
3910 #ifdef ETAGS_REGEXPS
/* Take a string like "/blah/" and turn it into "blah", making sure
   that the first and last characters are the same, and handling
   quoted separator characters.  Actually, stops on the occurrence of
   an unquoted separator.  Also turns "\t" into a Tab character.
   Any other quoted character keeps its backslash.  Works in place.
   Null terminates name string.

   Returns pointer to terminating separator, or to the terminating
   NUL of the input if no unquoted separator was found.  An empty
   input string has no separator character at all: it is left
   untouched and NAME itself is returned.  */
char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Without this check the scan below would start one byte past the
     terminator of an empty string.  */
  if (sep == '\0')
    return name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the character that follows it.  */
          ++name;
          if (*name == '\0')
            break;              /* trailing backslash: it is dropped */
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
3953
3954 /* Turn a name, which is an ed-style (but Emacs syntax) regular
3955 expression, into a real regular expression by compiling it. */
3956 void
3957 add_regex (regexp_pattern)
3958 char *regexp_pattern;
3959 {
3960 char *name;
3961 const char *err;
3962 struct re_pattern_buffer *patbuf;
3963
3964 if (regexp_pattern == NULL)
3965 {
3966 /* Remove existing regexps. */
3967 num_patterns = 0;
3968 patterns = NULL;
3969 return;
3970 }
3971
3972 if (regexp_pattern[0] == '\0')
3973 {
3974 error ("missing regexp", NULL);
3975 return;
3976 }
3977 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
3978 {
3979 error ("%s: unterminated regexp", regexp_pattern);
3980 return;
3981 }
3982 name = scan_separators (regexp_pattern);
3983 if (regexp_pattern[0] == '\0')
3984 {
3985 error ("null regexp", NULL);
3986 return;
3987 }
3988 (void) scan_separators (name);
3989
3990 patbuf = xnew (1, struct re_pattern_buffer);
3991 patbuf->translate = NULL;
3992 patbuf->fastmap = NULL;
3993 patbuf->buffer = NULL;
3994 patbuf->allocated = 0;
3995
3996 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
3997 if (err != NULL)
3998 {
3999 error ("%s while compiling pattern", err);
4000 return;
4001 }
4002
4003 num_patterns += 1;
4004 if (num_patterns == 1)
4005 patterns = xnew (1, struct pattern);
4006 else
4007 patterns = ((struct pattern *)
4008 xrealloc (patterns,
4009 (num_patterns * sizeof (struct pattern))));
4010 patterns[num_patterns - 1].pattern = patbuf;
4011 patterns[num_patterns - 1].name_pattern = savestr (name);
4012 patterns[num_patterns - 1].error_signaled = FALSE;
4013 }
4014
4015 /*
4016 * Do the substitutions indicated by the regular expression and
4017 * arguments.
4018 */
4019 char *
4020 substitute (in, out, regs)
4021 char *in, *out;
4022 struct re_registers *regs;
4023 {
4024 char *result = NULL, *t;
4025 int size = 0;
4026
4027 /* Pass 1: figure out how much size to allocate. */
4028 for (t = out; *t; ++t)
4029 {
4030 if (*t == '\\')
4031 {
4032 ++t;
4033 if (!*t)
4034 {
4035 fprintf (stderr, "%s: pattern substitution ends prematurely\n",
4036 progname);
4037 return NULL;
4038 }
4039 if (isdigit (*t))
4040 {
4041 int dig = *t - '0';
4042 size += regs->end[dig] - regs->start[dig];
4043 }
4044 }
4045 }
4046
4047 /* Allocate space and do the substitutions. */
4048 result = xnew (size + 1, char);
4049 size = 0;
4050 for (; *out; ++out)
4051 {
4052 if (*out == '\\')
4053 {
4054 ++out;
4055 if (isdigit (*out))
4056 {
4057 /* Using "dig2" satisfies my debugger. Bleah. */
4058 int dig2 = *out - '0';
4059 strncpy (result + size, in + regs->start[dig2],
4060 regs->end[dig2] - regs->start[dig2]);
4061 size += regs->end[dig2] - regs->start[dig2];
4062 }
4063 else
4064 result[size++] = *out;
4065 }
4066 else
4067 result[size++] = *out;
4068 }
4069 result[size] = '\0';
4070
4071 return result;
4072 }
4073 \f
4074 #endif /* ETAGS_REGEXPS */
4075 /* Initialize a linebuffer for use */
4076 void
4077 initbuffer (linebuffer)
4078 struct linebuffer *linebuffer;
4079 {
4080 linebuffer->size = 200;
4081 linebuffer->buffer = xnew (200, char);
4082 }
4083
4084 /*
4085 * Read a line of text from `stream' into `linebuffer'.
4086 * Return the number of characters read from `stream',
4087 * which is the length of the line including the newline, if any.
4088 */
4089 long
4090 readline_internal (linebuffer, stream)
4091 struct linebuffer *linebuffer;
4092 register FILE *stream;
4093 {
4094 char *buffer = linebuffer->buffer;
4095 register char *p = linebuffer->buffer;
4096 register char *pend;
4097 int chars_deleted;
4098
4099 pend = p + linebuffer->size; /* Separate to avoid 386/IX compiler bug. */
4100
4101 while (1)
4102 {
4103 register int c = getc (stream);
4104 if (p == pend)
4105 {
4106 linebuffer->size *= 2;
4107 buffer = (char *) xrealloc (buffer, linebuffer->size);
4108 p += buffer - linebuffer->buffer;
4109 pend = buffer + linebuffer->size;
4110 linebuffer->buffer = buffer;
4111 }
4112 if (c == EOF)
4113 {
4114 *p = '\0';
4115 chars_deleted = 0;
4116 break;
4117 }
4118 if (c == '\n')
4119 {
4120 if (p > buffer && p[-1] == '\r')
4121 {
4122 *--p = '\0';
4123 #ifdef DOS_NT
4124 /* Assume CRLF->LF translation will be performed by Emacs
4125 when loading this file, so CRs won't appear in the buffer.
4126 It would be cleaner to compensate within Emacs;
4127 however, Emacs does not know how many CRs were deleted
4128 before any given point in the file. */
4129 chars_deleted = 1;
4130 #else
4131 chars_deleted = 2;
4132 #endif
4133 }
4134 else
4135 {
4136 *p = '\0';
4137 chars_deleted = 1;
4138 }
4139 break;
4140 }
4141 *p++ = c;
4142 }
4143
4144 return p - buffer + chars_deleted;
4145 }
4146
/*
 * Like readline_internal, above, but try to match the input
 * line against any existing regular expressions.
 *
 * For every pattern that matches, a tag is noted with pfnote: a named
 * one when the pattern has a non-empty name pattern, an unnamed one
 * otherwise.  The return value is the one of readline_internal.
 */
long
readline (linebuffer, stream)
     struct linebuffer *linebuffer;
     FILE *stream;
{
  /* Read new line. */
  long result = readline_internal (linebuffer, stream);
#ifdef ETAGS_REGEXPS
  int i;
  /* RESULT also counts the line terminator, which readline_internal
     does not store.  Match only against the text that is really in
     the buffer, so that the matcher never looks at the terminating
     NUL or at what lies beyond it.  (A line containing a NUL is
     therefore matched up to that NUL only.)  */
  int linelen = strlen (linebuffer->buffer);

  /* Match against all listed patterns. */
  for (i = 0; i < num_patterns; ++i)
    {
      int match = re_match (patterns[i].pattern, linebuffer->buffer,
                            linelen, 0, &patterns[i].regs);
      switch (match)
        {
        case -2:
          /* Some error.  Report it only once per pattern.  */
          if (!patterns[i].error_signaled)
            {
              /* error () takes string arguments, so format the
                 pattern number as text first.  */
              char numbuf[32];

              sprintf (numbuf, "%d", i);
              error ("error while matching pattern %s", numbuf);
              patterns[i].error_signaled = TRUE;
            }
          break;
        case -1:
          /* No match. */
          break;
        default:
          /* Match occurred.  Construct a tag. */
          if (patterns[i].name_pattern[0] != '\0')
            {
              /* Make a named tag. */
              char *name = substitute (linebuffer->buffer,
                                       patterns[i].name_pattern,
                                       &patterns[i].regs);
              if (name != NULL)
                pfnote (name, TRUE,
                        linebuffer->buffer, match, lineno, linecharno);
            }
          else
            {
              /* Make an unnamed tag. */
              pfnote (NULL, TRUE,
                      linebuffer->buffer, match, lineno, linecharno);
            }
          break;
        }
    }
#endif /* ETAGS_REGEXPS */

  return result;
}
4204
4205 /*
4206 * Read a file, but do no processing. This is used to do regexp
4207 * matching on files that have no language defined.
4208 */
4209 void
4210 just_read_file (inf)
4211 FILE *inf;
4212 {
4213 lineno = 0;
4214 charno = 0;
4215
4216 while (!feof (inf))
4217 {
4218 ++lineno;
4219 linecharno = charno;
4220 charno += readline (&lb, inf) + 1;
4221 }
4222 }
4223
4224 \f
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
char *
savestr (cp)
     char *cp;
{
  /* savenstr takes an int.  Convert explicitly rather than passing
     the size_t result of strlen straight through.  */
  int len = strlen (cp);

  return savenstr (cp, len);
}
4235
4236 /*
4237 * Return a pointer to a space of size LEN+1 allocated with xnew where
4238 * the string CP has been copied for at most the first LEN characters.
4239 */
4240 char *
4241 savenstr (cp, len)
4242 char *cp;
4243 int len;
4244 {
4245 register char *dp;
4246
4247 dp = xnew (len + 1, char);
4248 strncpy (dp, cp, len);
4249 dp[len] = '\0';
4250 return dp;
4251 }
4252
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL is part of the
 * string searched, so looking for '\0' finds the terminator.
 *
 * Identical to System V strrchr, included for portability.
 */
char *
etags_strrchr (sp, c)
     register char *sp, c;
{
  register char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        found = sp;             /* remember the latest occurrence */
      if (*sp == '\0')
        break;
    }
  return found;
}
4273
4274
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL is part of the
 * string searched, so looking for '\0' finds the terminator.
 *
 * Identical to System V strchr, included for portability.
 */
char *
etags_strchr (sp, c)
     register char *sp, c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a match */
    }
}
4292
4293 /* Print error message and exit. */
4294 void
4295 fatal (s1, s2)
4296 char *s1, *s2;
4297 {
4298 error (s1, s2);
4299 exit (BAD);
4300 }
4301
4302 void
4303 pfatal (s1)
4304 char *s1;
4305 {
4306 perror (s1);
4307 exit (BAD);
4308 }
4309
4310 void
4311 suggest_asking_for_help ()
4312 {
4313 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
4314 progname);
4315 exit (BAD);
4316 }
4317
4318 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
4319 void
4320 error (s1, s2)
4321 char *s1, *s2;
4322 {
4323 fprintf (stderr, "%s: ", progname);
4324 fprintf (stderr, s1, s2);
4325 fprintf (stderr, "\n");
4326 }
4327
4328 /* Return a newly-allocated string whose contents
4329 concatenate those of s1, s2, s3. */
4330 char *
4331 concat (s1, s2, s3)
4332 char *s1, *s2, *s3;
4333 {
4334 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
4335 char *result = xnew (len1 + len2 + len3 + 1, char);
4336
4337 strcpy (result, s1);
4338 strcpy (result + len1, s2);
4339 strcpy (result + len1 + len2, s3);
4340 result[len1 + len2 + len3] = '\0';
4341
4342 return result;
4343 }
4344 \f
4345 /* Does the same work as the system V getcwd, but does not need to
4346 guess the buffer size in advance. */
4347 char *
4348 etags_getcwd ()
4349 {
4350 #ifdef HAVE_GETCWD
4351 int bufsize = 200;
4352 char *path = xnew (bufsize, char);
4353
4354 while (getcwd (path, bufsize) == NULL)
4355 {
4356 if (errno != ERANGE)
4357 pfatal ("getcwd");
4358 bufsize *= 2;
4359 path = xnew (bufsize, char);
4360 }
4361
4362 #if WINDOWSNT
4363 {
4364 /* Convert backslashes to slashes. */
4365 char *p;
4366 for (p = path; *p != '\0'; p++)
4367 if (*p == '\\')
4368 *p = '/';
4369 }
4370 #endif
4371
4372 return path;
4373
4374 #else /* not HAVE_GETCWD */
4375 #ifdef MSDOS
4376 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
4377
4378 getwd (path);
4379
4380 for (p = path; *p != '\0'; p++)
4381 if (*p == '\\')
4382 *p = '/';
4383 else
4384 *p = lowcase (*p);
4385
4386 return strdup (path);
4387 #else /* not MSDOS */
4388 struct linebuffer path;
4389 FILE *pipe;
4390
4391 initbuffer (&path);
4392 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
4393 if (pipe == NULL || readline_internal (&path, pipe) == 0)
4394 pfatal ("pwd");
4395 pclose (pipe);
4396
4397 return path.buffer;
4398 #endif /* not MSDOS */
4399 #endif /* not HAVE_GETCWD */
4400 }
4401
4402 /* Return a newly allocated string containing the filename
4403 of FILE relative to the absolute directory DIR (which
4404 should end with a slash). */
4405 char *
4406 relative_filename (file, dir)
4407 char *file, *dir;
4408 {
4409 char *fp, *dp, *abs, *res;
4410
4411 /* Find the common root of file and dir (with a trailing slash). */
4412 abs = absolute_filename (file, cwd);
4413 fp = abs;
4414 dp = dir;
4415 while (*fp++ == *dp++)
4416 continue;
4417 fp--, dp--; /* back to the first differing char */
4418 do /* look at the equal chars until / */
4419 fp--, dp--;
4420 while (*fp != '/');
4421
4422 /* Build a sequence of "../" strings for the resulting relative filename. */
4423 for (dp = etags_strchr (dp + 1, '/'), res = "";
4424 dp != NULL;
4425 dp = etags_strchr (dp + 1, '/'))
4426 {
4427 res = concat (res, "../", "");
4428 }
4429
4430 /* Add the filename relative to the common root of file and dir. */
4431 res = concat (res, fp + 1, "");
4432 free (abs);
4433
4434 return res;
4435 }
4436
/* Return a newly allocated string containing the
   absolute filename of FILE given CWD (which should
   end with a slash).

   FILE is used as is when absolutefn accepts it, otherwise it is
   appended to CWD.  The "/dirname/.." and "/." substrings are then
   deleted in place.  The result is always allocated storage that the
   caller may free.  */
char *
absolute_filename (file, cwd)
     char *file, *cwd;
{
  char *slashp, *cp, *res;

  if (absolutefn (file))
    res = concat (file, "", "");
#ifdef DOS_NT
  /* We don't support non-absolute filenames with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative filenames with drive letters not supported", file);
#endif
  else
    res = concat (cwd, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The pieces being
     moved overlap their destination, hence memmove and not strcpy.  */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component,
                 but never before the beginning of the string.  */
              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (absolutefn (cp))
                    break;
                }
              if (*cp == '/')
                {
                  /* Also covers a name beginning with "/..", where
                     cp == slashp and only the "/.." is removed.  */
                  memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
                }
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 filename, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[1] == ':')
                {
                  if (slashp[3] != '\0')
                    memmove (cp + 3, slashp + 4, strlen (slashp + 4) + 1);
                  else
                    cp[3] = '\0';       /* `d:/..' becomes `d:/' */
                }
#endif
              else              /* else (cp == res) */
                {
                  if (slashp[3] != '\0')
                    memmove (cp, slashp + 4, strlen (slashp + 4) + 1);
                  else
                    {
                      /* Keep the result in allocated storage: RES
                         holds at least "/..", so "." fits.  */
                      strcpy (res, ".");
                      return res;
                    }
                }
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  /* Everything was deleted (e.g. "/dirname/.." or "/."): what is left
     is the root directory, not an empty name.  */
  if (res[0] == '\0')
    {
      free (res);
      return concat ("/", "", "");
    }

  return res;
}
4503
/* Return a newly allocated string containing the absolute
   filename of dir where FILE resides given CWD (which should
   end with a slash).

   FILE is cut temporarily after its last slash and restored before
   returning.  Under DOS_NT its backslashes are first turned into
   slashes, in place.  If FILE contains no slash, a copy of CWD is
   returned.  */
char *
absolute_dirname (file, cwd)
     char *file, *cwd;
{
  char *slashp, *res;
  char save;
#ifdef DOS_NT
  char *p;

  for (p = file; *p != '\0'; p++)
    if (*p == '\\')
      *p = '/';
#endif

  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    /* Return a copy, so that the result is newly allocated storage in
       this case as in the other one.  */
    return savestr (cwd);
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, cwd);
  slashp[1] = save;

  return res;
}
4531
4532 /* Increase the size of a linebuffer. */
4533 void
4534 grow_linebuffer (bufp, toksize)
4535 struct linebuffer *bufp;
4536 int toksize;
4537 {
4538 while (bufp->size < toksize)
4539 bufp->size *= 2;
4540 bufp->buffer = (char *) xrealloc (bufp->buffer, bufp->size);
4541 }
4542
/* Like malloc but get fatal error if memory is exhausted. */
long *
xmalloc (size)
     unsigned int size;
{
  long *result = (long *) malloc (size);

  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", NULL);
  return result;
}
4553
/* Like realloc but get fatal error if memory is exhausted. */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *result = (long *) realloc (ptr, size);

  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", NULL);
  return result;
}