(process_file,absolute_filename): Handle filenames
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95
3 Free Software Foundation, Inc. and Ken Arnold
4 This file is not considered part of GNU Emacs.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /*
21 * Authors:
22 * Ctags originally by Ken Arnold.
23 * Fortran added by Jim Kleckner.
24 * Ed Pelegri-Llopart added C typedefs.
25 * Gnu Emacs TAGS format and modifications by RMS?
26 * Sam Kendall added C++.
27 * Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
28 #ifdef ETAGS_REGEXPS
29 * Regexp tags by Tom Tromey.
30 #endif
31 *
32 * Francesco Potorti` (pot@cnuce.cnr.it) is the current maintainer.
33 */
34
35 char pot_etags_version[] = "@(#) pot revision number is 11.30";
36
37 #define TRUE 1
38 #define FALSE 0
39 #ifndef DEBUG
40 # define DEBUG FALSE
41 #endif
42
43 #ifdef MSDOS
44 #include <fcntl.h>
45 #include <sys/param.h>
46 #endif /* MSDOS */
47
48 #ifdef WINDOWSNT
49 #include <stdlib.h>
50 #include <fcntl.h>
51 #include <string.h>
52 #define MAXPATHLEN _MAX_PATH
53 #endif
54
55 #ifdef HAVE_CONFIG_H
56 #include <config.h>
57 /* On some systems, Emacs defines static as nothing for the sake
58 of unexec. We don't want that here since we don't use unexec. */
59 #undef static
60 #endif
61
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#ifndef errno
extern int errno;
#endif
#include <sys/types.h>
#include <sys/stat.h>
#ifdef STDC_HEADERS
#include <string.h>
#endif
70
71 #if !defined (S_ISREG) && defined (S_IFREG)
72 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
73 #endif
74
75 #include <getopt.h>
76
77 #ifdef ETAGS_REGEXPS
78 #include <regex.h>
79 #endif /* ETAGS_REGEXPS */
80
/* Define CTAGS to make the program "ctags" compatible with the usual one.
   Leave it undefined to make the program "etags", which makes emacs-style
   tag tables and tags typedefs, #defines and struct/union/enum by default. */
84 #ifdef CTAGS
85 # undef CTAGS
86 # define CTAGS TRUE
87 #else
88 # define CTAGS FALSE
89 #endif
90
91 /* Exit codes for success and failure. */
92 #ifdef VMS
93 #define GOOD 1
94 #define BAD 0
95 #else
96 #define GOOD 0
97 #define BAD 1
98 #endif
99
100 /* C extensions. */
101 #define C_PLPL 0x00001 /* C++ */
102 #define C_STAR 0x00003 /* C* */
103 #define YACC 0x10000 /* yacc file */
104
105 #define streq(s,t) (strcmp (s, t) == 0)
106 #define strneq(s,t,n) (strncmp (s, t, n) == 0)
107
/* Character-class predicates.  Each one is a lookup in the table of
   the same purpose that init () fills in.  The argument is converted
   to unsigned char before being used as a subscript, so that a plain
   `char' holding an 8-bit value can never produce a negative index.  */
#define iswhite(arg)    (_wht[(unsigned char) (arg)]) /* T if char is white */
#define begtoken(arg)   (_btk[(unsigned char) (arg)]) /* T if char can start token */
#define intoken(arg)    (_itk[(unsigned char) (arg)]) /* T if char can be in token */
#define endtoken(arg)   (_etk[(unsigned char) (arg)]) /* T if char ends tokens */
112
113 /*
114 * xnew -- allocate storage
115 *
116 * SYNOPSIS: Type *xnew (int n, Type);
117 */
118 #define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
119
120 typedef int logical;
121
/* One recorded tag.  Nodes are created by pfnote (), linked together
   by add_node () through the `left' and `right' pointers, and released
   by free_tree ().  */
typedef struct nd_st
{                               /* sorting structure */
  char *name;                   /* function or type name; free_tree ()
                                   frees it only when `named' is set */
  char *file;                   /* file name; pfnote () stores the
                                   current value of `curfile' here */
  logical is_func;              /* use pattern or line no */
  logical named;                /* list name separately */
  logical been_warned;          /* set if noticed dup */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
  char *pat;                    /* search pattern; always freed by
                                   free_tree () */
  struct nd_st *left, *right;   /* left and right sons */
} NODE;
134
135 extern char *getenv ();
136
137 char *concat ();
138 char *savenstr (), *savestr ();
139 char *etags_strchr (), *etags_strrchr ();
140 char *etags_getcwd ();
141 char *relative_filename (), *absolute_filename (), *absolute_dirname ();
142 long *xmalloc (), *xrealloc ();
143
144 typedef void Lang_function ();
145 #if FALSE /* many compilers barf on this */
146 Lang_function Asm_labels;
147 Lang_function default_C_entries;
148 Lang_function C_entries;
149 Lang_function Cplusplus_entries;
150 Lang_function Cstar_entries;
151 Lang_function Fortran_functions;
152 Lang_function Yacc_entries;
153 Lang_function Lisp_functions;
154 Lang_function Pascal_functions;
155 Lang_function Prolog_functions;
156 Lang_function Scheme_functions;
157 Lang_function TeX_functions;
158 Lang_function just_read_file;
159 #else /* so let's write it this way */
160 void Asm_labels ();
161 void default_C_entries ();
162 void C_entries ();
163 void Cplusplus_entries ();
164 void Cstar_entries ();
165 void Fortran_functions ();
166 void Yacc_entries ();
167 void Lisp_functions ();
168 void Pascal_functions ();
169 void Prolog_functions ();
170 void Scheme_functions ();
171 void TeX_functions ();
172 void just_read_file ();
173 #endif
174
175 logical get_language ();
176 int total_size_of_entries ();
177 long readline ();
178 long readline_internal ();
179 #ifdef ETAGS_REGEXPS
180 void add_regex ();
181 #endif
182 void add_node ();
183 void error ();
184 void fatal (), pfatal ();
185 void find_entries ();
186 void free_tree ();
187 void getit ();
188 void init ();
189 void initbuffer ();
190 void pfnote ();
191 void process_file ();
192 void put_entries ();
193 void takeprec ();
194
195 \f
196 char searchar = '/'; /* use /.../ searches */
197
198 int lineno; /* line number of current line */
199 long charno; /* current character number */
200
201 long linecharno; /* charno of start of line; not used by C,
202 but by every other language. */
203
204 char *curfile; /* current input file name */
205 char *tagfile; /* output file */
206 char *progname; /* name this program was invoked with */
207 char *cwd; /* current working directory */
208 char *tagfiledir; /* directory of tagfile */
209
210 FILE *tagf; /* ioptr for tags file */
211 NODE *head; /* the head of the binary tree of tags */
212
213 /*
214 * A `struct linebuffer' is a structure which holds a line of text.
215 * `readline' reads a line from a stream into a linebuffer and works
216 * regardless of the length of the line.
217 */
218 struct linebuffer
219 {
220 long size;
221 char *buffer;
222 };
223
224 struct linebuffer lb; /* the current line */
225 struct linebuffer token_name; /* used by C_entries as temporary area */
226 struct
227 {
228 long linepos;
229 struct linebuffer lb; /* used by C_entries instead of lb */
230 } lbs[2];
231
232 /* boolean "functions" (see init) */
233 logical _wht[0177], _etk[0177], _itk[0177], _btk[0177];
234 char
235 *white = " \f\t\n\013", /* white chars */
236 *endtk = " \t\n\013\"'#()[]{}=-+%*/&|^~!<>;,.:?", /* token ending chars */
237 /* token starting chars */
238 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~",
239 /* valid in-token chars */
240 *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
241
242 logical append_to_tagfile; /* -a: append to tags */
243 /* The following three default to TRUE for etags, but to FALSE for ctags. */
244 logical typedefs; /* -t: create tags for typedefs */
245 logical typedefs_and_cplusplus; /* -T: create tags for typedefs, level */
246 /* 0 struct/enum/union decls, and C++ */
247 /* member functions. */
248 logical constantypedefs; /* -d: create tags for C #define and enum */
249 /* constants. Enum consts not implemented. */
250 /* -D: opposite of -d. Default under ctags. */
251 logical update; /* -u: update tags */
252 logical vgrind_style; /* -v: create vgrind style index output */
253 logical no_warnings; /* -w: suppress warnings */
254 logical cxref_style; /* -x: create cxref style output */
255 logical cplusplus; /* .[hc] means C++, not C */
256 logical noindentypedefs; /* -I: ignore indentation in C */
257 #define permit_duplicates TRUE /* allow duplicate tags */
258
259 struct option longopts[] =
260 {
261 { "append", no_argument, NULL, 'a' },
262 { "backward-search", no_argument, NULL, 'B' },
263 { "c++", no_argument, NULL, 'C' },
264 { "cxref", no_argument, NULL, 'x' },
265 { "defines", no_argument, NULL, 'd' },
266 { "help", no_argument, NULL, 'h' },
267 { "help", no_argument, NULL, 'H' },
268 { "ignore-indentation", no_argument, NULL, 'I' },
269 { "include", required_argument, NULL, 'i' },
270 { "language", required_argument, NULL, 'l' },
271 { "no-defines", no_argument, NULL, 'D' },
272 { "no-regex", no_argument, NULL, 'R' },
273 { "no-warn", no_argument, NULL, 'w' },
274 { "output", required_argument, NULL, 'o' },
275 { "regex", required_argument, NULL, 'r' },
276 { "typedefs", no_argument, NULL, 't' },
277 { "typedefs-and-c++", no_argument, NULL, 'T' },
278 { "update", no_argument, NULL, 'u' },
279 { "version", no_argument, NULL, 'V' },
280 { "vgrind", no_argument, NULL, 'v' },
281 { 0 }
282 };
283
284 #ifdef ETAGS_REGEXPS
285 /* Structure defining a regular expression. Elements are
286 the compiled pattern, and the name string. */
287 struct pattern
288 {
289 struct re_pattern_buffer *pattern;
290 struct re_registers regs;
291 char *name_pattern;
292 logical error_signaled;
293 };
294
295 /* Number of regexps found. */
296 int num_patterns = 0;
297
298 /* Array of all regexps. */
299 struct pattern *patterns = NULL;
300 #endif /* ETAGS_REGEXPS */
301
302 /* Language stuff. */
303 struct lang_entry
304 {
305 char *extension;
306 Lang_function *function;
307 };
308
309 /* Table of language names and corresponding functions. */
310 /* It is ok for a given function to be listed under more than one
311 name. I just didn't. */
312 /* "auto" language reverts to default behavior. */
313 struct lang_entry lang_names[] =
314 {
315 { "asm", Asm_labels },
316 { "c", default_C_entries },
317 { "c++", Cplusplus_entries },
318 { "c*", Cstar_entries },
319 { "fortran", Fortran_functions },
320 { "lisp", Lisp_functions },
321 { "none", just_read_file },
322 { "pascal", Pascal_functions },
323 { "scheme" , Scheme_functions },
324 { "tex", TeX_functions },
325 { "auto", NULL },
326 { NULL, NULL }
327 };
328
/* Table of file extensions and corresponding language functions.
   The extension is stored without its leading dot and is compared
   case-sensitively (find_entries uses streq), which is why some
   extensions appear in both upper and lower case.  The table ends
   with an entry whose extension is NULL.  */
struct lang_entry lang_extensions[] =
{
  /* Assume that ".s" or ".a" is assembly code. -wolfgang.
     Or even ".sa".  The microcontroller and BSO/Tasking extensions
     below are treated as assembly as well.  */
  { "a", Asm_labels },          /* Unix assembler */
  { "asm", Asm_labels },        /* Microcontroller assembly */
  { "def", Asm_labels },        /* BSO/Tasking definition includes */
  { "inc", Asm_labels },        /* Microcontroller include files */
  { "ins", Asm_labels },        /* Microcontroller include files */
  { "s", Asm_labels },
  { "sa", Asm_labels },         /* Unix assembler */
  { "src", Asm_labels },        /* BSO/Tasking C compiler output */

  /* .aux, .bbl, .clo, .cls, .dtx, .sty or .tex implies LaTeX source code. */
  { "aux", TeX_functions },
  { "bbl", TeX_functions },
  { "clo", TeX_functions },
  { "cls", TeX_functions },
  { "dtx", TeX_functions },
  { "sty", TeX_functions },
  { "tex", TeX_functions },

  /* .l or .el or .lisp or .lsp (or .cl or .clisp or ...) implies lisp
     source code */
  { "cl", Lisp_functions },
  { "clisp", Lisp_functions },
  { "el", Lisp_functions },
  { "l", Lisp_functions },
  { "lisp", Lisp_functions },
  { "lsp", Lisp_functions },

  /* .scm or .sm or .scheme (or .oak, .sch, .t, .SCM, .SM) implies
     scheme source code */
  { "SCM", Scheme_functions },
  { "SM", Scheme_functions },
  { "oak", Scheme_functions },
  { "sch", Scheme_functions },
  { "scheme", Scheme_functions },
  { "scm", Scheme_functions },
  { "sm", Scheme_functions },
  { "t", Scheme_functions },
  /* FIXME Can't do the `SCM' or `scm' prefix with a version number */

  /* Note that ".c" and ".h" can be considered C++, if the --c++
     flag was given.  That is why default_C_entries is called here. */
  { "c", default_C_entries },
  { "h", default_C_entries },

  /* .C or .H or .c++ or .cc or .cpp or .cxx or .h++ or .hh or .hxx:
     a C++ file */
  { "C", Cplusplus_entries },
  { "H", Cplusplus_entries },
  { "c++", Cplusplus_entries },
  { "cc", Cplusplus_entries },
  { "cpp", Cplusplus_entries },
  { "cxx", Cplusplus_entries },
  { "h++", Cplusplus_entries },
  { "hh", Cplusplus_entries },
  { "hxx", Cplusplus_entries },

  /* .y: a yacc file */
  { "y", Yacc_entries },

  /* .cs or .hs: a C* file */
  { "cs", Cstar_entries },
  { "hs", Cstar_entries },

  /* .F, .f and .for are FORTRAN. */
  { "F", Fortran_functions },
  { "f", Fortran_functions },
  { "for", Fortran_functions },

  /* .pl implies prolog source code */
  { "pl", Prolog_functions },

  /* .p or .pas: a Pascal file */
  { "p", Pascal_functions },
  { "pas", Pascal_functions },

  /* End-of-table marker. */
  { NULL, NULL }
};
409
410 /* Non-NULL if language fixed. */
411 Lang_function *lang_func = NULL;
412
413 \f
414 void
415 print_language_names ()
416 {
417 struct lang_entry *name, *ext;
418
419 puts ("\nThese are the currently supported languages, along with the\n\
420 default extensions for files:");
421 for (name = lang_names; name->extension; ++name)
422 {
423 printf ("\t%s\t", name->extension);
424 for (ext = lang_extensions; ext->extension; ++ext)
425 if (name->function == ext->function)
426 printf (" .%s", ext->extension);
427 puts ("");
428 }
429 puts ("Where `auto' means use default language for files based on filename\n\
430 extension, and `none' means only do regexp processing on files.\n\
431 If no language is specified and no extension is found for some file,\n\
432 Fortran is tried first; if no tags are found, C is tried next.");
433 }
434
435 void
436 print_version ()
437 {
438 #ifdef VERSION
439 printf ("%s for Emacs version %s.\n", (CTAGS) ? "CTAGS" : "ETAGS", VERSION);
440 #else
441 printf ("%s for Emacs version 19.\n", (CTAGS) ? "CTAGS" : "ETAGS");
442 #endif
443
444 exit (GOOD);
445 }
446
/* Print the option summary on stdout, followed by the language table
   produced by print_language_names (), then exit with the success
   code.  Which options are described depends on whether the program
   was built as ctags or as etags (CTAGS) and on ETAGS_REGEXPS.  */
void
print_help ()
{
  printf ("These are the options accepted by %s. You may use unambiguous\n\
abbreviations for the long option names. A - as file name means read\n\
names from stdin.\n\n", progname);

  puts ("-a, --append\n\
Append tag entries to existing tags file.");

  /* Backward search is described for ctags only. */
  if (CTAGS)
    puts ("-B, --backward-search\n\
Write the search commands for the tag entries using '?', the\n\
backward-search command instead of '/', the forward-search command.");

  puts ("-C, --c++\n\
Treat files whose extension defaults to C language as C++ files.");

  /* Describe whichever option departs from the default: main () turns
     constantypedefs on for etags and leaves it off for ctags. */
  if (CTAGS)
    puts ("-d, --defines\n\
Create tag entries for constant C #defines, too.");
  else
    puts ("-D, --no-defines\n\
Don't create tag entries for constant C #defines. This makes\n\
the tags file smaller.");

  /* Options described for etags only. */
  if (!CTAGS)
    {
      puts ("-i FILE, --include=FILE\n\
Include a note in tag file indicating that, when searching for\n\
a tag, one should also consult the tags file FILE after\n\
checking the current file.");
      puts ("-l LANG, --language=LANG\n\
Force the following files to be considered as written in the\n\
named language up to the next --language=LANG option.");
    }

#ifdef ETAGS_REGEXPS
  puts ("-r /REGEXP/, --regex=/REGEXP/\n\
Make a tag for each line matching pattern REGEXP in the\n\
following files. REGEXP is anchored (as if preceded by ^).\n\
The form /REGEXP/NAME/ creates a named tag. For example Tcl\n\
named tags can be created with:\n\
--regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
  puts ("-R, --no-regex\n\
Don't create tags from regexps for the following files.");
#endif /* ETAGS_REGEXPS */
  puts ("-o FILE, --output=FILE\n\
Write the tags to FILE.");
  puts ("-I, --ignore-indentation\n\
Don't rely on indentation quite as much as normal. Currently,\n\
this means not to assume that a closing brace in the first\n\
column is the final brace of a function or structure\n\
definition in C and C++.");

  /* Options described for ctags only. */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
Generate tag entries for C typedefs.");
      puts ("-T, --typedefs-and-c++\n\
Generate tag entries for C typedefs, C struct/enum/union tags,\n\
and C++ member functions.");
      puts ("-u, --update\n\
Update the tag entries for the given files, leaving tag\n\
entries for other files in place. Currently, this is\n\
implemented by deleting the existing entries for the given\n\
files and then rewriting the new entries at the end of the\n\
tags file. It is often faster to simply rebuild the entire\n\
tag file than to use this.");
      puts ("-v, --vgrind\n\
Generates an index of items intended for human consumption,\n\
similar to the output of vgrind. The index is sorted, and\n\
gives the page number of each item.");
      puts ("-w, --no-warn\n\
Suppress warning messages about entries defined in multiple\n\
files.");
      puts ("-x, --cxref\n\
Like --vgrind, but in the style of cxref, rather than vgrind.\n\
The output uses line numbers instead of page numbers, but\n\
beyond that the differences are cosmetic; try both to see\n\
which you like.");
    }

  puts ("-V, --version\n\
Print the version of the program.\n\
-h, --help\n\
Print this help message.");

  print_language_names ();

  exit (GOOD);
}
539
540 \f
541 enum argument_type
542 {
543 at_language,
544 at_regexp,
545 at_filename
546 };
547
548 /* This structure helps us allow mixing of --lang and filenames. */
549 typedef struct
550 {
551 enum argument_type arg_type;
552 char *what;
553 Lang_function *function;
554 } ARGUMENT;
555
556 #ifdef VMS /* VMS specific functions */
557
558 #define EOS '\0'
559
560 /* This is a BUG! ANY arbitrary limit is a BUG!
561 Won't someone please fix this? */
562 #define MAX_FILE_SPEC_LEN 255
563 typedef struct {
564 short curlen;
565 char body[MAX_FILE_SPEC_LEN + 1];
566 } vspec;
567
568 /*
569 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
570 returning in each successive call the next filename matching the input
571 spec. The function expects that each in_spec passed
572 to it will be processed to completion; in particular, up to and
573 including the call following that in which the last matching name
574 is returned, the function ignores the value of in_spec, and will
575 only start processing a new spec with the following call.
576 If an error occurs, on return out_spec contains the value
577 of in_spec when the error occurred.
578
579 With each successive filename returned in out_spec, the
580 function's return value is one. When there are no more matching
581 names the function returns zero. If on the first call no file
582 matches in_spec, or there is any other error, -1 is returned.
583 */
584
585 #include <rmsdef.h>
586 #include <descrip.h>
587 #define OUTSIZE MAX_FILE_SPEC_LEN
/* Return in OUT the next file name matching the specification IN.
   Returns 1 when a name was stored in OUT, 0 when lib$find_file
   reports RMS$_NMF, and -1 for any other status, in which case IN is
   copied into OUT->body.  The search context and both descriptors are
   static, so only one expansion can be in progress at a time.  */
short
fn_exp (out, in)
     vspec *out;
     char *in;
{
  static long context = 0;
  static struct dsc$descriptor_s o;
  static struct dsc$descriptor_s i;
  static logical pass1 = TRUE;
  long status;
  short retval;

  /* The descriptors are (re)built only on the first call of an
     expansion; until pass1 is set again below, later calls reuse them
     and therefore ignore the current values of OUT and IN.  */
  if (pass1)
    {
      pass1 = FALSE;
      o.dsc$a_pointer = (char *) out;
      o.dsc$w_length = (short)OUTSIZE;
      i.dsc$a_pointer = in;
      i.dsc$w_length = (short)strlen(in);
      i.dsc$b_dtype = DSC$K_DTYPE_T;
      i.dsc$b_class = DSC$K_CLASS_S;
      o.dsc$b_dtype = DSC$K_DTYPE_VT;
      o.dsc$b_class = DSC$K_CLASS_VS;
    }
  if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
    {
      /* Terminate the returned name at the length recorded in curlen.  */
      out->body[out->curlen] = EOS;
      return 1;
    }
  else if (status == RMS$_NMF)
    retval = 0;
  else
    {
      /* NOTE(review): IN is copied without a length check, although
         body holds only MAX_FILE_SPEC_LEN + 1 bytes.  */
      strcpy(out->body, in);
      retval = -1;
    }
  /* The expansion is over: release the search context and arrange for
     the next call to start afresh.  */
  lib$find_file_end(&context);
  pass1 = TRUE;
  return retval;
}
628
629 /*
630 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
631 name of each file specified by the provided arg expanding wildcards.
632 */
633 char *
634 gfnames (arg, p_error)
635 char *arg;
636 logical *p_error;
637 {
638 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
639
640 switch (fn_exp (&filename, arg))
641 {
642 case 1:
643 *p_error = FALSE;
644 return filename.body;
645 case 0:
646 *p_error = FALSE;
647 return NULL;
648 default:
649 *p_error = TRUE;
650 return filename.body;
651 }
652 }
653
654 #ifndef OLD /* Newer versions of VMS do provide `system'. */
655 system (cmd)
656 char *cmd;
657 {
658 fprintf (stderr, "system() function not implemented under VMS\n");
659 }
660 #endif
661
#define VERSION_DELIM   ';'

/* Normalise the file name S in place: convert it to lower case and
   cut it at the first VERSION_DELIM, so that the version suffix is
   dropped.  Returns S.  */
char *
massage_name (s)
     char *s;
{
  char *start = s;

  for ( ; *s; s++)
    if (*s == VERSION_DELIM)
      {
        *s = '\0';
        break;
      }
    else
      /* tolower is only defined for unsigned char values and EOF, so
         a plain char must be converted before it is passed on.  */
      *s = tolower ((unsigned char) *s);
  return start;
}
678 #endif /* VMS */
679
680 \f
681 void
682 main (argc, argv)
683 int argc;
684 char *argv[];
685 {
686 int i;
687 unsigned int nincluded_files = 0;
688 char **included_files = xnew (argc, char *);
689 char *this_file;
690 ARGUMENT *argbuffer;
691 int current_arg = 0, file_count = 0;
692 struct linebuffer filename_lb;
693 #ifdef VMS
694 logical got_err;
695 #endif
696
697 #ifdef DOS_NT
698 _fmode = O_BINARY; /* all of files are treated as binary files */
699 #endif /* DOS_NT */
700
701 progname = argv[0];
702
703 /* Allocate enough no matter what happens. Overkill, but each one
704 is small. */
705 argbuffer = xnew (argc, ARGUMENT);
706
707 #ifdef ETAGS_REGEXPS
708 /* Set syntax for regular expression routines. */
709 re_set_syntax (RE_SYNTAX_EMACS);
710 #endif /* ETAGS_REGEXPS */
711
712 /*
713 * If etags, always find typedefs and structure tags. Why not?
714 * Also default is to find macro constants.
715 */
716 if (!CTAGS)
717 typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
718
719 while (1)
720 {
721 int opt = getopt_long (argc, argv,
722 "-aCdDf:Il:o:r:RStTi:BuvxwVhH", longopts, 0);
723
724 if (opt == EOF)
725 break;
726
727 switch (opt)
728 {
729 case 0:
730 /* If getopt returns 0, then it has already processed a
731 long-named option. We should do nothing. */
732 break;
733
734 case 1:
735 /* This means that a filename has been seen. Record it. */
736 argbuffer[current_arg].arg_type = at_filename;
737 argbuffer[current_arg].what = optarg;
738 ++current_arg;
739 ++file_count;
740 break;
741
742 /* Common options. */
743 case 'a':
744 append_to_tagfile = TRUE;
745 break;
746 case 'C':
747 cplusplus = TRUE;
748 break;
749 case 'd':
750 constantypedefs = TRUE;
751 break;
752 case 'D':
753 constantypedefs = FALSE;
754 break;
755 case 'f': /* for compatibility with old makefiles */
756 case 'o':
757 if (tagfile)
758 {
759 fprintf (stderr, "%s: -%c option may only be given once.\n",
760 progname, opt);
761 goto usage;
762 }
763 tagfile = optarg;
764 break;
765 case 'I':
766 case 'S': /* for backward compatibility */
767 noindentypedefs = TRUE;
768 break;
769 case 'l':
770 if (!get_language (optarg, &argbuffer[current_arg].function))
771 {
772 fprintf (stderr, "%s: language \"%s\" not recognized.\n",
773 progname, optarg);
774 goto usage;
775 }
776 argbuffer[current_arg].arg_type = at_language;
777 ++current_arg;
778 break;
779 #ifdef ETAGS_REGEXPS
780 case 'r':
781 argbuffer[current_arg].arg_type = at_regexp;
782 argbuffer[current_arg].what = optarg;
783 ++current_arg;
784 break;
785 case 'R':
786 argbuffer[current_arg].arg_type = at_regexp;
787 argbuffer[current_arg].what = NULL;
788 ++current_arg;
789 break;
790 #endif /* ETAGS_REGEXPS */
791 case 'V':
792 print_version ();
793 break;
794 case 'h':
795 case 'H':
796 print_help ();
797 break;
798 case 't':
799 typedefs = TRUE;
800 break;
801 case 'T':
802 typedefs = typedefs_and_cplusplus = TRUE;
803 break;
804 #if (!CTAGS)
805 /* Etags options */
806 case 'i':
807 included_files[nincluded_files++] = optarg;
808 break;
809 #else /* CTAGS */
810 /* Ctags options. */
811 case 'B':
812 searchar = '?';
813 break;
814 case 'u':
815 update = TRUE;
816 break;
817 case 'v':
818 vgrind_style = TRUE;
819 /*FALLTHRU*/
820 case 'x':
821 cxref_style = TRUE;
822 break;
823 case 'w':
824 no_warnings = TRUE;
825 break;
826 #endif /* CTAGS */
827 default:
828 goto usage;
829 }
830 }
831
832 for (; optind < argc; ++optind)
833 {
834 argbuffer[current_arg].arg_type = at_filename;
835 argbuffer[current_arg].what = argv[optind];
836 ++current_arg;
837 ++file_count;
838 }
839
840 if (nincluded_files == 0 && file_count == 0)
841 {
842 fprintf (stderr, "%s: No input files specified.\n", progname);
843
844 usage:
845 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
846 progname);
847 exit (BAD);
848 }
849
850 if (tagfile == NULL)
851 {
852 tagfile = CTAGS ? "tags" : "TAGS";
853 }
854 cwd = etags_getcwd (); /* the current working directory */
855 strcat (cwd, "/");
856 if (streq (tagfile, "-"))
857 {
858 tagfiledir = cwd;
859 }
860 else
861 {
862 tagfiledir = absolute_dirname (tagfile, cwd);
863 }
864
865 init (); /* set up boolean "functions" */
866
867 initbuffer (&lb);
868 initbuffer (&token_name);
869 initbuffer (&lbs[0].lb);
870 initbuffer (&lbs[1].lb);
871 initbuffer (&filename_lb);
872
873 if (!CTAGS)
874 {
875 if (streq (tagfile, "-"))
876 tagf = stdout;
877 else
878 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
879 if (tagf == NULL)
880 pfatal (tagfile);
881 }
882
883 /*
884 * Loop through files finding functions.
885 */
886 for (i = 0; i < current_arg; ++i)
887 {
888 switch (argbuffer[i].arg_type)
889 {
890 case at_language:
891 lang_func = argbuffer[i].function;
892 break;
893 #ifdef ETAGS_REGEXPS
894 case at_regexp:
895 add_regex (argbuffer[i].what);
896 break;
897 #endif
898 case at_filename:
899 #ifdef VMS
900 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
901 {
902 if (got_err)
903 {
904 error ("Can't find file %s\n", this_file);
905 argc--, argv++;
906 }
907 else
908 {
909 this_file = massage_name (this_file);
910 }
911 #else
912 this_file = argbuffer[i].what;
913 #endif
914 /* Input file named "-" means read file names from stdin
915 and use them. */
916 if (streq (this_file, "-"))
917 while (readline_internal (&filename_lb, stdin) > 0)
918 process_file (filename_lb.buffer);
919 else
920 process_file (this_file);
921 #ifdef VMS
922 }
923 #endif
924 break;
925 }
926 }
927
928 if (!CTAGS)
929 {
930 while (nincluded_files-- > 0)
931 fprintf (tagf, "\f\n%s,include\n", *included_files++);
932
933 fclose (tagf);
934 exit (GOOD);
935 }
936
937 /* If CTAGS, we are here. process_file did not write the tags yet,
938 because we want them ordered. Let's do it now. */
939 if (cxref_style)
940 {
941 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
942 if (tagf == NULL)
943 pfatal (tagfile);
944 put_entries (head);
945 exit (GOOD);
946 }
947
948 if (update)
949 {
950 char cmd[BUFSIZ];
951 for (i = 0; i < current_arg; ++i)
952 {
953 if (argbuffer[i].arg_type != at_filename)
954 continue;
955 sprintf (cmd,
956 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
957 tagfile, argbuffer[i].what, tagfile);
958 if (system (cmd) != GOOD)
959 fatal ("failed to execute shell command");
960 }
961 append_to_tagfile = TRUE;
962 }
963
964 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
965 if (tagf == NULL)
966 pfatal (tagfile);
967 put_entries (head);
968 fclose (tagf);
969
970 if (update)
971 {
972 char cmd[BUFSIZ];
973 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
974 exit (system (cmd));
975 }
976 exit (GOOD);
977 }
978
979
980 /*
981 * Set the language, given the name.
982 */
983 logical
984 get_language (language, func)
985 char *language;
986 Lang_function **func;
987 {
988 struct lang_entry *lang;
989
990 for (lang = lang_names; lang->extension; ++lang)
991 {
992 if (streq (language, lang->extension))
993 {
994 *func = lang->function;
995 return TRUE;
996 }
997 }
998
999 return FALSE;
1000 }
1001
1002
1003 /*
1004 * This routine is called on each file argument.
1005 */
1006 void
1007 process_file (file)
1008 char *file;
1009 {
1010 struct stat stat_buf;
1011 FILE *inf;
1012
1013 if (stat (file, &stat_buf) == 0 && !S_ISREG (stat_buf.st_mode))
1014 {
1015 fprintf (stderr, "Skipping %s: it is not a regular file.\n", file);
1016 return;
1017 }
1018 if (streq (file, tagfile) && !streq (tagfile, "-"))
1019 {
1020 fprintf (stderr, "Skipping inclusion of %s in self.\n", file);
1021 return;
1022 }
1023 inf = fopen (file, "r");
1024 if (inf == NULL)
1025 {
1026 perror (file);
1027 return;
1028 }
1029
1030 find_entries (file, inf);
1031
1032 if (!CTAGS)
1033 {
1034 char *filename;
1035
1036 #ifdef DOS_NT
1037 if (file[0] == '/' || (isalpha (file[0]) && file[1] == ':'))
1038 #else
1039 if (file[0] == '/')
1040 #endif
1041 {
1042 /* file is an absolute filename. Canonicalise it. */
1043 filename = absolute_filename (file, cwd);
1044 }
1045 else
1046 {
1047 /* file is a filename relative to cwd. Make it relative
1048 to the directory of the tags file. */
1049 filename = relative_filename (file, tagfiledir);
1050 }
1051 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1052 free (filename);
1053 put_entries (head);
1054 free_tree (head);
1055 head = NULL;
1056 }
1057 }
1058
1059 /*
1060 * This routine sets up the boolean pseudo-functions which work
1061 * by setting boolean flags dependent upon the corresponding character
1062 * Every char which is NOT in that string is not a white char. Therefore,
1063 * all of the array "_wht" is set to FALSE, and then the elements
1064 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1065 * of a char is TRUE if it is the string "white", else FALSE.
1066 */
1067 void
1068 init ()
1069 {
1070 register char *sp;
1071 register int i;
1072
1073 for (i = 0; i < 0177; i++)
1074 _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
1075 for (sp = white; *sp; sp++)
1076 _wht[*sp] = TRUE;
1077 for (sp = endtk; *sp; sp++)
1078 _etk[*sp] = TRUE;
1079 for (sp = intk; *sp; sp++)
1080 _itk[*sp] = TRUE;
1081 for (sp = begtk; *sp; sp++)
1082 _btk[*sp] = TRUE;
1083 _wht[0] = _wht['\n'];
1084 _etk[0] = _etk['\n'];
1085 _btk[0] = _btk['\n'];
1086 _itk[0] = _itk['\n'];
1087 }
1088
1089 /*
1090 * This routine opens the specified file and calls the function
1091 * which finds the function and type definitions.
1092 */
1093 void
1094 find_entries (file, inf)
1095 char *file;
1096 FILE *inf;
1097 {
1098 char *cp;
1099 struct lang_entry *lang;
1100 NODE *old_last_node;
1101 extern NODE *last_node;
1102
1103 curfile = savestr (file);
1104 cp = etags_strrchr (file, '.');
1105
1106 /* If user specified a language, use it. */
1107 if (lang_func != NULL)
1108 {
1109 lang_func (inf);
1110 fclose (inf);
1111 return;
1112 }
1113
1114 if (cp)
1115 {
1116 ++cp;
1117 for (lang = lang_extensions; lang->extension; ++lang)
1118 {
1119 if (streq (cp, lang->extension))
1120 {
1121 lang->function (inf);
1122 fclose (inf);
1123 return;
1124 }
1125 }
1126 }
1127
1128 /* Try Fortran. */
1129 old_last_node = last_node;
1130 Fortran_functions (inf);
1131
1132 /* No Fortran entries found. Try C. */
1133 if (old_last_node == last_node)
1134 default_C_entries (inf);
1135 fclose (inf);
1136 }
1137 \f
1138 /* Record a tag. */
1139 void
1140 pfnote (name, is_func, named, linestart, linelen, lno, cno)
1141 char *name; /* tag name */
1142 logical is_func; /* tag is a function */
1143 logical named; /* tag different from text of definition */
1144 char *linestart; /* start of the line where tag is */
1145 int linelen; /* length of the line where tag is */
1146 int lno; /* line number */
1147 long cno; /* character number */
1148 {
1149 register NODE *np = xnew (1, NODE);
1150 register char *fp;
1151
1152 /* If ctags mode, change name "main" to M<thisfilename>. */
1153 if (CTAGS && !cxref_style && streq (name, "main"))
1154 {
1155 fp = etags_strrchr (curfile, '/');
1156 np->name = concat ("M", fp == 0 ? curfile : fp + 1, "");
1157 fp = etags_strrchr (np->name, '.');
1158 if (fp && fp[1] != '\0' && fp[2] == '\0')
1159 fp[0] = 0;
1160 np->named = TRUE;
1161 }
1162 else
1163 {
1164 np->name = name;
1165 np->named = named;
1166 }
1167 np->been_warned = FALSE;
1168 np->file = curfile;
1169 np->is_func = is_func;
1170 np->lno = lno;
1171 /* Our char numbers are 0-base, because of C language tradition?
1172 ctags compatibility? old versions compatibility? I don't know.
1173 Anyway, since emacs's are 1-base we espect etags.el to take care
1174 of the difference. If we wanted to have 1-based numbers, we would
1175 uncomment the +1 below. */
1176 np->cno = cno /* + 1 */ ;
1177 np->left = np->right = NULL;
1178 np->pat = savenstr (linestart, ((CTAGS && !cxref_style) ? 50 : linelen));
1179
1180 add_node (np, &head);
1181 }
1182
1183 /*
1184 * free_tree ()
1185 * recurse on left children, iterate on right children.
1186 */
1187 void
1188 free_tree (node)
1189 register NODE *node;
1190 {
1191 while (node)
1192 {
1193 register NODE *node_right = node->right;
1194 free_tree (node->left);
1195 if (node->named)
1196 free (node->name);
1197 free (node->pat);
1198 free ((char *) node);
1199 node = node_right;
1200 }
1201 }
1202
1203 /*
1204 * add_node ()
1205 * Adds a node to the tree of nodes. In etags mode, we don't keep
1206 * it sorted; we just keep a linear list. In ctags mode, maintain
1207 * an ordered tree, with no attempt at balancing.
1208 *
1209 * add_node is the only function allowed to add nodes, so it can
1210 * maintain state.
1211 */
1212 NODE *last_node = NULL;
1213 void
1214 add_node (node, cur_node_p)
1215 NODE *node, **cur_node_p;
1216 {
1217 register int dif;
1218 register NODE *cur_node = *cur_node_p;
1219
1220 if (cur_node == NULL)
1221 {
1222 *cur_node_p = node;
1223 last_node = node;
1224 return;
1225 }
1226
1227 if (!CTAGS)
1228 {
1229 /* Etags Mode */
1230 if (last_node == NULL)
1231 fatal ("internal error in add_node", 0);
1232 last_node->right = node;
1233 last_node = node;
1234 }
1235 else
1236 {
1237 /* Ctags Mode */
1238 dif = strcmp (node->name, cur_node->name);
1239
1240 /*
1241 * If this tag name matches an existing one, then
1242 * do not add the node, but maybe print a warning.
1243 */
1244 if (!dif)
1245 {
1246 if (node->file == cur_node->file)
1247 {
1248 if (!no_warnings)
1249 {
1250 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1251 node->file, lineno, node->name);
1252 fprintf (stderr, "Second entry ignored\n");
1253 }
1254 return;
1255 }
1256 if (!cur_node->been_warned && !no_warnings)
1257 {
1258 fprintf (stderr,
1259 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1260 node->file, cur_node->file, node->name);
1261 }
1262 cur_node->been_warned = TRUE;
1263 return;
1264 }
1265
1266 /* Maybe refuse to add duplicate nodes. */
1267 if (!permit_duplicates)
1268 {
1269 if (streq (node->name, cur_node->name)
1270 && streq (node->file, cur_node->file))
1271 return;
1272 }
1273
1274 /* Actually add the node */
1275 add_node (node, dif < 0 ? &cur_node->left : &cur_node->right);
1276 }
1277 }
1278 \f
1279 void
1280 put_entries (node)
1281 register NODE *node;
1282 {
1283 register char *sp;
1284
1285 if (node == NULL)
1286 return;
1287
1288 /* Output subentries that precede this one */
1289 put_entries (node->left);
1290
1291 /* Output this entry */
1292
1293 if (!CTAGS)
1294 {
1295 if (node->named)
1296 {
1297 fprintf (tagf, "%s\177%s\001%d,%d\n",
1298 node->pat, node->name,
1299 node->lno, node->cno);
1300 }
1301 else
1302 {
1303 fprintf (tagf, "%s\177%d,%d\n",
1304 node->pat,
1305 node->lno, node->cno);
1306 }
1307 }
1308 else if (!cxref_style)
1309 {
1310 fprintf (tagf, "%s\t%s\t",
1311 node->name, node->file);
1312
1313 if (node->is_func)
1314 { /* a function */
1315 putc (searchar, tagf);
1316 putc ('^', tagf);
1317
1318 for (sp = node->pat; *sp; sp++)
1319 {
1320 if (*sp == '\\' || *sp == searchar)
1321 putc ('\\', tagf);
1322 putc (*sp, tagf);
1323 }
1324 putc (searchar, tagf);
1325 }
1326 else
1327 { /* a typedef; text pattern inadequate */
1328 fprintf (tagf, "%d", node->lno);
1329 }
1330 putc ('\n', tagf);
1331 }
1332 else if (vgrind_style)
1333 fprintf (stdout, "%s %s %d\n",
1334 node->name, node->file, (node->lno + 63) / 64);
1335 else
1336 fprintf (stdout, "%-16s %3d %-16s %s\n",
1337 node->name, node->lno, node->file, node->pat);
1338
1339 /* Output subentries that follow this one */
1340 put_entries (node->right);
1341 }
1342
/*
 * number_len ()
 *	Return the number of characters printf's %ld would produce for
 *	NUM: one per decimal digit, plus one for the minus sign when
 *	NUM is negative.  Zero has length 1.
 */
int
number_len (num)
     long num;
{
  int len = 1;			/* every number has at least one digit */

  if (num < 0)
    len++;			/* room for the '-' */
  /* Division truncates toward zero, so this also terminates for
     negative values without ever negating NUM.  */
  while ((num /= 10) != 0)
    len++;
  return len;
}
1355
1356 /*
1357 * Return total number of characters that put_entries will output for
1358 * the nodes in the subtree of the specified node. Works only if
1359 * we are not ctags, but called only in that case. This count
1360 * is irrelevant with the new tags.el, but is still supplied for
1361 * backward compatibility.
1362 */
1363 int
1364 total_size_of_entries (node)
1365 register NODE *node;
1366 {
1367 register int total;
1368
1369 if (node == NULL)
1370 return 0;
1371
1372 total = 0;
1373 for (; node; node = node->right)
1374 {
1375 /* Count left subentries. */
1376 total += total_size_of_entries (node->left);
1377
1378 /* Count this entry */
1379 total += strlen (node->pat) + 1;
1380 total += number_len ((long) node->lno) + 1 + number_len (node->cno) + 1;
1381 if (node->named)
1382 total += 1 + strlen (node->name); /* \001name */
1383 }
1384
1385 return total;
1386 }
1387 \f
/*
 * The C symbol tables: classification of the keywords listed in the
 * gperf input below.
 */
enum sym_type
{
  st_none,			/* not a recognized keyword */
  st_C_struct,			/* class, domain, union, struct */
  st_C_enum,			/* enum */
  st_C_define,			/* define */
  st_C_typedef,			/* typedef */
  st_C_typespec			/* type specifiers and qualifiers */
};
1395
1396 /* Feed stuff between (but not including) %[ and %] lines to:
1397 gperf -c -k1,3 -o -p -r -t
1398 %[
1399 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1400 %%
1401 class, C_PLPL, st_C_struct
1402 domain, C_STAR, st_C_struct
1403 union, 0, st_C_struct
1404 struct, 0, st_C_struct
1405 enum, 0, st_C_enum
1406 typedef, 0, st_C_typedef
1407 define, 0, st_C_define
1408 long, 0, st_C_typespec
1409 short, 0, st_C_typespec
1410 int, 0, st_C_typespec
1411 char, 0, st_C_typespec
1412 float, 0, st_C_typespec
1413 double, 0, st_C_typespec
1414 signed, 0, st_C_typespec
1415 unsigned, 0, st_C_typespec
1416 auto, 0, st_C_typespec
1417 void, 0, st_C_typespec
1418 extern, 0, st_C_typespec
1419 static, 0, st_C_typespec
1420 const, 0, st_C_typespec
1421 volatile, 0, st_C_typespec
1422 %]
1423 and replace lines between %< and %> with its output. */
1424 /*%<*/
1425 /* C code produced by gperf version 1.8.1 (K&R C version) */
1426 /* Command-line: gperf -c -k1,3 -o -p -r -t */
1427
1428
1429 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
1430
1431 #define MIN_WORD_LENGTH 3
1432 #define MAX_WORD_LENGTH 8
1433 #define MIN_HASH_VALUE 10
1434 #define MAX_HASH_VALUE 62
1435 /*
1436 21 keywords
1437 53 is the maximum key range
1438 */
1439
/*
 * hash ()
 *	Perfect hash of a candidate keyword: LEN plus the table values
 *	of its first and third characters.  STR must have at least
 *	three characters (in_word_set checks the length first).
 *
 *	The table was generated by gperf and covers byte values 0..127.
 *	The bytes are read as unsigned char and checked against the
 *	table size, so that a byte with the high bit set cannot index
 *	outside the table; for such bytes a value larger than any valid
 *	key (62) is returned, which in_word_set rejects.
 */
static int
hash (str, len)
     register char *str;
     register int len;
{
  static unsigned char hash_table[] =
    {
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62, 62, 62, 62,
     62, 62, 62, 62, 62, 62, 62,  2, 62,  7,
      6,  9, 15, 30, 62, 24, 62, 62,  1, 24,
      7, 27, 13, 62, 19, 26, 18, 27,  1, 62,
     62, 62, 62, 62, 62, 62, 62, 62,
    };
  unsigned char first = (unsigned char) str[0];
  unsigned char third = (unsigned char) str[2];

  /* No keyword contains such a byte: answer with an impossible key. */
  if (first >= sizeof hash_table || third >= sizeof hash_table)
    return len + 2 * 62;

  return len + hash_table[third] + hash_table[first];
}
1463
1464 struct C_stab_entry *
1465 in_word_set (str, len)
1466 register char *str;
1467 register int len;
1468 {
1469
1470 static struct C_stab_entry wordlist[] =
1471 {
1472 {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
1473 {"",},
1474 {"volatile", 0, st_C_typespec},
1475 {"",},
1476 {"long", 0, st_C_typespec},
1477 {"char", 0, st_C_typespec},
1478 {"class", C_PLPL, st_C_struct},
1479 {"",}, {"",}, {"",}, {"",},
1480 {"const", 0, st_C_typespec},
1481 {"",}, {"",}, {"",}, {"",},
1482 {"auto", 0, st_C_typespec},
1483 {"",}, {"",},
1484 {"define", 0, st_C_define},
1485 {"",},
1486 {"void", 0, st_C_typespec},
1487 {"",}, {"",}, {"",},
1488 {"extern", 0, st_C_typespec},
1489 {"static", 0, st_C_typespec},
1490 {"",},
1491 {"domain", C_STAR, st_C_struct},
1492 {"",},
1493 {"typedef", 0, st_C_typedef},
1494 {"double", 0, st_C_typespec},
1495 {"enum", 0, st_C_enum},
1496 {"",}, {"",}, {"",}, {"",},
1497 {"int", 0, st_C_typespec},
1498 {"",},
1499 {"float", 0, st_C_typespec},
1500 {"",}, {"",}, {"",},
1501 {"struct", 0, st_C_struct},
1502 {"",}, {"",}, {"",}, {"",},
1503 {"union", 0, st_C_struct},
1504 {"",},
1505 {"short", 0, st_C_typespec},
1506 {"",}, {"",},
1507 {"unsigned", 0, st_C_typespec},
1508 {"signed", 0, st_C_typespec},
1509 };
1510
1511 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
1512 {
1513 register int key = hash (str, len);
1514
1515 if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
1516 {
1517 register char *s = wordlist[key].name;
1518
1519 if (*s == *str && strneq (str + 1, s + 1, len - 1))
1520 return &wordlist[key];
1521 }
1522 }
1523 return 0;
1524 }
1525 /*%>*/
1526
1527 enum sym_type
1528 C_symtype(str, len, c_ext)
1529 char *str;
1530 int len;
1531 int c_ext;
1532 {
1533 register struct C_stab_entry *se = in_word_set(str, len);
1534
1535 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
1536 return st_none;
1537 return se->type;
1538 }
1539 \f
1540 /*
1541 * C functions are recognized using a simple finite automaton.
1542 * funcdef is its state variable.
1543 */
1544 typedef enum
1545 {
1546 fnone, /* nothing seen */
1547 ftagseen, /* function-like tag seen */
1548 fstartlist, /* just after open parenthesis */
1549 finlist, /* in parameter list */
1550 flistseen, /* after parameter list */
1551 fignore /* before open brace */
1552 } FUNCST;
1553 FUNCST funcdef;
1554
1555
1556 /*
1557 * typedefs are recognized using a simple finite automaton.
1558 * typeddef is its state variable.
1559 */
1560 typedef enum
1561 {
1562 tnone, /* nothing seen */
1563 ttypedseen, /* typedef keyword seen */
1564 tinbody, /* inside typedef body */
1565 tend, /* just before typedef tag */
1566 tignore /* junk after typedef tag */
1567 } TYPEDST;
1568 TYPEDST typdef;
1569
1570
1571 /*
1572 * struct-like structures (enum, struct and union) are recognized
1573 * using another simple finite automaton. `structdef' is its state
1574 * variable.
1575 */
1576 typedef enum
1577 {
1578 snone, /* nothing seen yet */
1579 skeyseen, /* struct-like keyword seen */
1580 stagseen, /* struct-like tag seen */
1581 scolonseen, /* colon seen after struct-like tag */
1582 sinbody /* in struct body: recognize member func defs*/
1583 } STRUCTST;
1584 STRUCTST structdef;
1585
1586 /*
1587 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
1588 * struct tag, and structtype is the type of the preceding struct-like
1589 * keyword.
1590 */
1591 char *structtag = "<uninited>";
1592 enum sym_type structtype;
1593
1594 /*
1595 * Yet another little state machine to deal with preprocessor lines.
1596 */
1597 typedef enum
1598 {
1599 dnone, /* nothing seen */
1600 dsharpseen, /* '#' seen as first char on line */
1601 ddefineseen, /* '#' and 'define' seen */
1602 dignorerest /* ignore rest of line */
1603 } DEFINEST;
1604 DEFINEST definedef;
1605
1606 /*
1607 * Set this to TRUE, and the next token considered is called a function.
1608 * Used only for GNU emacs's function-defining macros.
1609 */
1610 logical next_token_is_func;
1611
1612 /*
1613 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
1614 */
1615 logical yacc_rules;
1616
1617 /*
1618 * consider_token ()
1619 * checks to see if the current token is at the start of a
1620 * function, or corresponds to a typedef, or is a struct/union/enum
1621 * tag.
1622 *
1623 * *IS_FUNC gets TRUE iff the token is a function or macro with args.
1624 * C_EXT is which language we are looking at.
1625 *
1626 * In the future we will need some way to adjust where the end of
1627 * the token is; for instance, implementing the C++ keyword
1628 * `operator' properly will adjust the end of the token to be after
1629 * whatever follows `operator'.
1630 *
1631 * Globals
1632 * funcdef IN OUT
1633 * structdef IN OUT
1634 * definedef IN OUT
1635 * typdef IN OUT
1636 * next_token_is_func IN OUT
1637 */
1638
/*
 * Return value: TRUE when the caller should record the token as a tag
 * (possibly later, depending on the state variables), FALSE otherwise.
 * *IS_FUNC is written only on some paths; the caller in C_entries sets
 * it to FALSE before each call.
 */
1639 logical
1640 consider_token (str, len, c, c_ext, cblev, is_func)
1641 register char *str; /* IN: token pointer */
1642 register int len; /* IN: token length */
1643 register char c; /* IN: first char after the token */
1644 int c_ext; /* IN: C extensions mask */
1645 int cblev; /* IN: curly brace level */
1646 logical *is_func; /* OUT: function found */
1647 {
/* Keyword class of the token, st_none if it is not a known keyword. */
1648 enum sym_type toktype = C_symtype (str, len, c_ext);
1649
1650 /*
1651 * Advance the definedef state machine.
1652 */
1653 switch (definedef)
1654 {
1655 case dnone:
1656 /* We're not on a preprocessor line. */
1657 break;
1658 case dsharpseen:
/* First token after '#': only `define' is of interest. */
1659 if (toktype == st_C_define)
1660 {
1661 definedef = ddefineseen;
1662 }
1663 else
1664 {
1665 definedef = dignorerest;
1666 }
1667 return FALSE;
1668 case ddefineseen:
1669 /*
1670 * Make a tag for any macro, unless it is a constant
1671 * and constantypedefs is FALSE.
1672 */
1673 definedef = dignorerest;
/* A '(' right after the name means a macro with arguments. */
1674 *is_func = (c == '(');
1675 if (!*is_func && !constantypedefs)
1676 return FALSE;
1677 else
1678 return TRUE;
1679 case dignorerest:
1680 return FALSE;
1681 default:
/* No return here: if error () comes back, control leaves the switch
   and the token is processed by the code below. */
1682 error ("internal error: definedef value.", 0);
1683 }
1684
1685 /*
1686 * Now typedefs
1687 */
1688 switch (typdef)
1689 {
1690 case tnone:
1691 if (toktype == st_C_typedef)
1692 {
/* The typedef machine is started only when typedefs are wanted; the
   keyword itself is never a tag and resets funcdef either way. */
1693 if (typedefs)
1694 typdef = ttypedseen;
1695 funcdef = fnone;
1696 return FALSE;
1697 }
1698 break;
1699 case ttypedseen:
1700 switch (toktype)
1701 {
1702 case st_none:
1703 case st_C_typespec:
1704 typdef = tend;
1705 break;
1706 case st_C_struct:
1707 case st_C_enum:
1708 break;
1709 }
1710 /* Do not return here, so the structdef stuff has a chance. */
1711 break;
1712 case tend:
/* After the base type: further type keywords are skipped, any other
   token is reported as the candidate typedef name. */
1713 switch (toktype)
1714 {
1715 case st_C_typespec:
1716 case st_C_struct:
1717 case st_C_enum:
1718 return FALSE;
1719 }
1720 return TRUE;
1721 }
1722
1723 /*
1724 * This structdef business is currently only invoked when cblev==0.
1725 * It should be recursively invoked whatever the curly brace level,
1726 * and a stack of states kept, to allow for definitions of structs
1727 * within structs.
1728 *
1729 * This structdef business is NOT invoked when we are ctags and the
1730 * file is plain C. This is because a struct tag may have the same
1731 * name as another tag, and this loses with ctags.
1732 *
1733 * This if statement deals with the typdef state machine as
1734 * follows: if typdef==ttypedseen and token is struct/union/class/enum,
1735 * return FALSE. All the other code here is for the structdef
1736 * state machine.
1737 */
1738 switch (toktype)
1739 {
1740 case st_C_struct:
1741 case st_C_enum:
1742 if (typdef == ttypedseen
1743 || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
1744 {
1745 structdef = skeyseen;
1746 structtype = toktype;
1747 }
1748 return FALSE;
1749 }
/* The token following a struct-like keyword is its tag. */
1750 if (structdef == skeyseen)
1751 {
1752 /* Save the tag for struct/union/class, for functions that may be
1753 defined inside. */
1754 if (structtype == st_C_struct)
1755 structtag = savenstr (str, len);
1756 else
1757 structtag = "<enum>";
1758 structdef = stagseen;
1759 return TRUE;
1760 }
1761
1762 /* Avoid entering funcdef stuff if typdef is going on. */
1763 if (typdef != tnone)
1764 {
1765 definedef = dnone;
1766 return FALSE;
1767 }
1768
1769 /* Detect GNU macros. */
/* Every definedef state other than dnone has returned above, so the
   test below fails only after the internal-error case.  The macro
   name itself is not tagged; the token after it will be. */
1770 if (definedef == dnone)
1771 if (strneq (str, "DEFUN", 5) /* Used in emacs */
1772 #if FALSE
1773 These are defined inside C functions, so currently they
1774 are not met anyway.
1775 || strneq (str, "EXFUN", 5) /* Used in glibc */
1776 || strneq (str, "DEFVAR_", 7) /* Used in emacs */
1777 #endif
1778 || strneq (str, "SYSCALL", 7) /* Used in glibc (mach) */
1779 || strneq (str, "ENTRY", 5) /* Used in glibc */
1780 || strneq (str, "PSEUDO", 6)) /* Used in glibc */
1781
1782 {
1783 next_token_is_func = TRUE;
1784 return FALSE;
1785 }
1786 if (next_token_is_func)
1787 {
1788 next_token_is_func = FALSE;
1789 funcdef = fignore;
1790 *is_func = TRUE;
1791 return TRUE;
1792 }
1793
1794 /* A function? */
1795 switch (toktype)
1796 {
1797 case st_C_typespec:
1798 if (funcdef != finlist && funcdef != fignore)
1799 funcdef = fnone; /* should be useless */
1800 return FALSE;
1801 default:
/* Any other token seen while funcdef is idle is a candidate function
   name; the caller decides later whether it really becomes a tag. */
1802 if (funcdef == fnone)
1803 {
1804 funcdef = ftagseen;
1805 *is_func = TRUE;
1806 return TRUE;
1807 }
1808 }
1809
1810 return FALSE;
1811 }
1812
1813 /*
1814 * C_entries ()
1815 * This routine finds functions, typedefs, #define's and
1816 * struct/union/enum definitions in C syntax and adds them
1817 * to the list.
1818 */
1819 typedef struct
1820 {
1821 logical valid;
1822 char *str;
1823 logical named;
1824 int linelen;
1825 int lineno;
1826 long linepos;
1827 char *buffer;
1828 } TOKEN;
1829
1830 #define current_lb_is_new (newndx == curndx)
1831 #define switch_line_buffers() (curndx = 1 - curndx)
1832
1833 #define curlb (lbs[curndx].lb)
1834 #define othlb (lbs[1-curndx].lb)
1835 #define newlb (lbs[newndx].lb)
1836 #define curlinepos (lbs[curndx].linepos)
1837 #define othlinepos (lbs[1-curndx].linepos)
1838 #define newlinepos (lbs[newndx].linepos)
1839
1840 #define CNL_SAVE_DEFINEDEF \
1841 do { \
1842 curlinepos = charno; \
1843 lineno++; \
1844 charno += readline (&curlb, inf); \
1845 lp = curlb.buffer; \
1846 quotednl = FALSE; \
1847 newndx = curndx; \
1848 } while (0)
1849
1850 #define CNL \
1851 do { \
1852 CNL_SAVE_DEFINEDEF; \
1853 if (savetok.valid) \
1854 { \
1855 tok = savetok; \
1856 savetok.valid = FALSE; \
1857 } \
1858 definedef = dnone; \
1859 } while (0)
1860
1861 #define make_tag(isfun) do \
1862 { \
1863 if (tok.valid) \
1864 pfnote (savestr (token_name.buffer), isfun, tok.named, \
1865 tok.buffer, tok.linelen, tok.lineno, tok.linepos); \
1866 else if (DEBUG) abort (); \
1867 tok.valid = FALSE; \
1868 } while (0)
1869
/*
 * Scan INF one character at a time and record tags through make_tag.
 * C_EXT is a bit mask: C_PLPL turns on the C++ handling tested through
 * `cplpl', YACC turns on the `%%' section toggling.  Lines are read by
 * the CNL macros into one of the two buffers of lbs[]; a '\0' in the
 * buffer stands for end of line.  The global state machines funcdef,
 * typdef, structdef and definedef are reset on entry.  The loop runs
 * until feof (inf); INF is not closed here.
 */
1870 void
1871 C_entries (c_ext, inf)
1872 int c_ext; /* extension of C */
1873 FILE *inf; /* input file */
1874 {
1875 register char c; /* latest char read; '\0' for end of line */
1876 register char *lp; /* pointer one beyond the character `c' */
1877 int curndx, newndx; /* indices for current and new lb */
1878 TOKEN tok; /* latest token read */
1879 register int tokoff; /* offset in line of start of current token */
1880 register int toklen; /* length of current token */
1881 int cblev; /* current curly brace level */
1882 int parlev; /* current parenthesis level */
1883 logical incomm, inquote, inchar, quotednl, midtoken;
1884 logical cplpl;
1885 TOKEN savetok; /* token saved during preprocessor handling */
1886
1887
/* Begin with an empty line in buffer 0: the first character seen is
   '\0', which makes the end-of-line case below read the first line. */
1888 curndx = newndx = 0;
1889 lineno = 0;
1890 charno = 0;
1891 lp = curlb.buffer;
1892 *lp = 0;
1893
1894 definedef = dnone; funcdef = fnone; typdef = tnone; structdef = snone;
1895 next_token_is_func = yacc_rules = FALSE;
1896 midtoken = inquote = inchar = incomm = quotednl = FALSE;
1897 tok.valid = savetok.valid = FALSE;
1898 cblev = 0;
1899 parlev = 0;
1900 cplpl = c_ext & C_PLPL;
1901
1902 while (!feof (inf))
1903 {
1904 c = *lp++;
/* First stage: backslashes, comments, string and character constants,
   and the characters that open them or start a preprocessor line. */
1905 if (c == '\\')
1906 {
1907 /* If we're at the end of the line, the next character is a
1908 '\0'; don't skip it, because it's the thing that tells us
1909 to read the next line. */
1910 if (*lp == '\0')
1911 {
1912 quotednl = TRUE;
1913 continue;
1914 }
/* Otherwise skip the escaped character and treat the pair as a blank. */
1915 lp++;
1916 c = ' ';
1917 }
1918 else if (incomm)
1919 {
1920 switch (c)
1921 {
1922 case '*':
1923 if (*lp == '/')
1924 {
1925 c = *lp++;
1926 incomm = FALSE;
1927 }
1928 break;
1929 case '\0':
1930 /* Newlines inside comments do not end macro definitions in
1931 traditional cpp. */
1932 CNL_SAVE_DEFINEDEF;
1933 break;
1934 }
1935 continue;
1936 }
1937 else if (inquote)
1938 {
1939 switch (c)
1940 {
1941 case '"':
1942 inquote = FALSE;
1943 break;
1944 case '\0':
1945 /* Newlines inside strings do not end macro definitions
1946 in traditional cpp, even though compilers don't
1947 usually accept them. */
1948 CNL_SAVE_DEFINEDEF;
1949 break;
1950 }
1951 continue;
1952 }
1953 else if (inchar)
1954 {
1955 switch (c)
1956 {
1957 case '\0':
1958 /* Hmmm, something went wrong. */
1959 CNL;
1960 /* FALLTHRU */
1961 case '\'':
1962 inchar = FALSE;
1963 break;
1964 }
1965 continue;
1966 }
1967 else
1968 switch (c)
1969 {
1970 case '"':
1971 inquote = TRUE;
1972 if (funcdef != finlist && funcdef != fignore)
1973 funcdef = fnone;
1974 continue;
1975 case '\'':
1976 inchar = TRUE;
1977 if (funcdef != finlist && funcdef != fignore)
1978 funcdef = fnone;
1979 continue;
1980 case '/':
1981 if (*lp == '*')
1982 {
1983 lp++;
1984 incomm = TRUE;
1985 continue;
1986 }
1987 else if (cplpl && *lp == '/')
1988 {
/* C++ `//' comment: turn the character into the end-of-line marker so
   that the rest of the line is dropped by the '\0' case below. */
1989 c = 0;
1990 break;
1991 }
1992 else
1993 break;
1994 case '%':
1995 if ((c_ext & YACC) && *lp == '%')
1996 {
1997 /* entering or exiting rules section in yacc file */
1998 lp++;
1999 definedef = dnone; funcdef = fnone;
2000 typdef = tnone; structdef = snone;
2001 next_token_is_func = FALSE;
2002 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2003 cblev = 0;
2004 yacc_rules = !yacc_rules;
2005 continue;
2006 }
2007 else
2008 break;
2009 case '#':
2010 if (definedef == dnone)
2011 {
2012 char *cp;
2013 logical cpptoken = TRUE;
2014
2015 /* Look back on this line. If all blanks, or nonblanks
2016 followed by an end of comment, this is a preprocessor
2017 token. */
2018 for (cp = newlb.buffer; cp < lp-1; cp++)
2019 if (!iswhite (*cp))
2020 {
2021 if (*cp == '*' && *(cp+1) == '/')
2022 {
2023 cp++;
2024 cpptoken = TRUE;
2025 }
2026 else
2027 cpptoken = FALSE;
2028 }
2029 if (cpptoken)
2030 definedef = dsharpseen;
2031 } /* if (definedef == dnone) */
2032
2033 continue;
2034 } /* switch (c) */
2035
2036
/* Second stage: token recognition. */
2037 /* Consider token only if some complicated conditions are satisfied. */
2038 if ((definedef != dnone
2039 || (cblev == 0 && structdef != scolonseen)
2040 || (cblev == 1 && cplpl && structdef == sinbody))
2041 && typdef != tignore
2042 && definedef != dignorerest
2043 && funcdef != finlist)
2044 {
2045 if (midtoken)
2046 {
2047 if (endtoken (c))
2048 {
2049 if (cplpl && c == ':' && *lp == ':' && begtoken(*(lp + 1)))
2050 {
2051 /*
2052 * This handles :: in the middle, but not at the
2053 * beginning of an identifier.
2054 */
2055 lp += 2;
2056 toklen += 3;
2057 }
2058 else
2059 {
2060 logical is_func = FALSE;
2061
/* In the yacc rules section every token is accepted without asking
   consider_token. */
2062 if (yacc_rules
2063 || consider_token (newlb.buffer + tokoff, toklen,
2064 c, c_ext, cblev, &is_func))
2065 {
2066 if (structdef == sinbody
2067 && definedef == dnone
2068 && is_func)
2069 /* function defined in C++ class body */
2070 {
/* Room for "<structtag>::<token>" and the terminating null. */
2071 int strsize = strlen(structtag) + 2 + toklen + 1;
2072 while (token_name.size < strsize)
2073 {
2074 token_name.size *= 2;
2075 token_name.buffer
2076 = (char *) xrealloc (token_name.buffer,
2077 token_name.size);
2078 }
2079 strcpy (token_name.buffer, structtag);
2080 strcat (token_name.buffer, "::");
2081 strncat (token_name.buffer,
2082 newlb.buffer+tokoff, toklen);
2083 tok.named = TRUE;
2084 }
2085 else
2086 {
2087 while (token_name.size < toklen + 1)
2088 {
2089 token_name.size *= 2;
2090 token_name.buffer
2091 = (char *) xrealloc (token_name.buffer,
2092 token_name.size);
2093 }
2094 strncpy (token_name.buffer,
2095 newlb.buffer+tokoff, toklen);
2096 token_name.buffer[toklen] = '\0';
2097 if (structdef == stagseen
2098 || typdef == tend
2099 || (is_func
2100 && definedef == dignorerest)) /* macro */
2101 tok.named = TRUE;
2102 else
2103 tok.named = FALSE;
2104 }
2105 tok.lineno = lineno;
2106 tok.linelen = tokoff + toklen + 1;
2107 tok.buffer = newlb.buffer;
2108 tok.linepos = newlinepos;
2109 tok.valid = TRUE;
2110
/* When the decision to tag depends on what follows, the token is kept
   pending and scanning moves to the other line buffer -- presumably so
   that tok.buffer stays intact while further lines are read.
   Otherwise the tag is made at once. */
2111 if (definedef == dnone
2112 && (funcdef == ftagseen
2113 || structdef == stagseen
2114 || typdef == tend))
2115 {
2116 if (current_lb_is_new)
2117 switch_line_buffers ();
2118 }
2119 else
2120 make_tag (is_func);
2121 }
2122 midtoken = FALSE;
2123 }
2124 } /* if (endtoken (c)) */
2125 else if (intoken (c))
2126 {
2127 toklen++;
2128 continue;
2129 }
2130 } /* if (midtoken) */
2131 else if (begtoken (c))
2132 {
2133 switch (definedef)
2134 {
2135 case dnone:
2136 switch (funcdef)
2137 {
2138 case fstartlist:
2139 funcdef = finlist;
2140 continue;
2141 case flistseen:
2142 make_tag (TRUE);
2143 funcdef = fignore;
2144 break;
2145 case ftagseen:
2146 funcdef = fnone;
2147 break;
2148 }
2149 if (structdef == stagseen)
2150 structdef = snone;
2151 break;
2152 case dsharpseen:
/* Keep the pending token across the preprocessor line; CNL restores it. */
2153 savetok = tok;
2154 }
/* In the yacc rules section only a token that starts in the first
   column begins a new token. */
2155 if (!yacc_rules || lp == newlb.buffer + 1)
2156 {
2157 tokoff = lp - 1 - newlb.buffer;
2158 toklen = 1;
2159 midtoken = TRUE;
2160 }
2161 continue;
2162 } /* if (begtoken) */
2163 } /* if must look at token */
2164
2165
/* Third stage: punctuation drives the state machines.  Except for
   end of line, every case does nothing on a preprocessor line. */
2166 /* Detect end of line, colon, comma, semicolon and various braces
2167 after having handled a token.*/
2168 switch (c)
2169 {
2170 case ':':
2171 if (definedef != dnone)
2172 break;
2173 if (structdef == stagseen)
2174 structdef = scolonseen;
2175 else
2176 switch (funcdef)
2177 {
2178 case ftagseen:
2179 if (yacc_rules)
2180 {
2181 make_tag (FALSE);
2182 funcdef = fignore;
2183 }
2184 break;
2185 case fstartlist:
2186 funcdef = fnone;
2187 break;
2188 }
2189 break;
2190 case ';':
2191 if (definedef != dnone)
2192 break;
2193 if (cblev == 0)
2194 switch (typdef)
2195 {
2196 case tend:
2197 make_tag (FALSE);
2198 /* FALLTHRU */
2199 default:
2200 typdef = tnone;
2201 }
2202 if (funcdef != fignore)
2203 funcdef = fnone;
2204 if (structdef == stagseen)
2205 structdef = snone;
2206 break;
2207 case ',':
2208 if (definedef != dnone)
2209 break;
2210 if (funcdef != finlist && funcdef != fignore)
2211 funcdef = fnone;
2212 if (structdef == stagseen)
2213 structdef = snone;
2214 break;
2215 case '[':
2216 if (definedef != dnone)
2217 break;
2218 if (cblev == 0 && typdef == tend)
2219 {
2220 typdef = tignore;
2221 make_tag (FALSE);
2222 break;
2223 }
2224 if (funcdef != finlist && funcdef != fignore)
2225 funcdef = fnone;
2226 if (structdef == stagseen)
2227 structdef = snone;
2228 break;
2229 case '(':
2230 if (definedef != dnone)
2231 break;
2232 switch (funcdef)
2233 {
2234 case fnone:
2235 switch (typdef)
2236 {
2237 case ttypedseen:
2238 case tend:
2239 /* Make sure that the next char is not a '*'.
2240 This handles constructs like:
2241 typedef void OperatorFun (int fun); */
2242 if (*lp != '*')
2243 {
2244 typdef = tignore;
2245 make_tag (FALSE);
2246 }
2247 break;
2248 } /* switch (typdef) */
2249 break;
2250 case ftagseen:
2251 funcdef = fstartlist;
2252 break;
2253 case flistseen:
2254 funcdef = finlist;
2255 break;
2256 }
2257 parlev++;
2258 break;
2259 case ')':
2260 if (definedef != dnone)
2261 break;
2262 if (--parlev == 0)
2263 {
2264 switch (funcdef)
2265 {
2266 case fstartlist:
2267 case finlist:
2268 funcdef = flistseen;
2269 break;
2270 }
2271 if (cblev == 0 && typdef == tend)
2272 {
2273 typdef = tignore;
2274 make_tag (FALSE);
2275 }
2276 }
2277 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
2278 parlev = 0;
2279 break;
2280 case '{':
2281 if (definedef != dnone)
2282 break;
2283 if (typdef == ttypedseen)
2284 typdef = tinbody;
2285 switch (structdef)
2286 {
2287 case skeyseen: /* unnamed struct */
2288 structtag = "_anonymous_";
2289 structdef = sinbody;
2290 break;
2291 case stagseen:
2292 case scolonseen: /* named struct */
2293 structdef = sinbody;
2294 make_tag (FALSE);
2295 break;
2296 }
2297 switch (funcdef)
2298 {
2299 case flistseen:
2300 make_tag (TRUE);
2301 /* FALLTHRU */
2302 case fignore:
2303 funcdef = fnone;
2304 break;
2305 case fnone:
2306 /* Neutralize `extern "C" {' grot and look inside structs. */
/* Setting the level to -1 cancels the increment that follows, so this
   brace does not count as a nesting level. */
2307 if (cblev == 0 && structdef == snone && typdef == tnone)
2308 cblev = -1;
2309 }
2310 cblev++;
2311 break;
2312 case '*':
2313 if (definedef != dnone)
2314 break;
2315 if (funcdef == fstartlist)
2316 funcdef = fnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
2317 break;
2318 case '}':
2319 if (definedef != dnone)
2320 break;
2321 if (!noindentypedefs && lp == newlb.buffer + 1)
2322 {
2323 cblev = 0; /* reset curly brace level if first column */
2324 parlev = 0; /* also reset paren level, just in case... */
2325 }
2326 else if (cblev > 0)
2327 cblev--;
2328 if (cblev == 0)
2329 {
2330 if (typdef == tinbody)
2331 typdef = tend;
2332 #if FALSE /* too risky */
2333 if (structdef == sinbody)
2334 free (structtag);
2335 #endif
2336
2337 structdef = snone;
2338 structtag = "<error>";
2339 }
2340 break;
2341 case '=':
2342 case '#': case '+': case '-': case '~': case '&': case '%': case '/':
2343 case '|': case '^': case '!': case '<': case '>': case '.': case '?':
2344 if (definedef != dnone)
2345 break;
2346 /* These surely cannot follow a function tag. */
2347 if (funcdef != finlist && funcdef != fignore)
2348 funcdef = fnone;
2349 break;
2350 case '\0':
2351 /* If a macro spans multiple lines don't reset its state. */
2352 if (quotednl)
2353 CNL_SAVE_DEFINEDEF;
2354 else
2355 CNL;
2356 break;
2357 } /* switch (c) */
2358
2359 } /* while not eof */
2360 }
2361
2362 /*
2363 * Process either a C++ file or a C file depending on the setting
2364 * of a global flag.
2365 */
2366 void
2367 default_C_entries (inf)
2368 FILE *inf;
2369 {
2370 C_entries (cplusplus ? C_PLPL : 0, inf);
2371 }
2372
2373 /* Always do C++. */
2374 void
2375 Cplusplus_entries (inf)
2376 FILE *inf;
2377 {
2378 C_entries (C_PLPL, inf);
2379 }
2380
2381 /* Always do C*. */
2382 void
2383 Cstar_entries (inf)
2384 FILE *inf;
2385 {
2386 C_entries (C_STAR, inf);
2387 }
2388
2389 /* Always do Yacc. */
2390 void
2391 Yacc_entries (inf)
2392 FILE *inf;
2393 {
2394 C_entries (YACC, inf);
2395 }
2396 \f
2397 /* Fortran parsing */
2398
2399 char *dbp;
2400
2401 logical
2402 tail (cp)
2403 char *cp;
2404 {
2405 register int len = 0;
2406
2407 while (*cp && (*cp | ' ') == (dbp[len] | ' '))
2408 cp++, len++;
2409 if (*cp == 0)
2410 {
2411 dbp += len;
2412 return TRUE;
2413 }
2414 return FALSE;
2415 }
2416
2417 void
2418 takeprec ()
2419 {
2420 while (isspace (*dbp))
2421 dbp++;
2422 if (*dbp != '*')
2423 return;
2424 dbp++;
2425 while (isspace (*dbp))
2426 dbp++;
2427 if (tail ("(*)"))
2428 return;
2429 if (!isdigit (*dbp))
2430 {
2431 --dbp; /* force failure */
2432 return;
2433 }
2434 do
2435 dbp++;
2436 while (isdigit (*dbp));
2437 }
2438
2439 void
2440 getit (inf)
2441 FILE *inf;
2442 {
2443 register char *cp;
2444
2445 while (isspace (*dbp))
2446 dbp++;
2447 if (*dbp == '\0')
2448 {
2449 lineno++;
2450 linecharno = charno;
2451 charno += readline (&lb, inf);
2452 dbp = lb.buffer;
2453 if (dbp[5] != '&')
2454 return;
2455 dbp += 6;
2456 while (isspace (*dbp))
2457 dbp++;
2458 }
2459 if (!isalpha (*dbp)
2460 && *dbp != '_'
2461 && *dbp != '$')
2462 return;
2463 for (cp = dbp + 1;
2464 (*cp
2465 && (isalpha (*cp) || isdigit (*cp) || (*cp == '_') || (*cp == '$')));
2466 cp++)
2467 continue;
2468 pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE, lb.buffer,
2469 cp - lb.buffer + 1, lineno, linecharno);
2470 }
2471
2472 void
2473 Fortran_functions (inf)
2474 FILE *inf;
2475 {
2476 lineno = 0;
2477 charno = 0;
2478
2479 while (!feof (inf))
2480 {
2481 lineno++;
2482 linecharno = charno;
2483 charno += readline (&lb, inf);
2484 dbp = lb.buffer;
2485 if (*dbp == '%')
2486 dbp++; /* Ratfor escape to fortran */
2487 while (isspace (*dbp))
2488 dbp++;
2489 if (*dbp == 0)
2490 continue;
2491 switch (*dbp | ' ')
2492 {
2493 case 'i':
2494 if (tail ("integer"))
2495 takeprec ();
2496 break;
2497 case 'r':
2498 if (tail ("real"))
2499 takeprec ();
2500 break;
2501 case 'l':
2502 if (tail ("logical"))
2503 takeprec ();
2504 break;
2505 case 'c':
2506 if (tail ("complex") || tail ("character"))
2507 takeprec ();
2508 break;
2509 case 'd':
2510 if (tail ("double"))
2511 {
2512 while (isspace (*dbp))
2513 dbp++;
2514 if (*dbp == 0)
2515 continue;
2516 if (tail ("precision"))
2517 break;
2518 continue;
2519 }
2520 break;
2521 }
2522 while (isspace (*dbp))
2523 dbp++;
2524 if (*dbp == 0)
2525 continue;
2526 switch (*dbp | ' ')
2527 {
2528 case 'f':
2529 if (tail ("function"))
2530 getit (inf);
2531 continue;
2532 case 's':
2533 if (tail ("subroutine"))
2534 getit (inf);
2535 continue;
2536 case 'e':
2537 if (tail ("entry"))
2538 getit (inf);
2539 continue;
2540 case 'p':
2541 if (tail ("program"))
2542 {
2543 getit (inf);
2544 continue;
2545 }
2546 if (tail ("procedure"))
2547 getit (inf);
2548 continue;
2549 }
2550 }
2551 }
2552 \f
2553 /*
2554 * Bob Weiner, Motorola Inc., 4/3/94
2555 * Unix and microcontroller assembly tag handling
2556 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
2557 */
2558 void
2559 Asm_labels (inf)
2560 FILE *inf;
2561 {
2562 register char *cp;
2563
2564 lineno = 0;
2565 charno = 0;
2566
2567 while (!feof (inf))
2568 {
2569 lineno++;
2570 linecharno = charno;
2571 charno += readline (&lb, inf);
2572 cp = lb.buffer;
2573
2574 /* If first char is alphabetic or one of [_.$], test for colon
2575 following identifier. */
2576 if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2577 {
2578 /* Read past label. */
2579 cp++;
2580 while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
2581 cp++;
2582 if (*cp == ':' || isspace (*cp))
2583 {
2584 /* Found end of label, so copy it and add it to the table. */
2585 pfnote (savenstr (lb.buffer, cp-lb.buffer), TRUE, FALSE,
2586 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2587 }
2588 }
2589 }
2590 }
2591 \f
2592 /* Added by Mosur Mohan, 4/22/88 */
2593 /* Pascal parsing */
2594
/* Read the next input line into lb and reset dbp to its start.
   readline already counts the newline in its return value, so
   nothing extra is added to charno.  Expects a FILE *inf in scope.  */
#define GET_NEW_LINE                    \
  do                                    \
    {                                   \
      linecharno = charno; lineno++;    \
      charno += readline (&lb, inf);    \
      dbp = lb.buffer;                  \
    }                                   \
  while (0)
2601
2602 /*
2603 * Locates tags for procedures & functions. Doesn't do any type- or
2604 * var-definitions. It does look for the keyword "extern" or
2605 * "forward" immediately following the procedure statement; if found,
2606 * the tag is skipped.
2607 */
2608 void
2609 Pascal_functions (inf)
2610 FILE *inf;
2611 {
2612 struct linebuffer tline; /* mostly copied from C_entries */
2613 long save_lcno;
2614 int save_lineno;
2615 char c, *cp;
2616 char *nambuf;
2617
2618 logical /* each of these flags is TRUE iff: */
2619 incomment, /* point is inside a comment */
2620 inquote, /* point is inside '..' string */
2621 get_tagname, /* point is after PROCEDURE/FUNCTION */
2622 /* keyword, so next item = potential tag */
2623 found_tag, /* point is after a potential tag */
2624 inparms, /* point is within parameter-list */
2625 verify_tag; /* point has passed the parm-list, so the */
2626 /* next token will determine whether */
2627 /* this is a FORWARD/EXTERN to be */
2628 /* ignored, or whether it is a real tag */
2629
2630 lineno = 0;
2631 charno = 0;
2632 dbp = lb.buffer;
2633 *dbp = 0;
2634 initbuffer (&tline);
2635
2636 incomment = inquote = FALSE;
2637 found_tag = FALSE; /* have a proc name; check if extern */
2638 get_tagname = FALSE; /* have found "procedure" keyword */
2639 inparms = FALSE; /* found '(' after "proc" */
2640 verify_tag = FALSE; /* check if "extern" is ahead */
2641
2642 /* long main loop to get next char */
2643 while (!feof (inf))
2644 {
2645 c = *dbp++;
2646 if (c == '\0') /* if end of line */
2647 {
2648 GET_NEW_LINE;
2649 if (*dbp == '\0')
2650 continue;
2651 if (!((found_tag && verify_tag) ||
2652 get_tagname))
2653 c = *dbp++; /* only if don't need *dbp pointing */
2654 /* to the beginning of the name of */
2655 /* the procedure or function */
2656 }
2657 if (incomment)
2658 {
2659 if (c == '}') /* within { - } comments */
2660 incomment = FALSE;
2661 else if (c == '*' && dbp[1] == ')') /* within (* - *) comments */
2662 {
2663 dbp++;
2664 incomment = FALSE;
2665 }
2666 continue;
2667 }
2668 else if (inquote)
2669 {
2670 if (c == '\'')
2671 inquote = FALSE;
2672 continue;
2673 }
2674 else
2675 switch (c)
2676 {
2677 case '\'':
2678 inquote = TRUE; /* found first quote */
2679 continue;
2680 case '{': /* found open-{-comment */
2681 incomment = TRUE;
2682 continue;
2683 case '(':
2684 if (*dbp == '*') /* found open-(*-comment */
2685 {
2686 incomment = TRUE;
2687 dbp++;
2688 }
2689 else if (found_tag) /* found '(' after tag, i.e., parm-list */
2690 inparms = TRUE;
2691 continue;
2692 case ')': /* end of parms list */
2693 if (inparms)
2694 inparms = FALSE;
2695 continue;
2696 case ';':
2697 if ((found_tag) && (!inparms)) /* end of proc or fn stmt */
2698 {
2699 verify_tag = TRUE;
2700 break;
2701 }
2702 continue;
2703 }
2704 if ((found_tag) && (verify_tag) && (*dbp != ' '))
2705 {
2706 /* check if this is an "extern" declaration */
2707 if (*dbp == 0)
2708 continue;
2709 if ((*dbp == 'e') || (*dbp == 'E'))
2710 {
2711 if (tail ("extern")) /* superfluous, really! */
2712 {
2713 found_tag = FALSE;
2714 verify_tag = FALSE;
2715 }
2716 }
2717 else if ((*dbp == 'f') || (*dbp == 'F'))
2718 {
2719 if (tail ("forward")) /* check for forward reference */
2720 {
2721 found_tag = FALSE;
2722 verify_tag = FALSE;
2723 }
2724 }
2725 if ((found_tag) && (verify_tag)) /* not external proc, so make tag */
2726 {
2727 found_tag = FALSE;
2728 verify_tag = FALSE;
2729 pfnote (nambuf, TRUE, FALSE, tline.buffer,
2730 cp - tline.buffer + 1, save_lineno, save_lcno);
2731 continue;
2732 }
2733 }
2734 if (get_tagname) /* grab name of proc or fn */
2735 {
2736 if (*dbp == 0)
2737 continue;
2738
2739 /* save all values for later tagging */
2740 tline.size = lb.size;
2741 strcpy (tline.buffer, lb.buffer);
2742 save_lineno = lineno;
2743 save_lcno = linecharno;
2744
2745 /* grab block name */
2746 for (cp = dbp + 1; *cp && (!endtoken (*cp)); cp++)
2747 continue;
2748 nambuf = savenstr (dbp, cp-dbp);
2749 dbp = cp; /* restore dbp to e-o-token */
2750 get_tagname = FALSE;
2751 found_tag = TRUE;
2752 continue;
2753
2754 /* and proceed to check for "extern" */
2755 }
2756 else if (!incomment && !inquote && !found_tag)
2757 {
2758 /* check for proc/fn keywords */
2759 switch (c | ' ')
2760 {
2761 case 'p':
2762 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
2763 get_tagname = TRUE;
2764 continue;
2765 case 'f':
2766 if (tail ("unction"))
2767 get_tagname = TRUE;
2768 continue;
2769 }
2770 }
2771 } /* while not eof */
2772 }
2773 \f
2774 /*
2775 * lisp tag functions
2776 * look for (def or (DEF, quote or QUOTE
2777 */
/* Return 1 if the three characters following STRP[0] spell "def"
   in any mixture of upper and lower case, else 0.  STRP[0] itself
   (normally the open parenthesis) is not examined.  */
int
L_isdef (strp)
     register char *strp;
{
  static char lower[] = "def";
  static char upper[] = "DEF";
  int i;

  for (i = 0; i < 3; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;
  return 1;
}
2786
/* Return 1 if STRP[1..5] spell "quote" in any mixture of case and
   STRP[6] is white space, else 0.  STRP[0] is not examined.  */
int
L_isquote (strp)
     register char *strp;
{
  static char lower[] = "quote";
  static char upper[] = "QUOTE";
  int i;

  for (i = 0; i < 5; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;
  return isspace (strp[6]) != 0;
}
2798
2799 void
2800 L_getit ()
2801 {
2802 register char *cp;
2803
2804 if (*dbp == '\'') /* Skip prefix quote */
2805 dbp++;
2806 else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */
2807 {
2808 dbp += 7;
2809 while (isspace(*dbp))
2810 dbp++;
2811 }
2812 for (cp = dbp /*+1*/;
2813 *cp && *cp != '(' && *cp != ' ' && *cp != ')';
2814 cp++)
2815 continue;
2816 if (cp == dbp)
2817 return;
2818
2819 pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE, lb.buffer,
2820 cp - lb.buffer + 1, lineno, linecharno);
2821 }
2822
2823 void
2824 Lisp_functions (inf)
2825 FILE *inf;
2826 {
2827 lineno = 0;
2828 charno = 0;
2829
2830 while (!feof (inf))
2831 {
2832 lineno++;
2833 linecharno = charno;
2834 charno += readline (&lb, inf);
2835 dbp = lb.buffer;
2836 if (dbp[0] == '(')
2837 {
2838 if (L_isdef (dbp))
2839 {
2840 while (!isspace (*dbp))
2841 dbp++;
2842 while (isspace (*dbp))
2843 dbp++;
2844 L_getit ();
2845 }
2846 else
2847 {
2848 /* Check for (foo::defmumble name-defined ... */
2849 do
2850 dbp++;
2851 while (*dbp && !isspace (*dbp)
2852 && *dbp != ':' && *dbp != '(' && *dbp != ')');
2853 if (*dbp == ':')
2854 {
2855 do
2856 dbp++;
2857 while (*dbp == ':');
2858
2859 if (L_isdef (dbp - 1))
2860 {
2861 while (!isspace (*dbp))
2862 dbp++;
2863 while (isspace (*dbp))
2864 dbp++;
2865 L_getit ();
2866 }
2867 }
2868 }
2869 }
2870 }
2871 }
2872 \f
2873 /*
2874 * Scheme tag functions
2875 * look for (def... xyzzy
2876 * look for (def... (xyzzy
2877 * look for (def ... ((...(xyzzy ....
2878 * look for (set! xyzzy
2879 */
2880
2881 void get_scheme ();
2882
2883 void
2884 Scheme_functions (inf)
2885 FILE *inf;
2886 {
2887 lineno = 0;
2888 charno = 0;
2889
2890 while (!feof (inf))
2891 {
2892 lineno++;
2893 linecharno = charno;
2894 charno += readline (&lb, inf);
2895 dbp = lb.buffer;
2896 if (dbp[0] == '(' &&
2897 (dbp[1] == 'D' || dbp[1] == 'd') &&
2898 (dbp[2] == 'E' || dbp[2] == 'e') &&
2899 (dbp[3] == 'F' || dbp[3] == 'f'))
2900 {
2901 while (!isspace (*dbp))
2902 dbp++;
2903 /* Skip over open parens and white space */
2904 while (*dbp && (isspace (*dbp) || *dbp == '('))
2905 dbp++;
2906 get_scheme ();
2907 }
2908 if (dbp[0] == '(' &&
2909 (dbp[1] == 'S' || dbp[1] == 's') &&
2910 (dbp[2] == 'E' || dbp[2] == 'e') &&
2911 (dbp[3] == 'T' || dbp[3] == 't') &&
2912 (dbp[4] == '!' || dbp[4] == '!') &&
2913 (isspace (dbp[5])))
2914 {
2915 while (!isspace (*dbp))
2916 dbp++;
2917 /* Skip over white space */
2918 while (isspace (*dbp))
2919 dbp++;
2920 get_scheme ();
2921 }
2922 }
2923 }
2924
2925 void
2926 get_scheme ()
2927 {
2928 register char *cp;
2929
2930 if (*dbp == 0)
2931 return;
2932 /* Go till you get to white space or a syntactic break */
2933 for (cp = dbp + 1;
2934 *cp && *cp != '(' && *cp != ')' && !isspace (*cp);
2935 cp++)
2936 continue;
2937 pfnote (savenstr (dbp, cp-dbp), TRUE, FALSE,
2938 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
2939 }
2940 \f
2941 /* Find tags in TeX and LaTeX input files. */
2942
/* One TeX control sequence that defines a tag: its text, without the
   escape character, and the length of that text.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;
  int len;
};

/* Table of tag-defining control sequences.  It is built on first use
   and ends with an entry whose len is 0.  */
struct TEX_tabent *TEX_toktab = NULL;

/* Default, colon-separated set of control sequences to put into
   TEX_toktab.  The value of environment var TEXTAGS is prepended
   to this.  */
char *TEX_defenv =
  ":chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem:typeout";

void TEX_mode ();
struct TEX_tabent *TEX_decode_env ();
void TEX_getit ();
int TEX_Token ();

/* Escape and grouping characters in effect for the current file.  */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
2968
2969 /*
2970 * TeX/LaTeX scanning loop.
2971 */
2972 void
2973 TeX_functions (inf)
2974 FILE *inf;
2975 {
2976 char *lasthit;
2977
2978 lineno = 0;
2979 charno = 0;
2980
2981 /* Select either \ or ! as escape character. */
2982 TEX_mode (inf);
2983
2984 /* Initialize token table once from environment. */
2985 if (!TEX_toktab)
2986 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
2987
2988 while (!feof (inf))
2989 { /* Scan each line in file */
2990 lineno++;
2991 linecharno = charno;
2992 charno += readline (&lb, inf);
2993 dbp = lb.buffer;
2994 lasthit = dbp;
2995 while (dbp = etags_strchr (dbp, TEX_esc)) /* Look at each esc in line */
2996 {
2997 register int i;
2998
2999 if (!*(++dbp))
3000 break;
3001 linecharno += dbp - lasthit;
3002 lasthit = dbp;
3003 i = TEX_Token (lasthit);
3004 if (0 <= i)
3005 {
3006 TEX_getit (lasthit, TEX_toktab[i].len);
3007 break; /* We only save a line once */
3008 }
3009 }
3010 }
3011 }
3012
3013 #define TEX_LESC '\\'
3014 #define TEX_SESC '!'
3015 #define TEX_cmt '%'
3016
3017 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
3018 chars accordingly. */
3019 void
3020 TEX_mode (inf)
3021 FILE *inf;
3022 {
3023 int c;
3024
3025 while ((c = getc (inf)) != EOF)
3026 {
3027 /* Skip to next line if we hit the TeX comment char. */
3028 if (c == TEX_cmt)
3029 while (c != '\n')
3030 c = getc (inf);
3031 else if (c == TEX_LESC || c == TEX_SESC )
3032 break;
3033 }
3034
3035 if (c == TEX_LESC)
3036 {
3037 TEX_esc = TEX_LESC;
3038 TEX_opgrp = '{';
3039 TEX_clgrp = '}';
3040 }
3041 else
3042 {
3043 TEX_esc = TEX_SESC;
3044 TEX_opgrp = '<';
3045 TEX_clgrp = '>';
3046 }
3047 rewind (inf);
3048 }
3049
3050 /* Read environment and prepend it to the default string.
3051 Build token table. */
3052 struct TEX_tabent *
3053 TEX_decode_env (evarname, defenv)
3054 char *evarname;
3055 char *defenv;
3056 {
3057 register char *env, *p;
3058
3059 struct TEX_tabent *tab;
3060 int size, i;
3061
3062 /* Append default string to environment. */
3063 env = getenv (evarname);
3064 if (!env)
3065 env = defenv;
3066 else
3067 env = concat (env, defenv, "");
3068
3069 /* Allocate a token table */
3070 for (size = 1, p = env; p;)
3071 if ((p = etags_strchr (p, ':')) && *(++p))
3072 size++;
3073 /* Add 1 to leave room for null terminator. */
3074 tab = xnew (size + 1, struct TEX_tabent);
3075
3076 /* Unpack environment string into token table. Be careful about */
3077 /* zero-length strings (leading ':', "::" and trailing ':') */
3078 for (i = 0; *env;)
3079 {
3080 p = etags_strchr (env, ':');
3081 if (!p) /* End of environment string. */
3082 p = env + strlen (env);
3083 if (p - env > 0)
3084 { /* Only non-zero strings. */
3085 tab[i].name = savenstr (env, p - env);
3086 tab[i].len = strlen (tab[i].name);
3087 i++;
3088 }
3089 if (*p)
3090 env = p + 1;
3091 else
3092 {
3093 tab[i].name = NULL; /* Mark end of table. */
3094 tab[i].len = 0;
3095 break;
3096 }
3097 }
3098 return tab;
3099 }
3100
3101 /* Record a tag defined by a TeX command of length LEN and starting at NAME.
3102 The name being defined actually starts at (NAME + LEN + 1).
3103 But we seem to include the TeX command in the tag name. */
3104 void
3105 TEX_getit (name, len)
3106 char *name;
3107 int len;
3108 {
3109 char *p = name + len;
3110
3111 if (*name == 0)
3112 return;
3113
3114 /* Let tag name extend to next group close (or end of line) */
3115 while (*p && *p != TEX_clgrp)
3116 p++;
3117 pfnote (savenstr (name, p-name), TRUE, FALSE, lb.buffer,
3118 strlen (lb.buffer), lineno, linecharno);
3119 }
3120
3121 /* If the text at CP matches one of the tag-defining TeX command names,
3122 return the pointer to the first occurrence of that command in TEX_toktab.
3123 Otherwise return -1.
3124 Keep the capital `T' in `Token' for dumb truncating compilers
3125 (this distinguishes it from `TEX_toktab' */
3126 int
3127 TEX_Token (cp)
3128 char *cp;
3129 {
3130 int i;
3131
3132 for (i = 0; TEX_toktab[i].len > 0; i++)
3133 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
3134 return i;
3135 return -1;
3136 }
3137 \f
3138 /* Support for Prolog. */
3139
3140 /* Whole head (not only functor, but also arguments)
3141 is gotten in compound term. */
3142 void
3143 prolog_getit (s)
3144 char *s;
3145 {
3146 char *save_s;
3147 int insquote, npar;
3148
3149 save_s = s;
3150 insquote = FALSE;
3151 npar = 0;
3152 while (1)
3153 {
3154 if (s[0] == '\0') /* syntax error. */
3155 return;
3156 else if (insquote && s[0] == '\'' && s[1] == '\'')
3157 s += 2;
3158 else if (s[0] == '\'')
3159 {
3160 insquote = !insquote;
3161 s++;
3162 }
3163 else if (!insquote && s[0] == '(')
3164 {
3165 npar++;
3166 s++;
3167 }
3168 else if (!insquote && s[0] == ')')
3169 {
3170 npar--;
3171 s++;
3172 if (npar == 0)
3173 break;
3174 else if (npar < 0) /* syntax error. */
3175 return;
3176 }
3177 else if (!insquote && s[0] == '.'
3178 && (isspace (s[1]) || s[1] == '\0'))
3179 { /* fullstop. */
3180 if (npar != 0) /* syntax error. */
3181 return;
3182 s++;
3183 break;
3184 }
3185 else
3186 s++;
3187 }
3188 pfnote (savenstr (save_s, s-save_s), TRUE, FALSE,
3189 save_s, s-save_s, lineno, linecharno);
3190 }
3191
3192 /* It is assumed that prolog predicate starts from column 0. */
3193 void
3194 Prolog_functions (inf)
3195 FILE *inf;
3196 {
3197 void skip_comment (), prolog_getit ();
3198
3199 lineno = linecharno = charno = 0;
3200 while (!feof (inf))
3201 {
3202 lineno++;
3203 linecharno += charno;
3204 charno = readline (&lb, inf) + 1; /* 1 for newline. */
3205 dbp = lb.buffer;
3206 if (isspace (dbp[0])) /* not predicate header. */
3207 continue;
3208 else if (dbp[0] == '%') /* comment. */
3209 continue;
3210 else if (dbp[0] == '/' && dbp[1] == '*') /* comment. */
3211 skip_comment (&lb, inf, &lineno, &linecharno);
3212 else /* found. */
3213 prolog_getit (dbp);
3214 }
3215 }
3216
3217 void
3218 skip_comment (plb, inf, plineno, plinecharno)
3219 struct linebuffer *plb;
3220 FILE *inf;
3221 int *plineno; /* result */
3222 long *plinecharno; /* result */
3223 {
3224 char *cp;
3225
3226 do
3227 {
3228 for (cp = plb->buffer; *cp != '\0'; cp++)
3229 if (cp[0] == '*' && cp[1] == '/')
3230 return;
3231 (*plineno)++;
3232 *plinecharno += readline (plb, inf) + 1; /* 1 for newline. */
3233 }
3234 while (!feof(inf));
3235 }
3236 \f
3237 #ifdef ETAGS_REGEXPS
/* Take a string like "/blah/" and turn it into "blah", making sure
   that the first and last characters are the same, and handling
   quoted separator characters.  Actually, stops on the occurrence of
   an unquoted separator.  Also turns "\t" into a Tab character.
   Any other backslash sequence is copied unchanged.
   Returns pointer to terminating separator, or to the end of the
   string when there is none.  Works in place.  Null terminates name
   string.  */
char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character; a backslash that ends the
             string is dropped.  */
          ++name;
          if (*name == '\0')
            break;
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
          continue;
        }
      if (*name == sep)
        break;
      *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
3280
3281 /* Turn a name, which is an ed-style (but Emacs syntax) regular
3282 expression, into a real regular expression by compiling it. */
3283 void
3284 add_regex (regexp_pattern)
3285 char *regexp_pattern;
3286 {
3287 char *name;
3288 const char *err;
3289 struct re_pattern_buffer *patbuf;
3290
3291 if (regexp_pattern == NULL)
3292 {
3293 /* Remove existing regexps. */
3294 num_patterns = 0;
3295 patterns = NULL;
3296 return;
3297 }
3298
3299 if (regexp_pattern[0] == '\0')
3300 {
3301 error ("missing regexp", 0);
3302 return;
3303 }
3304 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
3305 {
3306 error ("%s: unterminated regexp", regexp_pattern);
3307 return;
3308 }
3309 name = scan_separators (regexp_pattern);
3310 if (regexp_pattern[0] == '\0')
3311 {
3312 error ("null regexp", 0);
3313 return;
3314 }
3315 (void) scan_separators (name);
3316
3317 patbuf = xnew (1, struct re_pattern_buffer);
3318 patbuf->translate = NULL;
3319 patbuf->fastmap = NULL;
3320 patbuf->buffer = NULL;
3321 patbuf->allocated = 0;
3322
3323 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
3324 if (err != NULL)
3325 {
3326 error ("%s while compiling pattern", err);
3327 return;
3328 }
3329
3330 num_patterns += 1;
3331 if (num_patterns == 1)
3332 patterns = xnew (1, struct pattern);
3333 else
3334 patterns = ((struct pattern *)
3335 xrealloc (patterns,
3336 (num_patterns * sizeof (struct pattern))));
3337 patterns[num_patterns - 1].pattern = patbuf;
3338 patterns[num_patterns - 1].name_pattern = savestr (name);
3339 patterns[num_patterns - 1].error_signaled = FALSE;
3340 }
3341
3342 /*
3343 * Do the subtitutions indicated by the regular expression and
3344 * arguments.
3345 */
3346 char *
3347 substitute (in, out, regs)
3348 char *in, *out;
3349 struct re_registers *regs;
3350 {
3351 char *result = NULL, *t;
3352 int size = 0;
3353
3354 /* Pass 1: figure out how much size to allocate. */
3355 for (t = out; *t; ++t)
3356 {
3357 if (*t == '\\')
3358 {
3359 ++t;
3360 if (!*t)
3361 {
3362 fprintf (stderr, "%s: pattern subtitution ends prematurely\n",
3363 progname);
3364 return NULL;
3365 }
3366 if (isdigit (*t))
3367 {
3368 int dig = *t - '0';
3369 size += regs->end[dig] - regs->start[dig];
3370 }
3371 }
3372 }
3373
3374 /* Allocate space and do the substitutions. */
3375 result = xnew (size + 1, char);
3376 size = 0;
3377 for (; *out; ++out)
3378 {
3379 if (*out == '\\')
3380 {
3381 ++out;
3382 if (isdigit (*out))
3383 {
3384 /* Using "dig2" satisfies my debugger. Bleah. */
3385 int dig2 = *out - '0';
3386 strncpy (result + size, in + regs->start[dig2],
3387 regs->end[dig2] - regs->start[dig2]);
3388 size += regs->end[dig2] - regs->start[dig2];
3389 }
3390 else
3391 {
3392 switch (*out)
3393 {
3394 case '\t':
3395 result[size++] = '\t';
3396 break;
3397 case '\\':
3398 *out = '\\';
3399 break;
3400 default:
3401 result[size++] = *out;
3402 break;
3403 }
3404 }
3405 }
3406 else
3407 result[size++] = *out;
3408 }
3409 result[size] = '\0';
3410
3411 return result;
3412 }
3413 \f
3414 #endif /* ETAGS_REGEXPS */
3415 /* Initialize a linebuffer for use */
3416 void
3417 initbuffer (linebuffer)
3418 struct linebuffer *linebuffer;
3419 {
3420 linebuffer->size = 200;
3421 linebuffer->buffer = xnew (200, char);
3422 }
3423
3424 /*
3425 * Read a line of text from `stream' into `linebuffer'.
3426 * Return the number of characters read from `stream',
3427 * which is the length of the line including the newline, if any.
3428 */
3429 long
3430 readline_internal (linebuffer, stream)
3431 struct linebuffer *linebuffer;
3432 register FILE *stream;
3433 {
3434 char *buffer = linebuffer->buffer;
3435 register char *p = linebuffer->buffer;
3436 register char *pend;
3437 int chars_deleted;
3438
3439 pend = p + linebuffer->size; /* Separate to avoid 386/IX compiler bug. */
3440
3441 while (1)
3442 {
3443 register int c = getc (stream);
3444 if (p == pend)
3445 {
3446 linebuffer->size *= 2;
3447 buffer = (char *) xrealloc (buffer, linebuffer->size);
3448 p += buffer - linebuffer->buffer;
3449 pend = buffer + linebuffer->size;
3450 linebuffer->buffer = buffer;
3451 }
3452 if (c == EOF)
3453 {
3454 chars_deleted = 0;
3455 break;
3456 }
3457 if (c == '\n')
3458 {
3459 if (p > buffer && p[-1] == '\r')
3460 {
3461 *--p = '\0';
3462 chars_deleted = 2;
3463 }
3464 else
3465 {
3466 *p = '\0';
3467 chars_deleted = 1;
3468 }
3469 break;
3470 }
3471 *p++ = c;
3472 }
3473
3474 return p - buffer + chars_deleted;
3475 }
3476
/*
 * Like readline_internal, above, but try to match the input
 * line against any existing regular expressions, making a tag for
 * every pattern that matches.  Returns what readline_internal
 * returned.
 */
long
readline (linebuffer, stream)
     struct linebuffer *linebuffer;
     FILE *stream;
{
  /* Read new line.  */
  long result = readline_internal (linebuffer, stream);

#ifdef ETAGS_REGEXPS
  int i;
  int len;

  /* RESULT also counts the line terminator, which is not stored in
     the buffer, so it must not be used as the size of the text to
     match.  Use the length of the stored text, but never more than
     was actually read.  */
  len = strlen (linebuffer->buffer);
  if (len > result)
    len = (int) result;

  /* Match against all listed patterns.  */
  for (i = 0; i < num_patterns; ++i)
    {
      int match = re_match (patterns[i].pattern, linebuffer->buffer,
                            len, 0, &patterns[i].regs);
      switch (match)
        {
        case -2:
          /* Some error.  */
          if (!patterns[i].error_signaled)
            {
              error ("error while matching pattern %d", i);
              patterns[i].error_signaled = TRUE;
            }
          break;
        case -1:
          /* No match.  */
          break;
        default:
          /* Match occurred.  Construct a tag.  */
          if (patterns[i].name_pattern[0] != '\0')
            {
              /* Make a named tag.  */
              char *name = substitute (linebuffer->buffer,
                                       patterns[i].name_pattern,
                                       &patterns[i].regs);
              if (name != NULL)
                pfnote (name, TRUE, TRUE, linebuffer->buffer,
                        match, lineno, linecharno);
            }
          else
            {
              /* Make an unnamed tag.  */
              pfnote (NULL, TRUE, FALSE, linebuffer->buffer,
                      match, lineno, linecharno);
            }
          break;
        }
    }
#endif /* ETAGS_REGEXPS */

  return result;
}
3534
3535 /*
3536 * Read a file, but do no processing. This is used to do regexp
3537 * matching on files that have no language defined.
3538 */
3539 void
3540 just_read_file (inf)
3541 FILE *inf;
3542 {
3543 while (!feof (inf))
3544 {
3545 ++lineno;
3546 linecharno = charno;
3547 charno += readline (&lb, inf) + 1;
3548 }
3549 }
3550
3551 \f
/*
 * Return a pointer to a space of size strlen(cp)+1 allocated
 * with xnew where the string CP has been copied.
 */
char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
3562
3563 /*
3564 * Return a pointer to a space of size LEN+1 allocated with xnew where
3565 * the string CP has been copied for at most the first LEN characters.
3566 */
3567 char *
3568 savenstr (cp, len)
3569 char *cp;
3570 int len;
3571 {
3572 register char *dp;
3573
3574 dp = xnew (len + 1, char);
3575 strncpy (dp, cp, len);
3576 dp[len] = '\0';
3577 return dp;
3578 }
3579
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null counts as part
 * of the string, so searching for '\0' finds the end of sp.
 *
 * Identical to System V strrchr, included for portability.
 */
char *
etags_strrchr (sp, c)
     register char *sp, c;
{
  register char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return last;
}
3600
3601
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null counts as part
 * of the string, so searching for '\0' finds the end of sp.
 *
 * Identical to System V strchr, included for portability.
 */
char *
etags_strchr (sp, c)
     register char *sp, c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return sp;
      if (*sp == '\0')
        return NULL;
    }
}
3619
3620 /* Print error message and exit. */
3621 void
3622 fatal (s1, s2)
3623 char *s1, *s2;
3624 {
3625 error (s1, s2);
3626 exit (BAD);
3627 }
3628
3629 void
3630 pfatal (s1)
3631 char *s1;
3632 {
3633 perror (s1);
3634 exit (BAD);
3635 }
3636
3637 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
3638 void
3639 error (s1, s2)
3640 char *s1, *s2;
3641 {
3642 fprintf (stderr, "%s: ", progname);
3643 fprintf (stderr, s1, s2);
3644 fprintf (stderr, "\n");
3645 }
3646
3647 /* Return a newly-allocated string whose contents
3648 concatenate those of s1, s2, s3. */
3649 char *
3650 concat (s1, s2, s3)
3651 char *s1, *s2, *s3;
3652 {
3653 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
3654 char *result = xnew (len1 + len2 + len3 + 1, char);
3655
3656 strcpy (result, s1);
3657 strcpy (result + len1, s2);
3658 strcpy (result + len1 + len2, s3);
3659 result[len1 + len2 + len3] = '\0';
3660
3661 return result;
3662 }
3663 \f
3664 /* Does the same work as the system V getcwd, but does not need to
3665 guess buffer size in advance. */
3666 char *
3667 etags_getcwd ()
3668 {
3669 #ifdef DOS_NT
3670 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
3671
3672 getwd (path);
3673 p = path;
3674 while (*p)
3675 if (*p == '\\')
3676 *p++ = '/';
3677 else
3678 *p++ = tolower (*p);
3679
3680 return strdup (path);
3681 #else /* not DOS_NT */
3682 #if HAVE_GETCWD
3683 int bufsize = 200;
3684 char *path = xnew (bufsize, char);
3685
3686 while (getcwd (path, bufsize) == NULL)
3687 {
3688 if (errno != ERANGE)
3689 pfatal ("getcwd");
3690 bufsize *= 2;
3691 path = xnew (bufsize, char);
3692 }
3693
3694 return path;
3695 #else /* not DOS_NT and not HAVE_GETCWD */
3696 struct linebuffer path;
3697 FILE *pipe;
3698
3699 initbuffer (&path);
3700 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
3701 if (pipe == NULL || readline_internal (&path, pipe) == 0)
3702 pfatal ("pwd");
3703 pclose (pipe);
3704
3705 return path.buffer;
3706 #endif /* not HAVE_GETCWD */
3707 #endif /* not DOS_NT */
3708 }
3709
3710 /* Return a newly allocated string containing the filename
3711 of FILE relative to the absolute directory DIR (which
3712 should end with a slash). */
3713 char *
3714 relative_filename (file, dir)
3715 char *file, *dir;
3716 {
3717 char *fp, *dp, *res;
3718
3719 /* Find the common root of file and dir. */
3720 fp = absolute_filename (file, cwd);
3721 dp = dir;
3722 while (*fp++ == *dp++)
3723 continue;
3724 do
3725 {
3726 fp--;
3727 dp--;
3728 }
3729 while (*fp != '/');
3730
3731 /* Build a sequence of "../" strings for the resulting relative filename. */
3732 for (dp = etags_strchr (dp + 1, '/'), res = "";
3733 dp != NULL;
3734 dp = etags_strchr (dp + 1, '/'))
3735 {
3736 res = concat (res, "../", "");
3737 }
3738
3739 /* Add the filename relative to the common root of file and dir. */
3740 res = concat (res, fp + 1, "");
3741
3742 return res; /* temporary stub */
3743 }
3744
/* Copy the string FROM, terminator included, onto TO.  TO must not
   lie after FROM; both may belong to the same buffer, which is why
   strcpy cannot be used.  */
static void
slide_down (to, from)
     char *to, *from;
{
  while ((*to++ = *from++) != '\0')
    continue;
}

/* Return a newly allocated string containing the
   absolute filename of FILE given CWD (which should
   end with a slash).  "/dirname/.." and "/." substrings are removed
   from the result; a name that reduces to nothing but the root
   directory is returned as "/".  */
char *
absolute_filename (file, cwd)
     char *file, *cwd;
{
  char *slashp, *cp, *res;

#ifdef DOS_NT
  if (file[0] == '/' || (isalpha (file[0]) && file[1] == ':'))
#else
  if (file[0] == '/')
#endif
    res = concat (file, "", "");
  else
    res = concat (cwd, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Find the start of the component that precedes "/..",
                 without ever stepping in front of the string.  */
              cp = slashp;
              while (cp > res && cp[-1] != '/')
                cp--;

              if (cp > res)
                {
                  /* "/dirname/..": cut from the slash before dirname. */
                  cp--;
                  if (cp == res && slashp[3] == '\0')
                    cp[1] = '\0';       /* only the root is left */
                  else
                    slide_down (cp, slashp + 3);
                }
              else if (cp == slashp)
                {
                  /* The name begins with "/..": the parent of the
                     root directory is the root directory.  */
                  if (slashp[3] == '\0')
                    slashp[1] = '\0';
                  else
                    slide_down (slashp, slashp + 3);
                }
              else
                {
                  /* "dirname/.." at the very start, with no slash
                     in front of dirname.  */
                  if (slashp[3] == '\0')
                    {
                      res[0] = '.';
                      res[1] = '\0';
                      return res;
                    }
                  slide_down (res, slashp + 4);
                  slashp = etags_strchr (res, '/');
                  continue;
                }
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              if (slashp == res && slashp[2] == '\0')
                slashp[1] = '\0';       /* "/." is the root */
              else
                slide_down (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  return res;
}
3802
/* Return a newly allocated string containing the absolute
   filename of dir where FILE resides given CWD (which should
   end with a slash).  FILE is modified temporarily and restored
   before returning.  */
char *
absolute_dirname (file, cwd)
     char *file, *cwd;
{
  char *slashp, *res;
  char save;

  slashp = etags_strrchr (file, '/');
  if (slashp == NULL)
    /* No directory part: FILE lives in CWD.  Return a copy, so that
       the result is newly allocated in this case as well.  */
    return savestr (cwd);

  /* Cut FILE right after its last slash while it is being expanded. */
  save = slashp[1];
  slashp[1] = '\0';
  res = absolute_filename (file, cwd);
  slashp[1] = save;

  return res;
}
3823
/* Like malloc but get fatal error if memory is exhausted.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *result;

  result = (long *) malloc (size);
  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", 0);
  return result;
}
3834
/* Like realloc but get fatal error if memory is exhausted.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *result = (long *) realloc (ptr, size);
  if (result == NULL)
    /* fatal takes two arguments; pass the same placeholder that
       xmalloc passes.  */
    fatal ("virtual memory exhausted", 0);
  return result;
}