Comment updates to etags.c
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000, 2001
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1993 Francesco Potortì reorganised C and C++ based on work by Joe Wells.
29 * 1994 Regexp tags by Tom Tromey.
30 * 2001 Nested classes by Francesco Potortì based on work by Mykola Dzyuba.
31 *
32 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
33 */
34
35 char pot_etags_version[] = "@(#) pot revision number is 14.13";
36
37 #define TRUE 1
38 #define FALSE 0
39
40 #ifdef DEBUG
41 # undef DEBUG
42 # define DEBUG TRUE
43 #else
44 # define DEBUG FALSE
45 # define NDEBUG /* disable assert */
46 #endif
47
48 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
49 # define P_(proto) proto
50 #else
51 # define P_(proto) ()
52 #endif
53
54 #ifdef HAVE_CONFIG_H
55 # include <config.h>
56 /* On some systems, Emacs defines static as nothing for the sake
57 of unexec. We don't want that here since we don't use unexec. */
58 # undef static
59 # define ETAGS_REGEXPS /* use the regexp features */
60 # define LONG_OPTIONS /* accept long options */
61 #endif /* HAVE_CONFIG_H */
62
63 #ifndef _GNU_SOURCE
64 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
65 #endif
66
67 /* WIN32_NATIVE is for XEmacs.
68 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
69 #ifdef WIN32_NATIVE
70 # undef MSDOS
71 # undef WINDOWSNT
72 # define WINDOWSNT
73 #endif /* WIN32_NATIVE */
74
75 #ifdef MSDOS
76 # undef MSDOS
77 # define MSDOS TRUE
78 # include <fcntl.h>
79 # include <sys/param.h>
80 # include <io.h>
81 # ifndef HAVE_CONFIG_H
82 # define DOS_NT
83 # include <sys/config.h>
84 # endif
85 #else
86 # define MSDOS FALSE
87 #endif /* MSDOS */
88
89 #ifdef WINDOWSNT
90 # include <stdlib.h>
91 # include <fcntl.h>
92 # include <string.h>
93 # include <direct.h>
94 # include <io.h>
95 # define MAXPATHLEN _MAX_PATH
96 # undef HAVE_NTGUI
97 # undef DOS_NT
98 # define DOS_NT
99 # ifndef HAVE_GETCWD
100 # define HAVE_GETCWD
101 # endif /* undef HAVE_GETCWD */
102 #else /* !WINDOWSNT */
103 # ifdef STDC_HEADERS
104 # include <stdlib.h>
105 # include <string.h>
106 # else
107 extern char *getenv ();
108 # endif
109 #endif /* !WINDOWSNT */
110
111 #ifdef HAVE_UNISTD_H
112 # include <unistd.h>
113 #else
114 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
115 extern char *getcwd (char *buf, size_t size);
116 # endif
117 #endif /* HAVE_UNISTD_H */
118
119 #include <stdio.h>
120 #include <ctype.h>
121 #include <errno.h>
122 #ifndef errno
123 extern int errno;
124 #endif
125 #include <assert.h>
126 #include <sys/types.h>
127 #include <sys/stat.h>
128
129 #if !defined (S_ISREG) && defined (S_IFREG)
130 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
131 #endif
132
133 #ifdef LONG_OPTIONS
134 # include <getopt.h>
135 #else
136 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
137 extern char *optarg;
138 extern int optind, opterr;
139 #endif /* LONG_OPTIONS */
140
141 #ifdef ETAGS_REGEXPS
142 # include <regex.h>
143 #endif /* ETAGS_REGEXPS */
144
145 /* Define CTAGS to make the program "ctags" compatible with the usual one.
146 Leave it undefined to make the program "etags", which makes emacs-style
147 tag tables and tags typedefs, #defines and struct/union/enum by default. */
148 #ifdef CTAGS
149 # undef CTAGS
150 # define CTAGS TRUE
151 #else
152 # define CTAGS FALSE
153 #endif
154
155 /* Exit codes for success and failure. */
156 #ifdef VMS
157 # define GOOD 1
158 # define BAD 0
159 #else
160 # define GOOD 0
161 # define BAD 1
162 #endif
163
/* String comparison helpers.  streq is non-zero when S and T are equal;
   strneq is non-zero when their first N characters are equal.  Both
   operands are handed to strcmp/strncmp, so BOTH must be non-NULL; the
   assertion checks exactly that when assertions are enabled.  Each
   operand is evaluated twice in that case, so do not pass expressions
   with side effects.  */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
166
/* Character classification helpers.  CHAR reduces its argument to the
   range 0..CHARS-1, so the result is always a valid index into the
   CHARS-sized lookup tables (_wht, _nin, _btk, _itk, _etk, defined
   elsewhere in this file) and a non-negative argument for the
   <ctype.h> functions.  */
167 #define CHARS 256 /* number of distinct character codes: 2^8, i.e. 8-bit chars */
168 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
169 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
170 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
171 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
172 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
173 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
174
/* <ctype.h> wrappers that mask their argument through CHAR first.  */
175 #define ISALNUM(c) isalnum (CHAR(c))
176 #define ISALPHA(c) isalpha (CHAR(c))
177 #define ISDIGIT(c) isdigit (CHAR(c))
178 #define ISLOWER(c) islower (CHAR(c))
179
/* Case conversion, likewise applied to the masked character code.  */
180 #define lowcase(c) tolower (CHAR(c))
181 #define upcase(c) toupper (CHAR(c))
182
183
184 /*
185 * xnew, xrnew -- allocate, reallocate storage
186 *
187 * SYNOPSIS: Type *xnew (int n, Type);
188 * void xrnew (OldPointer, int n, Type);
189 */
190 #if DEBUG
191 # include "chkmalloc.h"
192 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
193 (n) * sizeof (Type)))
194 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
195 (char *) (op), (n) * sizeof (Type)))
196 #else
197 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
198 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
199 (char *) (op), (n) * sizeof (Type)))
200 #endif
201
202 typedef int bool;
203
204 typedef void Lang_function P_((FILE *));
205
/* Describes one supported compression format: the file name suffix
   that identifies it and the command that decompresses such a file.  */
206 typedef struct
207 {
208 char *suffix; /* file name suffix, written without the dot (e.g. "gz") */
209 char *command; /* Takes one arg and decompresses to stdout */
210 } compressor;
211
/* Describes one language known to the program.  In the lang_names
   table any of the three string lists may be NULL; a list that is
   present ends with a NULL entry.  */
212 typedef struct
213 {
214 char *name; /* language name, e.g. "c++" */
215 Lang_function *function; /* routine that tags a FILE of this language; NULL for "auto" */
216 char **filenames; /* complete file names belonging to this language */
217 char **suffixes; /* file name suffixes, written without the dot */
218 char **interpreters; /* interpreter names associated with this language */
219 } language;
220
/* One tag entry.  Entries are linked through LEFT and RIGHT into the
   binary tree of tags.  */
221 typedef struct node_st
222 { /* sorting structure */
223 char *name; /* function or type name */
224 char *file; /* file name */
225 bool is_func; /* use pattern or line no */
226 bool been_warned; /* set if noticed dup */
227 int lno; /* line number tag is on */
228 long cno; /* character number line starts on */
229 char *pat; /* search pattern */
230 struct node_st *left, *right; /* left and right sons */
231 } node;
232
233 /*
234 * A `linebuffer' is a structure which holds a line of text.
235 * `readline_internal' reads a line from a stream into a linebuffer
236 * and works regardless of the length of the line.
237 * SIZE is the size of BUFFER, LEN is the length of the string in
238 * BUFFER after readline reads it.
239 */
240 typedef struct
241 {
242 long size; /* size of BUFFER */
243 int len; /* length of the string currently held in BUFFER */
244 char *buffer; /* storage for the text of the line */
245 } linebuffer;
246
247 /* Many compilers barf on this:
248 Lang_function Ada_funcs;
249 so let's write it this way */
250 static void Ada_funcs P_((FILE *));
251 static void Asm_labels P_((FILE *));
252 static void C_entries P_((int c_ext, FILE *));
253 static void default_C_entries P_((FILE *));
254 static void plain_C_entries P_((FILE *));
255 static void Cjava_entries P_((FILE *));
256 static void Cobol_paragraphs P_((FILE *));
257 static void Cplusplus_entries P_((FILE *));
258 static void Cstar_entries P_((FILE *));
259 static void Erlang_functions P_((FILE *));
260 static void Fortran_functions P_((FILE *));
261 static void Yacc_entries P_((FILE *));
262 static void Lisp_functions P_((FILE *));
263 static void Makefile_targets P_((FILE *));
264 static void Pascal_functions P_((FILE *));
265 static void Perl_functions P_((FILE *));
266 static void Postscript_functions P_((FILE *));
267 static void Prolog_functions P_((FILE *));
268 static void Python_functions P_((FILE *));
269 static void Scheme_functions P_((FILE *));
270 static void TeX_commands P_((FILE *));
271 static void Texinfo_nodes P_((FILE *));
272 static void just_read_file P_((FILE *));
273
274 static void print_language_names P_((void));
275 static void print_version P_((void));
276 static void print_help P_((void));
277 int main P_((int, char **));
278 static int number_len P_((long));
279
280 static compressor *get_compressor_from_suffix P_((char *, char **));
281 static language *get_language_from_langname P_((char *));
282 static language *get_language_from_interpreter P_((char *));
283 static language *get_language_from_filename P_((char *));
284 static int total_size_of_entries P_((node *));
285 static long readline P_((linebuffer *, FILE *));
286 static long readline_internal P_((linebuffer *, FILE *));
287 static void get_tag P_((char *));
288
289 #ifdef ETAGS_REGEXPS
290 static void analyse_regex P_((char *, bool));
291 static void add_regex P_((char *, bool, language *));
292 static void free_patterns P_((void));
293 #endif /* ETAGS_REGEXPS */
294 static void error P_((const char *, const char *));
295 static void suggest_asking_for_help P_((void));
296 void fatal P_((char *, char *));
297 static void pfatal P_((char *));
298 static void add_node P_((node *, node **));
299
300 static void init P_((void));
301 static void initbuffer P_((linebuffer *));
302 static void find_entries P_((char *, FILE *));
303 static void free_tree P_((node *));
304 static void pfnote P_((char *, bool, char *, int, int, long));
305 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
306 static void process_file P_((char *));
307 static void put_entries P_((node *));
308 static void takeprec P_((void));
309
310 static char *concat P_((char *, char *, char *));
311 static char *skip_spaces P_((char *));
312 static char *skip_non_spaces P_((char *));
313 static char *savenstr P_((char *, int));
314 static char *savestr P_((char *));
315 static char *etags_strchr P_((const char *, int));
316 static char *etags_strrchr P_((const char *, int));
317 static char *etags_getcwd P_((void));
318 static char *relative_filename P_((char *, char *));
319 static char *absolute_filename P_((char *, char *));
320 static char *absolute_dirname P_((char *, char *));
321 static bool filename_is_absolute P_((char *f));
322 static void canonicalize_filename P_((char *));
323 static void linebuffer_setlen P_((linebuffer *, int));
324 long *xmalloc P_((unsigned int));
325 long *xrealloc P_((char *, unsigned int));
326
327 \f
328 char searchar = '/'; /* use /.../ searches */
329
330 char *tagfile; /* output file */
331 char *progname; /* name this program was invoked with */
332 char *cwd; /* current working directory */
333 char *tagfiledir; /* directory of tagfile */
334 FILE *tagf; /* ioptr for tags file */
335
336 char *curfile; /* current input file name */
337 language *curlang; /* current language */
338
339 int lineno; /* line number of current line */
340 long charno; /* current character number */
341 long linecharno; /* charno of start of current line */
342 char *dbp; /* pointer to start of current tag */
343
344 node *head; /* the head of the binary tree of tags */
345
346 linebuffer lb; /* the current line */
347
348 /* boolean "functions" (see init) */
349 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
350 char
351 /* white chars */
352 *white = " \f\t\n\r\v",
353 /* not in a name */
354 *nonam = " \f\t\n\r(=,[;",
355 /* token ending chars */
356 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
357 /* token starting chars */
358 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
359 /* valid in-token chars */
360 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
361
362 bool append_to_tagfile; /* -a: append to tags */
363 /* The following four default to TRUE for etags, but to FALSE for ctags. */
364 bool typedefs; /* -t: create tags for C and Ada typedefs */
365 bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
366 /* 0 struct/enum/union decls, and C++ */
367 /* member functions. */
368 bool constantypedefs; /* -d: create tags for C #define, enum */
369 /* constants and variables. */
370 /* -D: opposite of -d. Default under ctags. */
371 bool declarations; /* --declarations: tag them and extern in C&Co*/
372 bool globals; /* create tags for global variables */
373 bool members; /* create tags for C member variables */
374 bool update; /* -u: update tags */
375 bool vgrind_style; /* -v: create vgrind style index output */
376 bool no_warnings; /* -w: suppress warnings */
377 bool cxref_style; /* -x: create cxref style output */
378 bool cplusplus; /* .[hc] means C++, not C */
379 bool noindentypedefs; /* -I: ignore indentation in C */
380 bool packages_only; /* --packages-only: in Ada, only tag packages*/
381
382 #ifdef LONG_OPTIONS
383 struct option longopts[] =
384 {
385 { "packages-only", no_argument, &packages_only, TRUE },
386 { "append", no_argument, NULL, 'a' },
387 { "backward-search", no_argument, NULL, 'B' },
388 { "c++", no_argument, NULL, 'C' },
389 { "cxref", no_argument, NULL, 'x' },
390 { "defines", no_argument, NULL, 'd' },
391 { "declarations", no_argument, &declarations, TRUE },
392 { "no-defines", no_argument, NULL, 'D' },
393 { "globals", no_argument, &globals, TRUE },
394 { "no-globals", no_argument, &globals, FALSE },
395 { "help", no_argument, NULL, 'h' },
396 { "help", no_argument, NULL, 'H' },
397 { "ignore-indentation", no_argument, NULL, 'I' },
398 { "include", required_argument, NULL, 'i' },
399 { "language", required_argument, NULL, 'l' },
400 { "members", no_argument, &members, TRUE },
401 { "no-members", no_argument, &members, FALSE },
402 { "no-warn", no_argument, NULL, 'w' },
403 { "output", required_argument, NULL, 'o' },
404 #ifdef ETAGS_REGEXPS
405 { "regex", required_argument, NULL, 'r' },
406 { "no-regex", no_argument, NULL, 'R' },
407 { "ignore-case-regex", required_argument, NULL, 'c' },
408 #endif /* ETAGS_REGEXPS */
409 { "typedefs", no_argument, NULL, 't' },
410 { "typedefs-and-c++", no_argument, NULL, 'T' },
411 { "update", no_argument, NULL, 'u' },
412 { "version", no_argument, NULL, 'V' },
413 { "vgrind", no_argument, NULL, 'v' },
414 { NULL }
415 };
416 #endif /* LONG_OPTIONS */
417
418 #ifdef ETAGS_REGEXPS
419 /* Structure defining a regular expression. Elements are
420 the compiled pattern, and the name string. */
/* One user-supplied regular expression, kept as an element of a
   singly linked list chained through P_NEXT.  */
421 typedef struct pattern
422 {
423 struct pattern *p_next; /* next element of the list, or NULL at the end */
424 language *language; /* presumably the language this regexp is limited to -- TODO confirm in add_regex */
425 char *regex; /* presumably the regexp as given by the user -- TODO confirm */
426 struct re_pattern_buffer *pattern; /* the compiled pattern */
427 struct re_registers regs; /* register set from <regex.h>; NOTE(review): usage not visible here */
428 char *name_pattern; /* the name string */
429 bool error_signaled; /* NOTE(review): looks like a "problem already reported" flag -- confirm */
430 } pattern;
431
432 /* List of all regexps. */
433 pattern *p_head = NULL;
434
435 /* How many characters in the character set. (From regex.c.) */
436 #define CHAR_SET_SIZE 256
437 /* Translation table for case-insensitive matching. */
438 char lc_trans[CHAR_SET_SIZE];
439 #endif /* ETAGS_REGEXPS */
440
441 compressor compressors[] =
442 {
443 { "z", "gzip -d -c"},
444 { "Z", "gzip -d -c"},
445 { "gz", "gzip -d -c"},
446 { "GZ", "gzip -d -c"},
447 { "bz2", "bzip2 -d -c" },
448 { NULL }
449 };
450
451 /*
452 * Language stuff.
453 */
454
455 /* Non-NULL if language fixed. */
456 language *forced_lang = NULL;
457
458 /* Ada code */
459 char *Ada_suffixes [] =
460 { "ads", "adb", "ada", NULL };
461
462 /* Assembly code */
463 char *Asm_suffixes [] = { "a", /* Unix assembler */
464 "asm", /* Microcontroller assembly */
465 "def", /* BSO/Tasking definition includes */
466 "inc", /* Microcontroller include files */
467 "ins", /* Microcontroller include files */
468 "s", "sa", /* Unix assembler */
469 "S", /* cpp-processed Unix assembler */
470 "src", /* BSO/Tasking C compiler output */
471 NULL
472 };
473
474 /* Note that .c and .h can be considered C++, if the --c++ flag was
475 given, or if the `class' keyword is met inside the file.
476 That is why default_C_entries is called for these. */
477 char *default_C_suffixes [] =
478 { "c", "h", NULL };
479
480 char *Cplusplus_suffixes [] =
481 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
482 "M", /* Objective C++ */
483 "pdb", /* Postscript with C syntax */
484 NULL };
485
486 char *Cjava_suffixes [] =
487 { "java", NULL };
488
489 char *Cobol_suffixes [] =
490 { "COB", "cob", NULL };
491
492 char *Cstar_suffixes [] =
493 { "cs", "hs", NULL };
494
495 char *Erlang_suffixes [] =
496 { "erl", "hrl", NULL };
497
498 char *Fortran_suffixes [] =
499 { "F", "f", "f90", "for", NULL };
500
501 char *Lisp_suffixes [] =
502 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
503
504 char *Makefile_filenames [] =
505 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
506
507 char *Pascal_suffixes [] =
508 { "p", "pas", NULL };
509
510 char *Perl_suffixes [] =
511 { "pl", "pm", NULL };
512 char *Perl_interpreters [] =
513 { "perl", "@PERL@", NULL };
514
515 char *plain_C_suffixes [] =
516 { "lm", /* Objective lex file */
517 "m", /* Objective C file */
518 "pc", /* Pro*C file */
519 NULL };
520
521 char *Postscript_suffixes [] =
522 { "ps", "psw", NULL }; /* .psw is for PSWrap */
523
524 char *Prolog_suffixes [] =
525 { "prolog", NULL };
526
527 char *Python_suffixes [] =
528 { "py", NULL };
529
530 /* Can't do the `SCM' or `scm' prefix with a version number. */
531 char *Scheme_suffixes [] =
532 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
533
534 char *TeX_suffixes [] =
535 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
536
537 char *Texinfo_suffixes [] =
538 { "texi", "texinfo", "txi", NULL };
539
540 char *Yacc_suffixes [] =
541 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
542
543 /*
544 * Table of languages.
545 *
546 * It is ok for a given function to be listed under more than one
547 * name. I just didn't.
548 */
549
550 language lang_names [] =
551 {
552 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
553 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
554 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
555 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
556 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
557 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
558 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
559 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
560 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
561 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
562 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
563 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
564 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
565 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
566 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
567 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
568 { "python", Python_functions, NULL, Python_suffixes, NULL },
569 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
570 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
571 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
572 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
573 { "auto", NULL }, /* default guessing scheme */
574 { "none", just_read_file }, /* regexp matching only */
575 { NULL, NULL } /* end of list */
576 };
577
578 \f
579 static void
580 print_language_names ()
581 {
582 language *lang;
583 char **name, **ext;
584
585 puts ("\nThese are the currently supported languages, along with the\n\
586 default file names and dot suffixes:");
587 for (lang = lang_names; lang->name != NULL; lang++)
588 {
589 printf (" %-*s", 10, lang->name);
590 if (lang->filenames != NULL)
591 for (name = lang->filenames; *name != NULL; name++)
592 printf (" %s", *name);
593 if (lang->suffixes != NULL)
594 for (ext = lang->suffixes; *ext != NULL; ext++)
595 printf (" .%s", *ext);
596 puts ("");
597 }
598 puts ("Where `auto' means use default language for files based on file\n\
599 name suffix, and `none' means only do regexp processing on files.\n\
600 If no language is specified and no matching suffix is found,\n\
601 the first line of the file is read for a sharp-bang (#!) sequence\n\
602 followed by the name of an interpreter. If no such sequence is found,\n\
603 Fortran is tried first; if no tags are found, C is tried next.\n\
604 When parsing any C file, a \"class\" keyword switches to C++.\n\
605 Compressed files are supported using gzip and bzip2.");
606 }
607
608 #ifndef EMACS_NAME
609 # define EMACS_NAME "GNU Emacs"
610 #endif
611 #ifndef VERSION
612 # define VERSION "21"
613 #endif
614 static void
615 print_version ()
616 {
617 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
618 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
619 puts ("This program is distributed under the same terms as Emacs");
620
621 exit (GOOD);
622 }
623
/* Print the usage line and a description of every option on standard
   output, followed by the list of supported languages and the
   bug-report address, then exit with status GOOD.  Does not return.
   Options that apply to only one of etags and ctags are shown or
   hidden according to CTAGS; the regexp options are described only
   when ETAGS_REGEXPS is defined.  */
624 static void
625 print_help ()
626 {
627 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
628 \n\
629 These are the options accepted by %s.\n", progname, progname);
630 #ifdef LONG_OPTIONS
631 puts ("You may use unambiguous abbreviations for the long option names.");
632 #else
633 puts ("Long option names do not work with this executable, as it is not\n\
634 linked with GNU getopt.");
635 #endif /* LONG_OPTIONS */
636 puts ("A - as file name means read names from stdin (one per line).");
637 if (!CTAGS)
638 printf (" Absolute names are stored in the output file as they are.\n\
639 Relative ones are stored relative to the output file's directory.");
640 puts ("\n");
641
642 puts ("-a, --append\n\
643 Append tag entries to existing tags file.");
644
645 puts ("--packages-only\n\
646 For Ada files, only generate tags for packages .");
647
/* -B is described only in the ctags help.  */
648 if (CTAGS)
649 puts ("-B, --backward-search\n\
650 Write the search commands for the tag entries using '?', the\n\
651 backward-search command instead of '/', the forward-search command.");
652
653 /* This option is mostly obsolete, because etags can now automatically
654 detect C++. Retained for backward compatibility and for debugging and
655 experimentation. In principle, we could want to tag as C++ even
656 before any "class" keyword.
657 puts ("-C, --c++\n\
658 Treat files whose name suffix defaults to C language as C++ files.");
659 */
660
661 puts ("--declarations\n\
662 In C and derived languages, create tags for function declarations,");
/* The sentence started above is completed differently for ctags and etags.  */
663 if (CTAGS)
664 puts ("\tand create tags for extern variables if --globals is used.");
665 else
666 puts
667 ("\tand create tags for extern variables unless --no-globals is used.");
668
/* Each program documents only the option that changes its own default.  */
669 if (CTAGS)
670 puts ("-d, --defines\n\
671 Create tag entries for C #define constants and enum constants, too.");
672 else
673 puts ("-D, --no-defines\n\
674 Don't create tag entries for C #define constants and enum constants.\n\
675 This makes the tags file smaller.");
676
677 if (!CTAGS)
678 {
679 puts ("-i FILE, --include=FILE\n\
680 Include a note in tag file indicating that, when searching for\n\
681 a tag, one should also consult the tags file FILE after\n\
682 checking the current file.");
683 puts ("-l LANG, --language=LANG\n\
684 Force the following files to be considered as written in the\n\
685 named language up to the next --language=LANG option.");
686 }
687
688 if (CTAGS)
689 puts ("--globals\n\
690 Create tag entries for global variables in some languages.");
691 else
692 puts ("--no-globals\n\
693 Do not create tag entries for global variables in some\n\
694 languages. This makes the tags file smaller.");
695 puts ("--members\n\
696 Create tag entries for member variables in C and derived languages.");
697
698 #ifdef ETAGS_REGEXPS
699 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
700 Make a tag for each line matching pattern REGEXP in the following\n\
701 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
702 regexfile is a file containing one REGEXP per line.\n\
703 REGEXP is anchored (as if preceded by ^).\n\
704 The form /REGEXP/NAME/ creates a named tag.\n\
705 For example Tcl named tags can be created with:\n\
706 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
707 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
708 Like -r, --regex but ignore case when matching expressions.");
709 puts ("-R, --no-regex\n\
710 Don't create tags from regexps for the following files.");
711 #endif /* ETAGS_REGEXPS */
712 puts ("-o FILE, --output=FILE\n\
713 Write the tags to FILE.");
714 puts ("-I, --ignore-indentation\n\
715 Don't rely on indentation quite as much as normal. Currently,\n\
716 this means not to assume that a closing brace in the first\n\
717 column is the final brace of a function or structure\n\
718 definition in C and C++.");
719
/* The options in this group are described only in the ctags help.  */
720 if (CTAGS)
721 {
722 puts ("-t, --typedefs\n\
723 Generate tag entries for C and Ada typedefs.");
724 puts ("-T, --typedefs-and-c++\n\
725 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
726 and C++ member functions.");
727 puts ("-u, --update\n\
728 Update the tag entries for the given files, leaving tag\n\
729 entries for other files in place. Currently, this is\n\
730 implemented by deleting the existing entries for the given\n\
731 files and then rewriting the new entries at the end of the\n\
732 tags file. It is often faster to simply rebuild the entire\n\
733 tag file than to use this.");
734 puts ("-v, --vgrind\n\
735 Generates an index of items intended for human consumption,\n\
736 similar to the output of vgrind. The index is sorted, and\n\
737 gives the page number of each item.");
738 puts ("-w, --no-warn\n\
739 Suppress warning messages about entries defined in multiple\n\
740 files.");
741 puts ("-x, --cxref\n\
742 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
743 The output uses line numbers instead of page numbers, but\n\
744 beyond that the differences are cosmetic; try both to see\n\
745 which you like.");
746 }
747
748 puts ("-V, --version\n\
749 Print the version of the program.\n\
750 -h, --help\n\
751 Print this help message.");
752
/* Append the table of languages with their file names and suffixes.  */
753 print_language_names ();
754
755 puts ("");
756 puts ("Report bugs to bug-gnu-emacs@gnu.org");
757
758 exit (GOOD);
759 }
760
761 \f
/* Kinds of command-line items that main records, in order, while
   parsing options.  */
762 enum argument_type
763 {
764 at_language, /* a -l LANG option */
765 at_regexp, /* a -r REGEXP option, or -R */
766 at_filename, /* a file name */
767 at_icregexp /* a -c REGEXP option */
768 };
769
770 /* This structure helps us allow mixing of --lang and file names. */
771 typedef struct
772 {
773 enum argument_type arg_type; /* which kind of item this entry records */
774 char *what; /* file name or option argument (optarg); NULL for -R */
775 language *lang; /* language of the regexp; main stores the -l language here */
776 } argument;
777
778 #ifdef VMS /* VMS specific functions */
779
780 #define EOS '\0'
781
782 /* This is a BUG! ANY arbitrary limit is a BUG!
783 Won't someone please fix this? */
784 #define MAX_FILE_SPEC_LEN 255
/* File specification buffer: a length word followed by the text.
   BODY has room for MAX_FILE_SPEC_LEN characters plus a terminator.  */
785 typedef struct {
786 short curlen; /* number of characters of BODY in use; fn_exp stores EOS at body[curlen] */
787 char body[MAX_FILE_SPEC_LEN + 1];
788 } vspec;
789
790 /*
791 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
792 returning in each successive call the next file name matching the input
793 spec. The function expects that each in_spec passed
794 to it will be processed to completion; in particular, up to and
795 including the call following that in which the last matching name
796 is returned, the function ignores the value of in_spec, and will
797 only start processing a new spec with the following call.
798 If an error occurs, on return out_spec contains the value
799 of in_spec when the error occurred.
800
801 With each successive file name returned in out_spec, the
802 function's return value is one. When there are no more matching
803 names the function returns zero. If on the first call no file
804 matches in_spec, or there is any other error, -1 is returned.
805 */
806
807 #include <rmsdef.h>
808 #include <descrip.h>
809 #define OUTSIZE MAX_FILE_SPEC_LEN
810 static short
811 fn_exp (out, in)
812 vspec *out;
813 char *in;
814 {
815 static long context = 0;
816 static struct dsc$descriptor_s o;
817 static struct dsc$descriptor_s i;
818 static bool pass1 = TRUE;
819 long status;
820 short retval;
821
822 if (pass1)
823 {
824 pass1 = FALSE;
825 o.dsc$a_pointer = (char *) out;
826 o.dsc$w_length = (short)OUTSIZE;
827 i.dsc$a_pointer = in;
828 i.dsc$w_length = (short)strlen(in);
829 i.dsc$b_dtype = DSC$K_DTYPE_T;
830 i.dsc$b_class = DSC$K_CLASS_S;
831 o.dsc$b_dtype = DSC$K_DTYPE_VT;
832 o.dsc$b_class = DSC$K_CLASS_VS;
833 }
834 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
835 {
836 out->body[out->curlen] = EOS;
837 return 1;
838 }
839 else if (status == RMS$_NMF)
840 retval = 0;
841 else
842 {
843 strcpy(out->body, in);
844 retval = -1;
845 }
846 lib$find_file_end(&context);
847 pass1 = TRUE;
848 return retval;
849 }
850
851 /*
852 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
853 name of each file specified by the provided arg expanding wildcards.
854 */
855 static char *
856 gfnames (arg, p_error)
857 char *arg;
858 bool *p_error;
859 {
860 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
861
862 switch (fn_exp (&filename, arg))
863 {
864 case 1:
865 *p_error = FALSE;
866 return filename.body;
867 case 0:
868 *p_error = FALSE;
869 return NULL;
870 default:
871 *p_error = TRUE;
872 return filename.body;
873 }
874 }
875
876 #ifndef OLD /* Newer versions of VMS do provide `system'. */
/* VMS-only definition of `system'.  CMD is ignored: the function only
   reports that commands cannot be run.  It returns -1 so that a caller
   that inspects the result sees a failure rather than an indeterminate
   value.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
882 #endif
883
884 #define VERSION_DELIM ';'
885 char *massage_name (s)
886 char *s;
887 {
888 char *start = s;
889
890 for ( ; *s; s++)
891 if (*s == VERSION_DELIM)
892 {
893 *s = EOS;
894 break;
895 }
896 else
897 *s = lowcase (*s);
898 return start;
899 }
900 #endif /* VMS */
901
902 \f
903 int
904 main (argc, argv)
905 int argc;
906 char *argv[];
907 {
908 int i;
909 unsigned int nincluded_files;
910 char **included_files;
911 char *this_file;
912 argument *argbuffer;
913 int current_arg, file_count;
914 linebuffer filename_lb;
915 #ifdef VMS
916 bool got_err;
917 #endif
918
919 #ifdef DOS_NT
920 _fmode = O_BINARY; /* all of files are treated as binary files */
921 #endif /* DOS_NT */
922
923 progname = argv[0];
924 nincluded_files = 0;
925 included_files = xnew (argc, char *);
926 current_arg = 0;
927 file_count = 0;
928
929 /* Allocate enough no matter what happens. Overkill, but each one
930 is small. */
931 argbuffer = xnew (argc, argument);
932
933 #ifdef ETAGS_REGEXPS
934 /* Set syntax for regular expression routines. */
935 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
936 /* Translation table for case-insensitive search. */
937 for (i = 0; i < CHAR_SET_SIZE; i++)
938 lc_trans[i] = lowcase (i);
939 #endif /* ETAGS_REGEXPS */
940
941 /*
942 * If etags, always find typedefs and structure tags. Why not?
943 * Also default to find macro constants, enum constants and
944 * global variables.
945 */
946 if (!CTAGS)
947 {
948 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
949 globals = TRUE;
950 declarations = FALSE;
951 members = FALSE;
952 }
953
954 while (1)
955 {
956 int opt;
957 char *optstring;
958
959 #ifdef ETAGS_REGEXPS
960 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
961 #else
962 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
963 #endif /* ETAGS_REGEXPS */
964
965 #ifndef LONG_OPTIONS
966 optstring = optstring + 1;
967 #endif /* LONG_OPTIONS */
968
969 opt = getopt_long (argc, argv, optstring, longopts, 0);
970 if (opt == EOF)
971 break;
972
973 switch (opt)
974 {
975 case 0:
976 /* If getopt returns 0, then it has already processed a
977 long-named option. We should do nothing. */
978 break;
979
980 case 1:
981 /* This means that a file name has been seen. Record it. */
982 argbuffer[current_arg].arg_type = at_filename;
983 argbuffer[current_arg].what = optarg;
984 ++current_arg;
985 ++file_count;
986 break;
987
988 /* Common options. */
989 case 'a': append_to_tagfile = TRUE; break;
990 case 'C': cplusplus = TRUE; break;
991 case 'd': constantypedefs = TRUE; break;
992 case 'D': constantypedefs = FALSE; break;
993 case 'f': /* for compatibility with old makefiles */
994 case 'o':
995 if (tagfile)
996 {
997 error ("-o option may only be given once.", (char *)NULL);
998 suggest_asking_for_help ();
999 }
1000 tagfile = optarg;
1001 break;
1002 case 'I':
1003 case 'S': /* for backward compatibility */
1004 noindentypedefs = TRUE;
1005 break;
1006 case 'l':
1007 {
1008 language *lang = get_language_from_langname (optarg);
1009 if (lang != NULL)
1010 {
1011 argbuffer[current_arg].lang = lang;
1012 argbuffer[current_arg].arg_type = at_language;
1013 ++current_arg;
1014 }
1015 }
1016 break;
1017 #ifdef ETAGS_REGEXPS
1018 case 'r':
1019 argbuffer[current_arg].arg_type = at_regexp;
1020 argbuffer[current_arg].what = optarg;
1021 ++current_arg;
1022 break;
1023 case 'R':
1024 argbuffer[current_arg].arg_type = at_regexp;
1025 argbuffer[current_arg].what = NULL;
1026 ++current_arg;
1027 break;
1028 case 'c':
1029 argbuffer[current_arg].arg_type = at_icregexp;
1030 argbuffer[current_arg].what = optarg;
1031 ++current_arg;
1032 break;
1033 #endif /* ETAGS_REGEXPS */
1034 case 'V':
1035 print_version ();
1036 break;
1037 case 'h':
1038 case 'H':
1039 print_help ();
1040 break;
1041 case 't':
1042 typedefs = TRUE;
1043 break;
1044 case 'T':
1045 typedefs = typedefs_or_cplusplus = TRUE;
1046 break;
1047 #if (!CTAGS)
1048 /* Etags options */
1049 case 'i':
1050 included_files[nincluded_files++] = optarg;
1051 break;
1052 #else /* CTAGS */
1053 /* Ctags options. */
1054 case 'B': searchar = '?'; break;
1055 case 'u': update = TRUE; break;
1056 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1057 case 'x': cxref_style = TRUE; break;
1058 case 'w': no_warnings = TRUE; break;
1059 #endif /* CTAGS */
1060 default:
1061 suggest_asking_for_help ();
1062 }
1063 }
1064
1065 for (; optind < argc; ++optind)
1066 {
1067 argbuffer[current_arg].arg_type = at_filename;
1068 argbuffer[current_arg].what = argv[optind];
1069 ++current_arg;
1070 ++file_count;
1071 }
1072
1073 if (nincluded_files == 0 && file_count == 0)
1074 {
1075 error ("no input files specified.", (char *)NULL);
1076 suggest_asking_for_help ();
1077 }
1078
1079 if (tagfile == NULL)
1080 tagfile = CTAGS ? "tags" : "TAGS";
1081 cwd = etags_getcwd (); /* the current working directory */
1082 if (cwd[strlen (cwd) - 1] != '/')
1083 {
1084 char *oldcwd = cwd;
1085 cwd = concat (oldcwd, "/", "");
1086 free (oldcwd);
1087 }
1088 if (streq (tagfile, "-"))
1089 tagfiledir = cwd;
1090 else
1091 tagfiledir = absolute_dirname (tagfile, cwd);
1092
1093 init (); /* set up boolean "functions" */
1094
1095 initbuffer (&lb);
1096 initbuffer (&filename_lb);
1097
1098 if (!CTAGS)
1099 {
1100 if (streq (tagfile, "-"))
1101 {
1102 tagf = stdout;
1103 #ifdef DOS_NT
1104 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1105 doesn't take effect until after `stdout' is already open). */
1106 if (!isatty (fileno (stdout)))
1107 setmode (fileno (stdout), O_BINARY);
1108 #endif /* DOS_NT */
1109 }
1110 else
1111 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1112 if (tagf == NULL)
1113 pfatal (tagfile);
1114 }
1115
1116 /*
1117 * Loop through files finding functions.
1118 */
1119 for (i = 0; i < current_arg; ++i)
1120 {
1121 switch (argbuffer[i].arg_type)
1122 {
1123 case at_language:
1124 forced_lang = argbuffer[i].lang;
1125 break;
1126 #ifdef ETAGS_REGEXPS
1127 case at_regexp:
1128 analyse_regex (argbuffer[i].what, FALSE);
1129 break;
1130 case at_icregexp:
1131 analyse_regex (argbuffer[i].what, TRUE);
1132 break;
1133 #endif
1134 case at_filename:
1135 #ifdef VMS
1136 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1137 {
1138 if (got_err)
1139 {
1140 error ("can't find file %s\n", this_file);
1141 argc--, argv++;
1142 }
1143 else
1144 {
1145 this_file = massage_name (this_file);
1146 }
1147 #else
1148 this_file = argbuffer[i].what;
1149 #endif
1150 /* Input file named "-" means read file names from stdin
1151 (one per line) and use them. */
1152 if (streq (this_file, "-"))
1153 while (readline_internal (&filename_lb, stdin) > 0)
1154 process_file (filename_lb.buffer);
1155 else
1156 process_file (this_file);
1157 #ifdef VMS
1158 }
1159 #endif
1160 break;
1161 }
1162 }
1163
1164 #ifdef ETAGS_REGEXPS
1165 free_patterns ();
1166 #endif /* ETAGS_REGEXPS */
1167
1168 if (!CTAGS)
1169 {
1170 while (nincluded_files-- > 0)
1171 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1172
1173 fclose (tagf);
1174 exit (GOOD);
1175 }
1176
1177 /* If CTAGS, we are here. process_file did not write the tags yet,
1178 because we want them ordered. Let's do it now. */
1179 if (cxref_style)
1180 {
1181 put_entries (head);
1182 free_tree (head);
1183 head = NULL;
1184 exit (GOOD);
1185 }
1186
1187 if (update)
1188 {
1189 char cmd[BUFSIZ];
1190 for (i = 0; i < current_arg; ++i)
1191 {
1192 if (argbuffer[i].arg_type != at_filename)
1193 continue;
1194 sprintf (cmd,
1195 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1196 tagfile, argbuffer[i].what, tagfile);
1197 if (system (cmd) != GOOD)
1198 fatal ("failed to execute shell command", (char *)NULL);
1199 }
1200 append_to_tagfile = TRUE;
1201 }
1202
1203 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1204 if (tagf == NULL)
1205 pfatal (tagfile);
1206 put_entries (head);
1207 free_tree (head);
1208 head = NULL;
1209 fclose (tagf);
1210
1211 if (update)
1212 {
1213 char cmd[BUFSIZ];
1214 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1215 exit (system (cmd));
1216 }
1217 return GOOD;
1218 }
1219
1220
1221
1222 /*
1223 * Return a compressor given the file name. If EXTPTR is non-zero,
1224 * return a pointer into FILE where the compressor-specific
1225 * extension begins. If no compressor is found, NULL is returned
1226 * and EXTPTR is not significant.
1227 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1228 */
1229 static compressor *
1230 get_compressor_from_suffix (file, extptr)
1231 char *file;
1232 char **extptr;
1233 {
1234 compressor *compr;
1235 char *slash, *suffix;
1236
1237 /* This relies on FN to be after canonicalize_filename,
1238 so we don't need to consider backslashes on DOS_NT. */
1239 slash = etags_strrchr (file, '/');
1240 suffix = etags_strrchr (file, '.');
1241 if (suffix == NULL || suffix < slash)
1242 return NULL;
1243 if (extptr != NULL)
1244 *extptr = suffix;
1245 suffix += 1;
1246 /* Let those poor souls who live with DOS 8+3 file name limits get
1247 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1248 Only the first do loop is run if not MSDOS */
1249 do
1250 {
1251 for (compr = compressors; compr->suffix != NULL; compr++)
1252 if (streq (compr->suffix, suffix))
1253 return compr;
1254 if (!MSDOS)
1255 break; /* do it only once: not really a loop */
1256 if (extptr != NULL)
1257 *extptr = ++suffix;
1258 } while (*suffix != '\0');
1259 return NULL;
1260 }
1261
1262
1263
1264 /*
1265 * Return a language given the name.
1266 */
1267 static language *
1268 get_language_from_langname (name)
1269 char *name;
1270 {
1271 language *lang;
1272
1273 if (name == NULL)
1274 error ("empty language name", (char *)NULL);
1275 else
1276 {
1277 for (lang = lang_names; lang->name != NULL; lang++)
1278 if (streq (name, lang->name))
1279 return lang;
1280 error ("unknown language \"%s\"", name);
1281 }
1282
1283 return NULL;
1284 }
1285
1286
1287 /*
1288 * Return a language given the interpreter name.
1289 */
1290 static language *
1291 get_language_from_interpreter (interpreter)
1292 char *interpreter;
1293 {
1294 language *lang;
1295 char **iname;
1296
1297 if (interpreter == NULL)
1298 return NULL;
1299 for (lang = lang_names; lang->name != NULL; lang++)
1300 if (lang->interpreters != NULL)
1301 for (iname = lang->interpreters; *iname != NULL; iname++)
1302 if (streq (*iname, interpreter))
1303 return lang;
1304
1305 return NULL;
1306 }
1307
1308
1309
1310 /*
1311 * Return a language given the file name.
1312 */
1313 static language *
1314 get_language_from_filename (file)
1315 char *file;
1316 {
1317 language *lang;
1318 char **name, **ext, *suffix;
1319
1320 /* Try whole file name first. */
1321 for (lang = lang_names; lang->name != NULL; lang++)
1322 if (lang->filenames != NULL)
1323 for (name = lang->filenames; *name != NULL; name++)
1324 if (streq (*name, file))
1325 return lang;
1326
1327 /* If not found, try suffix after last dot. */
1328 suffix = etags_strrchr (file, '.');
1329 if (suffix == NULL)
1330 return NULL;
1331 suffix += 1;
1332 for (lang = lang_names; lang->name != NULL; lang++)
1333 if (lang->suffixes != NULL)
1334 for (ext = lang->suffixes; *ext != NULL; ext++)
1335 if (streq (*ext, suffix))
1336 return lang;
1337 return NULL;
1338 }
1339
1340
1341
1342 /*
1343 * This routine is called on each file argument.
1344 */
1345 static void
1346 process_file (file)
1347 char *file;
1348 {
1349 struct stat stat_buf;
1350 FILE *inf;
1351 compressor *compr;
1352 char *compressed_name, *uncompressed_name;
1353 char *ext, *real_name;
1354
1355
1356 canonicalize_filename (file);
1357 if (streq (file, tagfile) && !streq (tagfile, "-"))
1358 {
1359 error ("skipping inclusion of %s in self.", file);
1360 return;
1361 }
1362 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1363 {
1364 compressed_name = NULL;
1365 real_name = uncompressed_name = savestr (file);
1366 }
1367 else
1368 {
1369 real_name = compressed_name = savestr (file);
1370 uncompressed_name = savenstr (file, ext - file);
1371 }
1372
1373 /* If the canonicalised uncompressed name has already be dealt with,
1374 skip it silently, else add it to the list. */
1375 {
1376 typedef struct processed_file
1377 {
1378 char *filename;
1379 struct processed_file *next;
1380 } processed_file;
1381 static processed_file *pf_head = NULL;
1382 register processed_file *fnp;
1383
1384 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1385 if (streq (uncompressed_name, fnp->filename))
1386 goto exit;
1387 fnp = pf_head;
1388 pf_head = xnew (1, struct processed_file);
1389 pf_head->filename = savestr (uncompressed_name);
1390 pf_head->next = fnp;
1391 }
1392
1393 if (stat (real_name, &stat_buf) != 0)
1394 {
1395 /* Reset real_name and try with a different name. */
1396 real_name = NULL;
1397 if (compressed_name != NULL) /* try with the given suffix */
1398 {
1399 if (stat (uncompressed_name, &stat_buf) == 0)
1400 real_name = uncompressed_name;
1401 }
1402 else /* try all possible suffixes */
1403 {
1404 for (compr = compressors; compr->suffix != NULL; compr++)
1405 {
1406 compressed_name = concat (file, ".", compr->suffix);
1407 if (stat (compressed_name, &stat_buf) != 0)
1408 {
1409 if (MSDOS)
1410 {
1411 char *suf = compressed_name + strlen (file);
1412 size_t suflen = strlen (compr->suffix) + 1;
1413 for ( ; suf[1]; suf++, suflen--)
1414 {
1415 memmove (suf, suf + 1, suflen);
1416 if (stat (compressed_name, &stat_buf) == 0)
1417 {
1418 real_name = compressed_name;
1419 break;
1420 }
1421 }
1422 if (real_name != NULL)
1423 break;
1424 } /* MSDOS */
1425 free (compressed_name);
1426 compressed_name = NULL;
1427 }
1428 else
1429 {
1430 real_name = compressed_name;
1431 break;
1432 }
1433 }
1434 }
1435 if (real_name == NULL)
1436 {
1437 perror (file);
1438 goto exit;
1439 }
1440 } /* try with a different name */
1441
1442 if (!S_ISREG (stat_buf.st_mode))
1443 {
1444 error ("skipping %s: it is not a regular file.", real_name);
1445 goto exit;
1446 }
1447 if (real_name == compressed_name)
1448 {
1449 char *cmd = concat (compr->command, " ", real_name);
1450 inf = (FILE *) popen (cmd, "r");
1451 free (cmd);
1452 }
1453 else
1454 inf = fopen (real_name, "r");
1455 if (inf == NULL)
1456 {
1457 perror (real_name);
1458 goto exit;
1459 }
1460
1461 find_entries (uncompressed_name, inf);
1462
1463 if (real_name == compressed_name)
1464 pclose (inf);
1465 else
1466 fclose (inf);
1467
1468 if (!CTAGS)
1469 {
1470 char *filename;
1471
1472 if (filename_is_absolute (uncompressed_name))
1473 {
1474 /* file is an absolute file name. Canonicalise it. */
1475 filename = absolute_filename (uncompressed_name, cwd);
1476 }
1477 else
1478 {
1479 /* file is a file name relative to cwd. Make it relative
1480 to the directory of the tags file. */
1481 filename = relative_filename (uncompressed_name, tagfiledir);
1482 }
1483 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1484 free (filename);
1485 put_entries (head);
1486 free_tree (head);
1487 head = NULL;
1488 }
1489
1490 exit:
1491 if (compressed_name) free(compressed_name);
1492 if (uncompressed_name) free(uncompressed_name);
1493 return;
1494 }
1495
1496 /*
1497 * This routine sets up the boolean pseudo-functions which work
1498 * by setting boolean flags dependent upon the corresponding character.
1499 * Every char which is NOT in that string is not a white char. Therefore,
1500 * all of the array "_wht" is set to FALSE, and then the elements
1501 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1502 * of a char is TRUE if it is the string "white", else FALSE.
1503 */
1504 static void
1505 init ()
1506 {
1507 register char *sp;
1508 register int i;
1509
1510 for (i = 0; i < CHARS; i++)
1511 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1512 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1513 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1514 notinname('\0') = notinname('\n');
1515 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1516 begtoken('\0') = begtoken('\n');
1517 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1518 intoken('\0') = intoken('\n');
1519 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1520 endtoken('\0') = endtoken('\n');
1521 }
1522
1523 /*
1524 * This routine opens the specified file and calls the function
1525 * which finds the function and type definitions.
1526 */
1527 node *last_node = NULL;
1528
1529 static void
1530 find_entries (file, inf)
1531 char *file;
1532 FILE *inf;
1533 {
1534 char *cp;
1535 language *lang;
1536 node *old_last_node;
1537
1538 /* Memory leakage here: the string pointed by curfile is
1539 never released, because curfile is copied into np->file
1540 for each node, to be used in CTAGS mode. The amount of
1541 memory leaked here is the sum of the lengths of the
1542 file names. */
1543 curfile = savestr (file);
1544
1545 /* If user specified a language, use it. */
1546 lang = forced_lang;
1547 if (lang != NULL && lang->function != NULL)
1548 {
1549 curlang = lang;
1550 lang->function (inf);
1551 return;
1552 }
1553
1554 /* Try to guess the language given the file name. */
1555 lang = get_language_from_filename (file);
1556 if (lang != NULL && lang->function != NULL)
1557 {
1558 curlang = lang;
1559 lang->function (inf);
1560 return;
1561 }
1562
1563 /* Look for sharp-bang as the first two characters. */
1564 if (readline_internal (&lb, inf) > 0
1565 && lb.len >= 2
1566 && lb.buffer[0] == '#'
1567 && lb.buffer[1] == '!')
1568 {
1569 char *lp;
1570
1571 /* Set lp to point at the first char after the last slash in the
1572 line or, if no slashes, at the first nonblank. Then set cp to
1573 the first successive blank and terminate the string. */
1574 lp = etags_strrchr (lb.buffer+2, '/');
1575 if (lp != NULL)
1576 lp += 1;
1577 else
1578 lp = skip_spaces (lb.buffer + 2);
1579 cp = skip_non_spaces (lp);
1580 *cp = '\0';
1581
1582 if (strlen (lp) > 0)
1583 {
1584 lang = get_language_from_interpreter (lp);
1585 if (lang != NULL && lang->function != NULL)
1586 {
1587 curlang = lang;
1588 lang->function (inf);
1589 return;
1590 }
1591 }
1592 }
1593 /* We rewind here, even if inf may be a pipe. We fail if the
1594 length of the first line is longer than the pipe block size,
1595 which is unlikely. */
1596 rewind (inf);
1597
1598 /* Try Fortran. */
1599 old_last_node = last_node;
1600 curlang = get_language_from_langname ("fortran");
1601 Fortran_functions (inf);
1602
1603 /* No Fortran entries found. Try C. */
1604 if (old_last_node == last_node)
1605 {
1606 /* We do not tag if rewind fails.
1607 Only the file name will be recorded in the tags file. */
1608 rewind (inf);
1609 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1610 default_C_entries (inf);
1611 }
1612 return;
1613 }
1614
1615 \f
1616 /* Record a tag. */
1617 static void
1618 pfnote (name, is_func, linestart, linelen, lno, cno)
1619 char *name; /* tag name, or NULL if unnamed */
1620 bool is_func; /* tag is a function */
1621 char *linestart; /* start of the line where tag is */
1622 int linelen; /* length of the line where tag is */
1623 int lno; /* line number */
1624 long cno; /* character number */
1625 {
1626 register node *np;
1627
1628 if (CTAGS && name == NULL)
1629 return;
1630
1631 np = xnew (1, node);
1632
1633 /* If ctags mode, change name "main" to M<thisfilename>. */
1634 if (CTAGS && !cxref_style && streq (name, "main"))
1635 {
1636 register char *fp = etags_strrchr (curfile, '/');
1637 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1638 fp = etags_strrchr (np->name, '.');
1639 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1640 fp[0] = '\0';
1641 }
1642 else
1643 np->name = name;
1644 np->been_warned = FALSE;
1645 np->file = curfile;
1646 np->is_func = is_func;
1647 np->lno = lno;
1648 /* Our char numbers are 0-base, because of C language tradition?
1649 ctags compatibility? old versions compatibility? I don't know.
1650 Anyway, since emacs's are 1-base we expect etags.el to take care
1651 of the difference. If we wanted to have 1-based numbers, we would
1652 uncomment the +1 below. */
1653 np->cno = cno /* + 1 */ ;
1654 np->left = np->right = NULL;
1655 if (CTAGS && !cxref_style)
1656 {
1657 if (strlen (linestart) < 50)
1658 np->pat = concat (linestart, "$", "");
1659 else
1660 np->pat = savenstr (linestart, 50);
1661 }
1662 else
1663 np->pat = savenstr (linestart, linelen);
1664
1665 add_node (np, &head);
1666 }
1667
1668 /*
1669 * TAGS format specification
1670 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1671 *
1672 * pfnote should emit the optimized form [unnamed tag] only if:
1673 * 1. name does not contain any of the characters " \t\r\n(),;";
1674 * 2. linestart contains name as either a rightmost, or rightmost but
1675 * one character, substring;
1676 * 3. the character, if any, immediately before name in linestart must
1677 * be one of the characters " \t(),;";
1678 * 4. the character, if any, immediately after name in linestart must
1679 * also be one of the characters " \t(),;".
1680 *
1681 * The real implementation uses the notinname() macro, which recognises
1682 * characters slightly different form " \t\r\n(),;". See the variable
1683 * `nonam'.
1684 */
1685 #define traditional_tag_style TRUE
1686 static void
1687 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1688 char *name; /* tag name, or NULL if unnamed */
1689 int namelen; /* tag length */
1690 bool is_func; /* tag is a function */
1691 char *linestart; /* start of the line where tag is */
1692 int linelen; /* length of the line where tag is */
1693 int lno; /* line number */
1694 long cno; /* character number */
1695 {
1696 register char *cp;
1697 bool named;
1698
1699 named = TRUE;
1700 if (!CTAGS)
1701 {
1702 for (cp = name; !notinname (*cp); cp++)
1703 continue;
1704 if (*cp == '\0') /* rule #1 */
1705 {
1706 cp = linestart + linelen - namelen;
1707 if (notinname (linestart[linelen-1]))
1708 cp -= 1; /* rule #4 */
1709 if (cp >= linestart /* rule #2 */
1710 && (cp == linestart
1711 || notinname (cp[-1])) /* rule #3 */
1712 && strneq (name, cp, namelen)) /* rule #2 */
1713 named = FALSE; /* use unnamed tag */
1714 }
1715 }
1716
1717 if (named)
1718 name = savenstr (name, namelen);
1719 else
1720 name = NULL;
1721 pfnote (name, is_func, linestart, linelen, lno, cno);
1722 }
1723
1724 /*
1725 * free_tree ()
1726 * recurse on left children, iterate on right children.
1727 */
1728 static void
1729 free_tree (np)
1730 register node *np;
1731 {
1732 while (np)
1733 {
1734 register node *node_right = np->right;
1735 free_tree (np->left);
1736 if (np->name != NULL)
1737 free (np->name);
1738 free (np->pat);
1739 free (np);
1740 np = node_right;
1741 }
1742 }
1743
1744 /*
1745 * add_node ()
1746 * Adds a node to the tree of nodes. In etags mode, we don't keep
1747 * it sorted; we just keep a linear list. In ctags mode, maintain
1748 * an ordered tree, with no attempt at balancing.
1749 *
1750 * add_node is the only function allowed to add nodes, so it can
1751 * maintain state.
1752 */
1753 static void
1754 add_node (np, cur_node_p)
1755 node *np, **cur_node_p;
1756 {
1757 register int dif;
1758 register node *cur_node = *cur_node_p;
1759
1760 if (cur_node == NULL)
1761 {
1762 *cur_node_p = np;
1763 last_node = np;
1764 return;
1765 }
1766
1767 if (!CTAGS)
1768 {
1769 /* Etags Mode */
1770 if (last_node == NULL)
1771 fatal ("internal error in add_node", (char *)NULL);
1772 last_node->right = np;
1773 last_node = np;
1774 }
1775 else
1776 {
1777 /* Ctags Mode */
1778 dif = strcmp (np->name, cur_node->name);
1779
1780 /*
1781 * If this tag name matches an existing one, then
1782 * do not add the node, but maybe print a warning.
1783 */
1784 if (!dif)
1785 {
1786 if (streq (np->file, cur_node->file))
1787 {
1788 if (!no_warnings)
1789 {
1790 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1791 np->file, lineno, np->name);
1792 fprintf (stderr, "Second entry ignored\n");
1793 }
1794 }
1795 else if (!cur_node->been_warned && !no_warnings)
1796 {
1797 fprintf
1798 (stderr,
1799 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1800 np->file, cur_node->file, np->name);
1801 cur_node->been_warned = TRUE;
1802 }
1803 return;
1804 }
1805
1806 /* Actually add the node */
1807 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1808 }
1809 }
1810
1811 \f
1812 static void
1813 put_entries (np)
1814 register node *np;
1815 {
1816 register char *sp;
1817
1818 if (np == NULL)
1819 return;
1820
1821 /* Output subentries that precede this one */
1822 put_entries (np->left);
1823
1824 /* Output this entry */
1825
1826 if (!CTAGS)
1827 {
1828 if (np->name != NULL)
1829 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1830 np->pat, np->name, np->lno, np->cno);
1831 else
1832 fprintf (tagf, "%s\177%d,%ld\n",
1833 np->pat, np->lno, np->cno);
1834 }
1835 else
1836 {
1837 if (np->name == NULL)
1838 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1839
1840 if (cxref_style)
1841 {
1842 if (vgrind_style)
1843 fprintf (stdout, "%s %s %d\n",
1844 np->name, np->file, (np->lno + 63) / 64);
1845 else
1846 fprintf (stdout, "%-16s %3d %-16s %s\n",
1847 np->name, np->lno, np->file, np->pat);
1848 }
1849 else
1850 {
1851 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1852
1853 if (np->is_func)
1854 { /* a function */
1855 putc (searchar, tagf);
1856 putc ('^', tagf);
1857
1858 for (sp = np->pat; *sp; sp++)
1859 {
1860 if (*sp == '\\' || *sp == searchar)
1861 putc ('\\', tagf);
1862 putc (*sp, tagf);
1863 }
1864 putc (searchar, tagf);
1865 }
1866 else
1867 { /* a typedef; text pattern inadequate */
1868 fprintf (tagf, "%d", np->lno);
1869 }
1870 putc ('\n', tagf);
1871 }
1872 }
1873
1874 /* Output subentries that follow this one */
1875 put_entries (np->right);
1876 }
1877
/* Number of digits in the decimal representation of NUM.  Any value
   lower than 10, negative ones included, counts as one digit: no
   room is reserved for a sign. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
1888
1889 /*
1890 * Return total number of characters that put_entries will output for
1891 * the nodes in the subtree of the specified node. Works only if
1892 * we are not ctags, but called only in that case. This count
1893 * is irrelevant with the new tags.el, but is still supplied for
1894 * backward compatibility.
1895 */
1896 static int
1897 total_size_of_entries (np)
1898 register node *np;
1899 {
1900 register int total;
1901
1902 if (np == NULL)
1903 return 0;
1904
1905 for (total = 0; np != NULL; np = np->right)
1906 {
1907 /* Count left subentries. */
1908 total += total_size_of_entries (np->left);
1909
1910 /* Count this entry */
1911 total += strlen (np->pat) + 1;
1912 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1913 if (np->name != NULL)
1914 total += 1 + strlen (np->name); /* \001name */
1915 }
1916
1917 return total;
1918 }
1919
1920 \f
/*
 * C extensions: flags telling which C-like dialect is being parsed.
 * The bits covered by C_EXT select the dialect; C_AUTO and YACC are
 * separate flags above them.  C_STAR and C_JAVA both include the
 * C_PLPL bit.
 */

/* Mask of the dialect bits. */
#define C_EXT   0x00fff

/* Plain C. */
#define C_PLAIN 0x00000

/* C++ */
#define C_PLPL  0x00001

/* C* */
#define C_STAR  0x00003

/* JAVA */
#define C_JAVA  0x00005

/* C, but switch to C++ if `class' is met */
#define C_AUTO  0x01000

/* yacc file */
#define YACC    0x10000
1929
/*
 * The C symbol tables.
 * These are the classes a keyword can belong to: each entry of the
 * keyword table below carries one of them.  st_none stands for a
 * word that is not a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef,
  st_C_typespec
};
1944
1945 static unsigned int hash P_((const char *, unsigned int));
1946 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1947 static enum sym_type C_symtype P_((char *, int, int));
1948
1949 /* Feed stuff between (but not including) %[ and %] lines to:
1950 gperf -c -k 1,3 -o -p -r -t
1951 %[
1952 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1953 %%
1954 if, 0, st_C_ignore
1955 for, 0, st_C_ignore
1956 while, 0, st_C_ignore
1957 switch, 0, st_C_ignore
1958 return, 0, st_C_ignore
1959 @interface, 0, st_C_objprot
1960 @protocol, 0, st_C_objprot
1961 @implementation,0, st_C_objimpl
1962 @end, 0, st_C_objend
1963 import, C_JAVA, st_C_ignore
1964 package, C_JAVA, st_C_ignore
1965 friend, C_PLPL, st_C_ignore
1966 extends, C_JAVA, st_C_javastruct
1967 implements, C_JAVA, st_C_javastruct
1968 interface, C_JAVA, st_C_struct
1969 class, 0, st_C_class
1970 namespace, C_PLPL, st_C_struct
1971 domain, C_STAR, st_C_struct
1972 union, 0, st_C_struct
1973 struct, 0, st_C_struct
1974 extern, 0, st_C_extern
1975 enum, 0, st_C_enum
1976 typedef, 0, st_C_typedef
1977 define, 0, st_C_define
1978 operator, C_PLPL, st_C_operator
1979 bool, C_PLPL, st_C_typespec
1980 long, 0, st_C_typespec
1981 short, 0, st_C_typespec
1982 int, 0, st_C_typespec
1983 char, 0, st_C_typespec
1984 float, 0, st_C_typespec
1985 double, 0, st_C_typespec
1986 signed, 0, st_C_typespec
1987 unsigned, 0, st_C_typespec
1988 auto, 0, st_C_typespec
1989 void, 0, st_C_typespec
1990 static, 0, st_C_typespec
1991 const, 0, st_C_typespec
1992 volatile, 0, st_C_typespec
1993 explicit, C_PLPL, st_C_typespec
1994 mutable, C_PLPL, st_C_typespec
1995 typename, C_PLPL, st_C_typespec
1996 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1997 DEFUN, 0, st_C_gnumacro
1998 SYSCALL, 0, st_C_gnumacro
1999 ENTRY, 0, st_C_gnumacro
2000 PSEUDO, 0, st_C_gnumacro
2001 # These are defined inside C functions, so currently they are not met.
2002 # EXFUN used in glibc, DEFVAR_* in emacs.
2003 #EXFUN, 0, st_C_gnumacro
2004 #DEFVAR_, 0, st_C_gnumacro
2005 %]
2006 and replace lines between %< and %> with its output,
2007 then make in_word_set static. */
2008 /*%<*/
2009 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2010 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2011 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2012
2013 #define TOTAL_KEYWORDS 46
2014 #define MIN_WORD_LENGTH 2
2015 #define MAX_WORD_LENGTH 15
2016 #define MIN_HASH_VALUE 13
2017 #define MAX_HASH_VALUE 121
2018 /* maximum key range = 109, duplicates = 0 */
2019
/* Hash of the LEN characters long word at STR: LEN itself, plus the
   value associated to the first character, plus the value associated
   to the third one unless LEN is 1 or 2.  Characters that begin no
   keyword are given a value that is by itself larger than
   MAX_HASH_VALUE. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122,  57, 122, 122, 122,  55,   6,
       60, 122, 122, 122, 122, 122, 122, 122, 122, 122,
       51, 122, 122,  10,   2, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122,   2,  52,  59,
       49,  38,  56,  41, 122,  22, 122, 122,   9,  32,
       33,  60,  26, 122,   1,  28,  46,  59,  44,  51,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      122, 122, 122, 122, 122, 122
    };
  register int key = len;

  /* Words of one or two characters have no third character. */
  if (len != 1 && len != 2)
    key += asso_values[(unsigned char)str[2]];
  key += asso_values[(unsigned char)str[0]];
  return key;
}
2071
2072 #ifdef __GNUC__
2073 __inline
2074 #endif
2075 struct C_stab_entry *
2076 in_word_set (str, len)
2077 register const char *str;
2078 register unsigned int len;
2079 {
2080 static struct C_stab_entry wordlist[] =
2081 {
2082 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2083 {""}, {""}, {""}, {""},
2084 {"ENTRY", 0, st_C_gnumacro},
2085 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2086 {""},
2087 {"if", 0, st_C_ignore},
2088 {""}, {""},
2089 {"SYSCALL", 0, st_C_gnumacro},
2090 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2091 {"struct", 0, st_C_struct},
2092 {"static", 0, st_C_typespec},
2093 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2094 {"long", 0, st_C_typespec},
2095 {""}, {""}, {""}, {""}, {""},
2096 {"auto", 0, st_C_typespec},
2097 {"return", 0, st_C_ignore},
2098 {"import", C_JAVA, st_C_ignore},
2099 {""},
2100 {"switch", 0, st_C_ignore},
2101 {""},
2102 {"implements", C_JAVA, st_C_javastruct},
2103 {""},
2104 {"for", 0, st_C_ignore},
2105 {"volatile", 0, st_C_typespec},
2106 {""},
2107 {"PSEUDO", 0, st_C_gnumacro},
2108 {""},
2109 {"char", 0, st_C_typespec},
2110 {"class", 0, st_C_class},
2111 {"@protocol", 0, st_C_objprot},
2112 {""}, {""},
2113 {"void", 0, st_C_typespec},
2114 {"int", 0, st_C_typespec},
2115 {"explicit", C_PLPL, st_C_typespec},
2116 {""},
2117 {"namespace", C_PLPL, st_C_struct},
2118 {"signed", 0, st_C_typespec},
2119 {""},
2120 {"interface", C_JAVA, st_C_struct},
2121 {"while", 0, st_C_ignore},
2122 {"typedef", 0, st_C_typedef},
2123 {"typename", C_PLPL, st_C_typespec},
2124 {""}, {""}, {""},
2125 {"friend", C_PLPL, st_C_ignore},
2126 {"mutable", C_PLPL, st_C_typespec},
2127 {"union", 0, st_C_struct},
2128 {"domain", C_STAR, st_C_struct},
2129 {""}, {""},
2130 {"extern", 0, st_C_extern},
2131 {"extends", C_JAVA, st_C_javastruct},
2132 {"package", C_JAVA, st_C_ignore},
2133 {"short", 0, st_C_typespec},
2134 {"@end", 0, st_C_objend},
2135 {"unsigned", 0, st_C_typespec},
2136 {""},
2137 {"const", 0, st_C_typespec},
2138 {""}, {""},
2139 {"@interface", 0, st_C_objprot},
2140 {"enum", 0, st_C_enum},
2141 {""}, {""},
2142 {"@implementation",0, st_C_objimpl},
2143 {""},
2144 {"operator", C_PLPL, st_C_operator},
2145 {""}, {""}, {""}, {""},
2146 {"define", 0, st_C_define},
2147 {""}, {""},
2148 {"double", 0, st_C_typespec},
2149 {""},
2150 {"bool", C_PLPL, st_C_typespec},
2151 {""}, {""}, {""},
2152 {"DEFUN", 0, st_C_gnumacro},
2153 {"float", 0, st_C_typespec}
2154 };
2155
2156 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2157 {
2158 register int key = hash (str, len);
2159
2160 if (key <= MAX_HASH_VALUE && key >= 0)
2161 {
2162 register const char *s = wordlist[key].name;
2163
2164 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2165 return &wordlist[key];
2166 }
2167 }
2168 return 0;
2169 }
2170 /*%>*/
2171
2172 static enum sym_type
2173 C_symtype (str, len, c_ext)
2174 char *str;
2175 int len;
2176 int c_ext;
2177 {
2178 register struct C_stab_entry *se = in_word_set (str, len);
2179
2180 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2181 return st_none;
2182 return se->type;
2183 }
2184
2185 \f
2186 /*
2187 * C functions and variables are recognized using a simple
2188 * finite automaton. fvdef is its state variable; it is advanced by
 * consider_token for each token and by C_entries for punctuation.
2189 */
2190 enum
2191 {
2192 fvnone, /* nothing seen */
2193 fdefunkey, /* GNU macro keyword (DEFUN, ENTRY, SYSCALL, PSEUDO) seen */
2194 fdefunname, /* name following a GNU macro keyword seen */
2195 foperator, /* func: operator keyword seen (C++) */
2196 fvnameseen, /* function or variable name seen */
2197 fstartlist, /* func: just after open parenthesis */
2198 finlist, /* func: in parameter list */
2199 flistseen, /* func: after parameter list */
2200 fignore, /* func: before open brace */
2201 vignore /* var-like: ignore until ';' */
2202 } fvdef;
2203
2204 bool fvextern; /* func or var: extern keyword seen */
2205
2206 /*
2207 * typedefs are recognized using a simple finite automaton.
2208 * typdef is its state variable.
2209 */
2210 enum
2211 {
2212 tnone, /* nothing seen */
2213 tkeyseen, /* typedef keyword seen */
2214 ttypeseen, /* defined type seen (type specifier, struct-like keyword or plain identifier) */
2215 tinbody, /* inside typedef body, i.e. after its opening brace */
2216 tend, /* closing brace of the body seen: just before typedef tag */
2217 tignore /* tag already made: junk after typedef tag */
2218 } typdef;
2219
2220 /*
2221 * struct-like structures (enum, struct and union, plus class and the
 * other keywords that the keyword table maps to st_C_struct) are recognized
2222 * using another simple finite automaton. `structdef' is its state
2223 * variable.
2224 */
2225 enum
2226 {
2227 snone, /* nothing seen yet,
2228 or in struct body if cblev > 0 */
2229 skeyseen, /* struct-like keyword seen */
2230 stagseen, /* struct-like tag seen */
2231 sintemplate, /* inside template brackets after the tag (ignore) */
2232 scolonseen /* colon (or Java extends/implements) seen after struct-like tag */
2233 } structdef;
2234
2235 /*
2236 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores here a copy made with savenstr; that copy is
 * deliberately never freed (see the note in consider_token).
2237 */
2238 char *objtag = "<uninited>";
2239
2240 /*
2241 * Yet another little state machine to deal with preprocessor lines.
 * definedef is its state variable; the CNL macro resets it to dnone
 * at the end of each line.
2242 */
2243 enum
2244 {
2245 dnone, /* nothing seen */
2246 dsharpseen, /* '#' seen at start of line (only blanks or a comment end before it) */
2247 ddefineseen, /* '#' and 'define' seen */
2248 dignorerest /* ignore rest of line */
2249 } definedef;
2250
2251 /*
2252 * State machine for Objective C protocols and implementations.
2253 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 * objdef is its state variable.
2254 */
2255 enum
2256 {
2257 onone, /* nothing seen */
2258 oprotocol, /* @interface or @protocol seen */
2259 oimplementation, /* @implementation seen */
2260 otagseen, /* class name seen */
2261 oparenseen, /* parenthesis before category seen */
2262 ocatseen, /* category name seen */
2263 oinbody, /* in @implementation body */
2264 omethodsign, /* in @implementation body, after +/- */
2265 omethodtag, /* after method name */
2266 omethodcolon, /* after method colon */
2267 omethodparm, /* after method parameter */
2268 oignore /* wait for @end */
2269 } objdef;
2270
2271
2272 /*
2273 * Use this structure to keep info about the token read, and how it
2274 * should be tagged. Used by the make_C_tag function to build a tag.
2275 */
2276 struct tok
2277 {
2278 bool valid; /* the token may be tagged; cleared once a tag is made */
2279 bool named; /* the tag gets an explicit name, taken from token_name */
2280 int offset; /* offset of the token from the start of `line' */
2281 int length; /* length of the token */
2282 int lineno; /* number of the line where the token was read */
2283 long linepos; /* char position in the file of the start of `line' */
2284 char *line; /* buffer holding the line that contains the token */
2285 } token; /* latest token read */
2286 linebuffer token_name; /* its name */
2287
2288 /*
2289 * Variables and functions for dealing with nested structures.
2290 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2291 */
2292 static void pushclass_above P_((int, char *, int));
2293 static void popclass_above P_((int));
2294 static void write_classname P_((linebuffer *, char *qualifier));
2295
2296 struct {
2297 char **cname; /* nested class names (NULL for an unnamed one) */
2298 int *cblev; /* nested class curly brace level */
2299 int nl; /* class nesting level (elements used) */
2300 int size; /* allocated length of both arrays */
2301 } cstack; /* stack for nested declaration tags */
2302 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2303 #define nestlev (cstack.nl)
2304 /* Directly in the body of the innermost struct, not inside a nested function.
   Expands to code using the `cblev' variable visible at the point of use. */
2305 #define instruct (structdef == snone && nestlev > 0 \
2306 && cblev == cstack.cblev[nestlev-1] + 1)
2307
2308 static void
2309 pushclass_above (cblev, str, len)
2310 int cblev;
2311 char *str;
2312 int len;
2313 {
2314 int nl;
2315
2316 popclass_above (cblev);
2317 nl = cstack.nl;
2318 if (nl >= cstack.size)
2319 {
2320 int size = cstack.size *= 2;
2321 xrnew (cstack.cname, size, char *);
2322 xrnew (cstack.cblev, size, int);
2323 }
2324 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2325 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2326 cstack.cblev[nl] = cblev;
2327 cstack.nl = nl + 1;
2328 }
2329
2330 static void
2331 popclass_above (cblev)
2332 int cblev;
2333 {
2334 int nl;
2335
2336 for (nl = cstack.nl - 1;
2337 nl >= 0 && cstack.cblev[nl] >= cblev;
2338 nl--)
2339 {
2340 if (cstack.cname[nl] != NULL)
2341 free (cstack.cname[nl]);
2342 cstack.nl = nl;
2343 }
2344 }
2345
2346 static void
2347 write_classname (cn, qualifier)
2348 linebuffer *cn;
2349 char *qualifier;
2350 {
2351 int i, len;
2352 int qlen = strlen (qualifier);
2353
2354 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2355 {
2356 len = 0;
2357 cn->len = 0;
2358 cn->buffer[0] = '\0';
2359 }
2360 else
2361 {
2362 len = strlen (cstack.cname[0]);
2363 linebuffer_setlen (cn, len);
2364 strcpy (cn->buffer, cstack.cname[0]);
2365 }
2366 for (i = 1; i < cstack.nl; i++)
2367 {
2368 char *s;
2369 int slen;
2370
2371 s = cstack.cname[i];
2372 if (s == NULL)
2373 continue;
2374 slen = strlen (s);
2375 len += slen + qlen;
2376 linebuffer_setlen (cn, len);
2377 strncat (cn->buffer, qualifier, qlen);
2378 strncat (cn->buffer, s, slen);
2379 }
2380 }
2381
2382 \f
2383 static bool consider_token P_((char *, int, int, int *, int, int, bool *));
2384 static void make_C_tag P_((bool));
2385
2386 /*
2387 * consider_token ()
2388 * checks to see if the current token is at the start of a
2389 * function or variable, or corresponds to a typedef, or
2390 * is a struct/union/enum tag, or #define, or an enum constant.
2391 *
2392 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2393 * with args. C_EXTP points to which language we are looking at.
2394 *
2395 * Globals
2396 * fvdef IN OUT
2397 * structdef IN OUT
2398 * definedef IN OUT
2399 * typdef IN OUT
2400 * objdef IN OUT
2401 */
2402
2403 static bool
2404 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2405 register char *str; /* IN: token pointer */
2406 register int len; /* IN: token length */
2407 register int c; /* IN: first char after the token */
2408 int *c_extp; /* IN, OUT: C extensions mask */
2409 int cblev; /* IN: curly brace level */
2410 int parlev; /* IN: parenthesis level */
2411 bool *is_func_or_var; /* OUT: function or variable found */
2412 {
2413 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2414 structtype is the type of the preceding struct-like keyword, and
2415 structcblev is the curly brace level where it has been seen. */
2416 static enum sym_type structtype;
2417 static int structcblev;
2418 static enum sym_type toktype;
2419
2420
2421 toktype = C_symtype (str, len, *c_extp);
2422
2423 /*
2424 * Advance the definedef state machine.
2425 */
2426 switch (definedef)
2427 {
2428 case dnone:
2429 /* We're not on a preprocessor line. */
2430 if (toktype == st_C_gnumacro)
2431 {
2432 fvdef = fdefunkey;
2433 return FALSE;
2434 }
2435 break;
2436 case dsharpseen:
2437 if (toktype == st_C_define)
2438 {
2439 definedef = ddefineseen;
2440 }
2441 else
2442 {
2443 definedef = dignorerest;
2444 }
2445 return FALSE;
2446 case ddefineseen:
2447 /*
2448 * Make a tag for any macro, unless it is a constant
2449 * and constantypedefs is FALSE.
2450 */
2451 definedef = dignorerest;
2452 *is_func_or_var = (c == '(');
2453 if (!*is_func_or_var && !constantypedefs)
2454 return FALSE;
2455 else
2456 return TRUE;
2457 case dignorerest:
2458 return FALSE;
2459 default:
2460 error ("internal error: definedef value.", (char *)NULL);
2461 }
2462
2463 /*
2464 * Now typedefs
2465 */
2466 switch (typdef)
2467 {
2468 case tnone:
2469 if (toktype == st_C_typedef)
2470 {
2471 if (typedefs)
2472 typdef = tkeyseen;
2473 fvextern = FALSE;
2474 fvdef = fvnone;
2475 return FALSE;
2476 }
2477 break;
2478 case tkeyseen:
2479 switch (toktype)
2480 {
2481 case st_none:
2482 case st_C_typespec:
2483 case st_C_class:
2484 case st_C_struct:
2485 case st_C_enum:
2486 typdef = ttypeseen;
2487 break;
2488 }
2489 break;
2490 case ttypeseen:
2491 if (structdef == snone && fvdef == fvnone)
2492 {
2493 fvdef = fvnameseen;
2494 return TRUE;
2495 }
2496 break;
2497 case tend:
2498 switch (toktype)
2499 {
2500 case st_C_typespec:
2501 case st_C_class:
2502 case st_C_struct:
2503 case st_C_enum:
2504 return FALSE;
2505 }
2506 return TRUE;
2507 }
2508
2509 /*
2510 * This structdef business is NOT invoked when we are ctags and the
2511 * file is plain C. This is because a struct tag may have the same
2512 * name as another tag, and this loses with ctags.
2513 */
2514 switch (toktype)
2515 {
2516 case st_C_javastruct:
2517 if (structdef == stagseen)
2518 structdef = scolonseen;
2519 return FALSE;
2520 case st_C_class:
2521 if (cblev == 0
2522 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2523 && definedef == dnone && structdef == snone
2524 && typdef == tnone && fvdef == fvnone)
2525 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2526 /* FALLTHRU */
2527 case st_C_struct:
2528 case st_C_enum:
2529 if (parlev == 0
2530 && fvdef != vignore
2531 && (typdef == tkeyseen
2532 || (typedefs_or_cplusplus && structdef == snone)))
2533 {
2534 structdef = skeyseen;
2535 structtype = toktype;
2536 structcblev = cblev;
2537 }
2538 return FALSE;
2539 }
2540
2541 if (structdef == skeyseen)
2542 {
2543 structdef = stagseen;
2544 return TRUE;
2545 }
2546
2547 if (typdef != tnone)
2548 definedef = dnone;
2549
2550 /* Detect Objective C constructs. */
2551 switch (objdef)
2552 {
2553 case onone:
2554 switch (toktype)
2555 {
2556 case st_C_objprot:
2557 objdef = oprotocol;
2558 return FALSE;
2559 case st_C_objimpl:
2560 objdef = oimplementation;
2561 return FALSE;
2562 }
2563 break;
2564 case oimplementation:
2565 /* Save the class tag for functions or variables defined inside. */
2566 objtag = savenstr (str, len);
2567 objdef = oinbody;
2568 return FALSE;
2569 case oprotocol:
2570 /* Save the class tag for categories. */
2571 objtag = savenstr (str, len);
2572 objdef = otagseen;
2573 *is_func_or_var = TRUE;
2574 return TRUE;
2575 case oparenseen:
2576 objdef = ocatseen;
2577 *is_func_or_var = TRUE;
2578 return TRUE;
2579 case oinbody:
2580 break;
2581 case omethodsign:
2582 if (parlev == 0)
2583 {
2584 objdef = omethodtag;
2585 linebuffer_setlen (&token_name, len);
2586 strncpy (token_name.buffer, str, len);
2587 token_name.buffer[len] = '\0';
2588 return TRUE;
2589 }
2590 return FALSE;
2591 case omethodcolon:
2592 if (parlev == 0)
2593 objdef = omethodparm;
2594 return FALSE;
2595 case omethodparm:
2596 if (parlev == 0)
2597 {
2598 objdef = omethodtag;
2599 linebuffer_setlen (&token_name, token_name.len + len);
2600 strncat (token_name.buffer, str, len);
2601 return TRUE;
2602 }
2603 return FALSE;
2604 case oignore:
2605 if (toktype == st_C_objend)
2606 {
2607 /* Memory leakage here: the string pointed by objtag is
2608 never released, because many tests would be needed to
2609 avoid breaking on incorrect input code. The amount of
2610 memory leaked here is the sum of the lengths of the
2611 class tags.
2612 free (objtag); */
2613 objdef = onone;
2614 }
2615 return FALSE;
2616 }
2617
2618 /* A function, variable or enum constant? */
2619 switch (toktype)
2620 {
2621 case st_C_extern:
2622 fvextern = TRUE;
2623 /* FALLTHRU */
2624 case st_C_typespec:
2625 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2626 fvdef = fvnone; /* should be useless */
2627 return FALSE;
2628 case st_C_ignore:
2629 fvextern = FALSE;
2630 fvdef = vignore;
2631 return FALSE;
2632 case st_C_operator:
2633 fvdef = foperator;
2634 *is_func_or_var = TRUE;
2635 return TRUE;
2636 case st_none:
2637 if (constantypedefs
2638 && structdef == snone
2639 && structtype == st_C_enum && cblev > structcblev)
2640 return TRUE; /* enum constant */
2641 switch (fvdef)
2642 {
2643 case fdefunkey:
2644 if (cblev > 0)
2645 break;
2646 fvdef = fdefunname; /* GNU macro */
2647 *is_func_or_var = TRUE;
2648 return TRUE;
2649 case fvnone:
2650 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2651 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2652 {
2653 fvdef = vignore;
2654 return FALSE;
2655 }
2656 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2657 {
2658 fvdef = foperator;
2659 *is_func_or_var = TRUE;
2660 return TRUE;
2661 }
2662 if (cblev > 0 && !instruct)
2663 break;
2664 fvdef = fvnameseen; /* function or variable */
2665 *is_func_or_var = TRUE;
2666 return TRUE;
2667 }
2668 break;
2669 }
2670
2671 return FALSE;
2672 }
2673
2674 \f
2675 /*
2676 * C_entries often keeps pointers to tokens or lines which are older than
2677 * the line currently read. By keeping two line buffers, and switching
2678 * them at end of line, it is possible to use those pointers.
 *
 * All the macros below expand to code using variables local to
 * C_entries (curndx, newndx, lp, quotednl, inf, savetoken), so they
 * can only be used inside that function.
2679 */
2680 struct
2681 {
2682 long linepos; /* char position in the file of the start of the line in lb */
2683 linebuffer lb; /* the line itself */
2684 } lbs[2];
2685
2686 #define current_lb_is_new (newndx == curndx)
2687 #define switch_line_buffers() (curndx = 1 - curndx)
2688
2689 #define curlb (lbs[curndx].lb)
2690 #define newlb (lbs[newndx].lb)
2691 #define curlinepos (lbs[curndx].linepos)
2692 #define newlinepos (lbs[newndx].linepos)
2693
/* Read the next input line into curlb, updating lineno, charno,
   linecharno and curlinepos; point lp at the start of the line, clear
   quotednl and make curlb the new buffer.  definedef is left alone. */
2694 #define CNL_SAVE_DEFINEDEF() \
2695 do { \
2696 curlinepos = charno; \
2697 lineno++; \
2698 linecharno = charno; \
2699 charno += readline (&curlb, inf); \
2700 lp = curlb.buffer; \
2701 quotednl = FALSE; \
2702 newndx = curndx; \
2703 } while (0)
2704
/* Like CNL_SAVE_DEFINEDEF, but also restore the token saved in
   savetoken, if it is valid, and reset definedef to dnone. */
2705 #define CNL() \
2706 do { \
2707 CNL_SAVE_DEFINEDEF(); \
2708 if (savetoken.valid) \
2709 { \
2710 token = savetoken; \
2711 savetoken.valid = FALSE; \
2712 } \
2713 definedef = dnone; \
2714 } while (0)
2715
2716
2717 static void
2718 make_C_tag (isfun)
2719 bool isfun;
2720 {
2721 /* This function should never be called when token.valid is FALSE, but
2722 we must protect against invalid input or internal errors. */
2723 if (DEBUG || token.valid)
2724 {
2725 if (traditional_tag_style)
2726 {
2727 /* This was the original code. Now we call new_pfnote instead,
2728 which uses the new method for naming tags (see new_pfnote). */
2729 char *name = NULL;
2730
2731 if (CTAGS || token.named)
2732 name = savestr (token_name.buffer);
2733 if (DEBUG && !token.valid)
2734 {
2735 if (token.named)
2736 name = concat (name, "##invalid##", "");
2737 else
2738 name = savestr ("##invalid##");
2739 }
2740 pfnote (name, isfun, token.line,
2741 token.offset+token.length+1, token.lineno, token.linepos);
2742 }
2743 else
2744 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2745 token.offset+token.length+1, token.lineno, token.linepos);
2746 token.valid = FALSE;
2747 }
2748 }
2749
2750
2751 /*
2752 * C_entries ()
2753 * This routine finds functions, variables, typedefs,
2754 * #define's, enum constants and struct/union/enum definitions in
2755 * C syntax and adds them to the list.
2756 */
2757 static void
2758 C_entries (c_ext, inf)
2759 int c_ext; /* extension of C */
2760 FILE *inf; /* input file */
2761 {
2762 register char c; /* latest char read; '\0' for end of line */
2763 register char *lp; /* pointer one beyond the character `c' */
2764 int curndx, newndx; /* indices for current and new lb */
2765 register int tokoff; /* offset in line of start of current token */
2766 register int toklen; /* length of current token */
2767 char *qualifier; /* string used to qualify names */
2768 int qlen; /* length of qualifier */
2769 int cblev; /* current curly brace level */
2770 int parlev; /* current parenthesis level */
2771 int typdefcblev; /* cblev where a typedef struct body begun */
2772 bool incomm, inquote, inchar, quotednl, midtoken;
2773 bool cplpl, cjava;
2774 bool yacc_rules; /* in the rules part of a yacc file */
2775 struct tok savetoken; /* token saved during preprocessor handling */
2776
2777
2778 initbuffer (&token_name);
2779 initbuffer (&lbs[0].lb);
2780 initbuffer (&lbs[1].lb);
2781 if (cstack.size == 0)
2782 {
2783 cstack.size = (DEBUG) ? 1 : 4;
2784 cstack.nl = 0;
2785 cstack.cname = xnew (cstack.size, char *);
2786 cstack.cblev = xnew (cstack.size, int);
2787 }
2788
2789 tokoff = toklen = 0; /* keep compiler quiet */
2790 curndx = newndx = 0;
2791 lineno = 0;
2792 charno = 0;
2793 lp = curlb.buffer;
2794 *lp = 0;
2795
2796 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2797 structdef = snone; definedef = dnone; objdef = onone;
2798 yacc_rules = FALSE;
2799 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2800 token.valid = savetoken.valid = FALSE;
2801 cblev = 0;
2802 parlev = 0;
2803 cplpl = (c_ext & C_PLPL) == C_PLPL;
2804 cjava = (c_ext & C_JAVA) == C_JAVA;
2805 if (cjava)
2806 { qualifier = "."; qlen = 1; }
2807 else
2808 { qualifier = "::"; qlen = 2; }
2809
2810
2811 while (!feof (inf))
2812 {
2813 c = *lp++;
2814 if (c == '\\')
2815 {
2816 /* If we're at the end of the line, the next character is a
2817 '\0'; don't skip it, because it's the thing that tells us
2818 to read the next line. */
2819 if (*lp == '\0')
2820 {
2821 quotednl = TRUE;
2822 continue;
2823 }
2824 lp++;
2825 c = ' ';
2826 }
2827 else if (incomm)
2828 {
2829 switch (c)
2830 {
2831 case '*':
2832 if (*lp == '/')
2833 {
2834 c = *lp++;
2835 incomm = FALSE;
2836 }
2837 break;
2838 case '\0':
2839 /* Newlines inside comments do not end macro definitions in
2840 traditional cpp. */
2841 CNL_SAVE_DEFINEDEF ();
2842 break;
2843 }
2844 continue;
2845 }
2846 else if (inquote)
2847 {
2848 switch (c)
2849 {
2850 case '"':
2851 inquote = FALSE;
2852 break;
2853 case '\0':
2854 /* Newlines inside strings do not end macro definitions
2855 in traditional cpp, even though compilers don't
2856 usually accept them. */
2857 CNL_SAVE_DEFINEDEF ();
2858 break;
2859 }
2860 continue;
2861 }
2862 else if (inchar)
2863 {
2864 switch (c)
2865 {
2866 case '\0':
2867 /* Hmmm, something went wrong. */
2868 CNL ();
2869 /* FALLTHRU */
2870 case '\'':
2871 inchar = FALSE;
2872 break;
2873 }
2874 continue;
2875 }
2876 else
2877 switch (c)
2878 {
2879 case '"':
2880 inquote = TRUE;
2881 switch (fvdef)
2882 {
2883 case fdefunkey:
2884 case fstartlist:
2885 case finlist:
2886 case fignore:
2887 case vignore:
2888 break;
2889 default:
2890 fvextern = FALSE;
2891 fvdef = fvnone;
2892 }
2893 continue;
2894 case '\'':
2895 inchar = TRUE;
2896 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2897 {
2898 fvextern = FALSE;
2899 fvdef = fvnone;
2900 }
2901 continue;
2902 case '/':
2903 if (*lp == '*')
2904 {
2905 lp++;
2906 incomm = TRUE;
2907 continue;
2908 }
2909 else if (/* cplpl && */ *lp == '/')
2910 {
2911 c = '\0';
2912 break;
2913 }
2914 else
2915 break;
2916 case '%':
2917 if ((c_ext & YACC) && *lp == '%')
2918 {
2919 /* Entering or exiting rules section in yacc file. */
2920 lp++;
2921 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2922 typdef = tnone; structdef = snone;
2923 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2924 cblev = 0;
2925 yacc_rules = !yacc_rules;
2926 continue;
2927 }
2928 else
2929 break;
2930 case '#':
2931 if (definedef == dnone)
2932 {
2933 char *cp;
2934 bool cpptoken = TRUE;
2935
2936 /* Look back on this line. If all blanks, or nonblanks
2937 followed by an end of comment, this is a preprocessor
2938 token. */
2939 for (cp = newlb.buffer; cp < lp-1; cp++)
2940 if (!iswhite (*cp))
2941 {
2942 if (*cp == '*' && *(cp+1) == '/')
2943 {
2944 cp++;
2945 cpptoken = TRUE;
2946 }
2947 else
2948 cpptoken = FALSE;
2949 }
2950 if (cpptoken)
2951 definedef = dsharpseen;
2952 } /* if (definedef == dnone) */
2953
2954 continue;
2955 } /* switch (c) */
2956
2957
2958 /* Consider token only if some involved conditions are satisfied. */
2959 if (typdef != tignore
2960 && definedef != dignorerest
2961 && fvdef != finlist
2962 && structdef != sintemplate
2963 && (definedef != dnone
2964 || structdef != scolonseen))
2965 {
2966 if (midtoken)
2967 {
2968 if (endtoken (c))
2969 {
2970 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2971 {
2972 /*
2973 * This handles :: in the middle, but not at the
2974 * beginning of an identifier. Also, space-separated
2975 * :: is not recognised.
2976 */
2977 lp += 2;
2978 toklen += 2;
2979 c = lp[-1];
2980 goto intoken;
2981 }
2982 else
2983 {
2984 bool funorvar = FALSE;
2985
2986 if (yacc_rules
2987 || consider_token (newlb.buffer + tokoff, toklen, c,
2988 &c_ext, cblev, parlev, &funorvar))
2989 {
2990 if (fvdef == foperator)
2991 {
2992 char *oldlp = lp;
2993 lp = skip_spaces (lp-1);
2994 if (*lp != '\0')
2995 lp += 1;
2996 while (*lp != '\0'
2997 && !iswhite (*lp) && *lp != '(')
2998 lp += 1;
2999 c = *lp++;
3000 toklen += lp - oldlp;
3001 }
3002 token.named = FALSE;
3003 if ((c_ext & C_EXT) /* not pure C */
3004 && nestlev > 0 && definedef == dnone)
3005 /* in struct body */
3006 {
3007 write_classname (&token_name, qualifier);
3008 linebuffer_setlen (&token_name,
3009 token_name.len+qlen+toklen);
3010 strcat (token_name.buffer, qualifier);
3011 strncat (token_name.buffer,
3012 newlb.buffer + tokoff, toklen);
3013 token.named = TRUE;
3014 }
3015 else if (objdef == ocatseen)
3016 /* Objective C category */
3017 {
3018 int len = strlen (objtag) + 2 + toklen;
3019 linebuffer_setlen (&token_name, len);
3020 strcpy (token_name.buffer, objtag);
3021 strcat (token_name.buffer, "(");
3022 strncat (token_name.buffer,
3023 newlb.buffer + tokoff, toklen);
3024 strcat (token_name.buffer, ")");
3025 token.named = TRUE;
3026 }
3027 else if (objdef == omethodtag
3028 || objdef == omethodparm)
3029 /* Objective C method */
3030 {
3031 token.named = TRUE;
3032 }
3033 else if (fvdef == fdefunname)
3034 /* GNU DEFUN and similar macros */
3035 {
3036 bool defun = (newlb.buffer[tokoff] == 'F');
3037 int off = tokoff;
3038 int len = toklen;
3039
3040 /* Rewrite the tag so that emacs lisp DEFUNs
3041 can be found by their elisp name */
3042 if (defun)
3043 {
3044 off += 1;
3045 len -= 1;
3046 }
3047 len = toklen;
3048 linebuffer_setlen (&token_name, len);
3049 strncpy (token_name.buffer,
3050 newlb.buffer + off, len);
3051 token_name.buffer[len] = '\0';
3052 if (defun)
3053 while (--len >= 0)
3054 if (token_name.buffer[len] == '_')
3055 token_name.buffer[len] = '-';
3056 token.named = defun;
3057 }
3058 else
3059 {
3060 linebuffer_setlen (&token_name, toklen);
3061 strncpy (token_name.buffer,
3062 newlb.buffer + tokoff, toklen);
3063 token_name.buffer[toklen] = '\0';
3064 /* Name macros and members. */
3065 token.named = (structdef == stagseen
3066 || typdef == ttypeseen
3067 || typdef == tend
3068 || (funorvar
3069 && definedef == dignorerest)
3070 || (funorvar
3071 && definedef == dnone
3072 && structdef == snone
3073 && cblev > 0));
3074 }
3075 token.lineno = lineno;
3076 token.offset = tokoff;
3077 token.length = toklen;
3078 token.line = newlb.buffer;
3079 token.linepos = newlinepos;
3080 token.valid = TRUE;
3081
3082 if (definedef == dnone
3083 && (fvdef == fvnameseen
3084 || fvdef == foperator
3085 || structdef == stagseen
3086 || typdef == tend
3087 || typdef == ttypeseen
3088 || objdef != onone))
3089 {
3090 if (current_lb_is_new)
3091 switch_line_buffers ();
3092 }
3093 else if (definedef != dnone
3094 || fvdef == fdefunname
3095 || instruct)
3096 make_C_tag (funorvar);
3097 }
3098 midtoken = FALSE;
3099 }
3100 } /* if (endtoken (c)) */
3101 else if (intoken (c))
3102 intoken:
3103 {
3104 toklen++;
3105 continue;
3106 }
3107 } /* if (midtoken) */
3108 else if (begtoken (c))
3109 {
3110 switch (definedef)
3111 {
3112 case dnone:
3113 switch (fvdef)
3114 {
3115 case fstartlist:
3116 fvdef = finlist;
3117 continue;
3118 case flistseen:
3119 make_C_tag (TRUE); /* a function */
3120 fvdef = fignore;
3121 break;
3122 case fvnameseen:
3123 fvdef = fvnone;
3124 break;
3125 }
3126 if (structdef == stagseen && !cjava)
3127 {
3128 popclass_above (cblev);
3129 structdef = snone;
3130 }
3131 break;
3132 case dsharpseen:
3133 savetoken = token;
3134 }
3135 if (!yacc_rules || lp == newlb.buffer + 1)
3136 {
3137 tokoff = lp - 1 - newlb.buffer;
3138 toklen = 1;
3139 midtoken = TRUE;
3140 }
3141 continue;
3142 } /* if (begtoken) */
3143 } /* if must look at token */
3144
3145
3146 /* Detect end of line, colon, comma, semicolon and various braces
3147 after having handled a token.*/
3148 switch (c)
3149 {
3150 case ':':
3151 if (yacc_rules && token.offset == 0 && token.valid)
3152 {
3153 make_C_tag (FALSE); /* a yacc function */
3154 break;
3155 }
3156 if (definedef != dnone)
3157 break;
3158 switch (objdef)
3159 {
3160 case otagseen:
3161 objdef = oignore;
3162 make_C_tag (TRUE); /* an Objective C class */
3163 break;
3164 case omethodtag:
3165 case omethodparm:
3166 objdef = omethodcolon;
3167 linebuffer_setlen (&token_name, token_name.len + 1);
3168 strcat (token_name.buffer, ":");
3169 break;
3170 }
3171 if (structdef == stagseen)
3172 structdef = scolonseen;
3173 break;
3174 case ';':
3175 if (definedef != dnone)
3176 break;
3177 switch (typdef)
3178 {
3179 case tend:
3180 case ttypeseen:
3181 make_C_tag (FALSE); /* a typedef */
3182 typdef = tnone;
3183 fvdef = fvnone;
3184 break;
3185 case tnone:
3186 case tinbody:
3187 case tignore:
3188 switch (fvdef)
3189 {
3190 case fignore:
3191 if (typdef == tignore)
3192 fvdef = fvnone;
3193 break;
3194 case fvnameseen:
3195 if ((globals && cblev == 0 && (!fvextern || declarations))
3196 || (members && instruct))
3197 make_C_tag (FALSE); /* a variable */
3198 fvextern = FALSE;
3199 fvdef = fvnone;
3200 token.valid = FALSE;
3201 break;
3202 case flistseen:
3203 if ((declarations && typdef == tnone && !instruct)
3204 || (members && typdef != tignore && instruct))
3205 make_C_tag (TRUE); /* a function declaration */
3206 /* FALLTHRU */
3207 default:
3208 fvextern = FALSE;
3209 fvdef = fvnone;
3210 if (declarations
3211 && structdef == stagseen && (c_ext & C_PLPL))
3212 make_C_tag (FALSE); /* forward declaration */
3213 else
3214 /* The following instruction invalidates the token.
3215 Probably the token should be invalidated in all other
3216 cases where some state machine is reset prematurely. */
3217 token.valid = FALSE;
3218 } /* switch (fvdef) */
3219 /* FALLTHRU */
3220 default:
3221 if (!instruct)
3222 typdef = tnone;
3223 }
3224 if (structdef == stagseen)
3225 structdef = snone;
3226 break;
3227 case ',':
3228 if (definedef != dnone)
3229 break;
3230 switch (objdef)
3231 {
3232 case omethodtag:
3233 case omethodparm:
3234 make_C_tag (TRUE); /* an Objective C method */
3235 objdef = oinbody;
3236 break;
3237 }
3238 switch (fvdef)
3239 {
3240 case fdefunkey:
3241 case foperator:
3242 case fstartlist:
3243 case finlist:
3244 case fignore:
3245 case vignore:
3246 break;
3247 case fdefunname:
3248 fvdef = fignore;
3249 break;
3250 case fvnameseen: /* a variable */
3251 if ((globals && cblev == 0 && (!fvextern || declarations))
3252 || (members && instruct))
3253 make_C_tag (FALSE);
3254 break;
3255 case flistseen: /* a function */
3256 if ((declarations && typdef == tnone && !instruct)
3257 || (members && typdef != tignore && instruct))
3258 {
3259 make_C_tag (TRUE); /* a function declaration */
3260 fvdef = fvnameseen;
3261 }
3262 else if (!declarations)
3263 fvdef = fvnone;
3264 token.valid = FALSE;
3265 break;
3266 default:
3267 fvdef = fvnone;
3268 }
3269 if (structdef == stagseen)
3270 structdef = snone;
3271 break;
3272 case '[':
3273 if (definedef != dnone)
3274 break;
3275 if (structdef == stagseen)
3276 structdef = snone;
3277 switch (typdef)
3278 {
3279 case ttypeseen:
3280 case tend:
3281 typdef = tignore;
3282 make_C_tag (FALSE); /* a typedef */
3283 break;
3284 case tnone:
3285 case tinbody:
3286 switch (fvdef)
3287 {
3288 case foperator:
3289 case finlist:
3290 case fignore:
3291 case vignore:
3292 break;
3293 case fvnameseen:
3294 if ((members && cblev == 1)
3295 || (globals && cblev == 0
3296 && (!fvextern || declarations)))
3297 make_C_tag (FALSE); /* a variable */
3298 /* FALLTHRU */
3299 default:
3300 fvdef = fvnone;
3301 }
3302 break;
3303 }
3304 break;
3305 case '(':
3306 if (definedef != dnone)
3307 break;
3308 if (objdef == otagseen && parlev == 0)
3309 objdef = oparenseen;
3310 switch (fvdef)
3311 {
3312 case fvnameseen:
3313 if (typdef == ttypeseen
3314 && *lp != '*'
3315 && !instruct)
3316 {
3317 /* This handles constructs like:
3318 typedef void OperatorFun (int fun); */
3319 make_C_tag (FALSE);
3320 typdef = tignore;
3321 fvdef = fignore;
3322 break;
3323 }
3324 /* FALLTHRU */
3325 case foperator:
3326 fvdef = fstartlist;
3327 break;
3328 case flistseen:
3329 fvdef = finlist;
3330 break;
3331 }
3332 parlev++;
3333 break;
3334 case ')':
3335 if (definedef != dnone)
3336 break;
3337 if (objdef == ocatseen && parlev == 1)
3338 {
3339 make_C_tag (TRUE); /* an Objective C category */
3340 objdef = oignore;
3341 }
3342 if (--parlev == 0)
3343 {
3344 switch (fvdef)
3345 {
3346 case fstartlist:
3347 case finlist:
3348 fvdef = flistseen;
3349 break;
3350 }
3351 if (!instruct
3352 && (typdef == tend
3353 || typdef == ttypeseen))
3354 {
3355 typdef = tignore;
3356 make_C_tag (FALSE); /* a typedef */
3357 }
3358 }
3359 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3360 parlev = 0;
3361 break;
3362 case '{':
3363 if (definedef != dnone)
3364 break;
3365 if (typdef == ttypeseen)
3366 {
3367 typdefcblev = cblev;
3368 typdef = tinbody;
3369 }
3370 switch (fvdef)
3371 {
3372 case flistseen:
3373 make_C_tag (TRUE); /* a function */
3374 /* FALLTHRU */
3375 case fignore:
3376 fvdef = fvnone;
3377 break;
3378 case fvnone:
3379 switch (objdef)
3380 {
3381 case otagseen:
3382 make_C_tag (TRUE); /* an Objective C class */
3383 objdef = oignore;
3384 break;
3385 case omethodtag:
3386 case omethodparm:
3387 make_C_tag (TRUE); /* an Objective C method */
3388 objdef = oinbody;
3389 break;
3390 default:
3391 /* Neutralize `extern "C" {' grot. */
3392 if (cblev == 0 && structdef == snone && nestlev == 0
3393 && typdef == tnone)
3394 cblev = -1;
3395 }
3396 }
3397 switch (structdef)
3398 {
3399 case skeyseen: /* unnamed struct */
3400 pushclass_above (cblev, NULL, 0);
3401 structdef = snone;
3402 break;
3403 case stagseen: /* named struct or enum */
3404 case scolonseen: /* a class */
3405 pushclass_above (cblev, token.line+token.offset, token.length);
3406 structdef = snone;
3407 make_C_tag (FALSE); /* a struct or enum */
3408 break;
3409 }
3410 cblev++;
3411 break;
3412 case '*':
3413 if (definedef != dnone)
3414 break;
3415 if (fvdef == fstartlist)
3416 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3417 break;
3418 case '}':
3419 if (definedef != dnone)
3420 break;
3421 if (!noindentypedefs && lp == newlb.buffer + 1)
3422 {
3423 cblev = 0; /* reset curly brace level if first column */
3424 parlev = 0; /* also reset paren level, just in case... */
3425 }
3426 else if (cblev > 0)
3427 cblev--;
3428 popclass_above (cblev);
3429 structdef = snone;
3430 if (typdef == tinbody && cblev <= typdefcblev)
3431 {
3432 assert (cblev == typdefcblev);
3433 typdef = tend;
3434 }
3435 break;
3436 case '=':
3437 if (definedef != dnone)
3438 break;
3439 switch (fvdef)
3440 {
3441 case foperator:
3442 case finlist:
3443 case fignore:
3444 case vignore:
3445 break;
3446 case fvnameseen:
3447 if ((members && cblev == 1)
3448 || (globals && cblev == 0 && (!fvextern || declarations)))
3449 make_C_tag (FALSE); /* a variable */
3450 /* FALLTHRU */
3451 default:
3452 fvdef = vignore;
3453 }
3454 break;
3455 case '<':
3456 if (cplpl && structdef == stagseen)
3457 {
3458 structdef = sintemplate;
3459 break;
3460 }
3461 goto resetfvdef;
3462 case '>':
3463 if (structdef == sintemplate)
3464 {
3465 structdef = stagseen;
3466 break;
3467 }
3468 goto resetfvdef;
3469 case '+':
3470 case '-':
3471 if (objdef == oinbody && cblev == 0)
3472 {
3473 objdef = omethodsign;
3474 break;
3475 }
3476 /* FALLTHRU */
3477 resetfvdef:
3478 case '#': case '~': case '&': case '%': case '/': case '|':
3479 case '^': case '!': case '.': case '?': case ']':
3480 if (definedef != dnone)
3481 break;
3482 /* These surely cannot follow a function tag in C. */
3483 switch (fvdef)
3484 {
3485 case foperator:
3486 case finlist:
3487 case fignore:
3488 case vignore:
3489 break;
3490 default:
3491 fvdef = fvnone;
3492 }
3493 break;
3494 case '\0':
3495 if (objdef == otagseen)
3496 {
3497 make_C_tag (TRUE); /* an Objective C class */
3498 objdef = oignore;
3499 }
3500 /* If a macro spans multiple lines don't reset its state. */
3501 if (quotednl)
3502 CNL_SAVE_DEFINEDEF ();
3503 else
3504 CNL ();
3505 break;
3506 } /* switch (c) */
3507
3508 } /* while not eof */
3509
3510 free (token_name.buffer);
3511 free (lbs[0].lb.buffer);
3512 free (lbs[1].lb.buffer);
3513 }
3514
3515 /*
3516 * Process either a C++ file or a C file depending on the setting
3517 * of a global flag.
3518 */
3519 static void
3520 default_C_entries (inf)
3521 FILE *inf;
3522 {
3523 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3524 }
3525
/* Scan INF as plain C: C_entries is called with no dialect flag set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no language-extension flags */

  C_entries (c_ext, inf);
}
3533
3534 /* Always do C++. */
3535 static void
3536 Cplusplus_entries (inf)
3537 FILE *inf;
3538 {
3539 C_entries (C_PLPL, inf);
3540 }
3541
3542 /* Always do Java. */
3543 static void
3544 Cjava_entries (inf)
3545 FILE *inf;
3546 {
3547 C_entries (C_JAVA, inf);
3548 }
3549
3550 /* Always do C*. */
3551 static void
3552 Cstar_entries (inf)
3553 FILE *inf;
3554 {
3555 C_entries (C_STAR, inf);
3556 }
3557
3558 /* Always do Yacc. */
3559 static void
3560 Yacc_entries (inf)
3561 FILE *inf;
3562 {
3563 C_entries (YACC, inf);
3564 }
3565
3566 \f
/* Loop header that walks FILE_POINTER one line at a time.
   The global counters `lineno' and `charno' are reset to 0 first.
   While end of file has not been seen, each iteration bumps `lineno',
   records in `linecharno' the value `charno' had before the read,
   reads the next line into LINE_BUFFER with readline (adding its
   return value to `charno'), and points CHAR_POINTER at the start of
   LINE_BUFFER's text.  The statement that follows the macro is the
   loop body; `continue' inside it moves on to the next line.

   CHAR_POINTER is taken from LINE_BUFFER itself rather than from the
   global `lb', so the macro also works when another buffer is passed.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           char_pointer = (line_buffer).buffer,                         \
           TRUE);                                                       \
      )
3577
3578
3579 /*
3580 * Read a file, but do no processing. This is used to do regexp
3581 * matching on files that have no language defined.
3582 */
3583 static void
3584 just_read_file (inf)
3585 FILE *inf;
3586 {
3587 register char *dummy;
3588
3589 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3590 continue;
3591 }
3592
3593 \f
3594 /* Fortran parsing */
3595
3596 static bool tail P_((char *));
3597 static void takeprec P_((void));
3598 static void getit P_((FILE *));
3599
3600 static bool
3601 tail (cp)
3602 char *cp;
3603 {
3604 register int len = 0;
3605
3606 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3607 cp++, len++;
3608 if (*cp == '\0' && !intoken (dbp[len]))
3609 {
3610 dbp += len;
3611 return TRUE;
3612 }
3613 return FALSE;
3614 }
3615
3616 static void
3617 takeprec ()
3618 {
3619 dbp = skip_spaces (dbp);
3620 if (*dbp != '*')
3621 return;
3622 dbp++;
3623 dbp = skip_spaces (dbp);
3624 if (strneq (dbp, "(*)", 3))
3625 {
3626 dbp += 3;
3627 return;
3628 }
3629 if (!ISDIGIT (*dbp))
3630 {
3631 --dbp; /* force failure */
3632 return;
3633 }
3634 do
3635 dbp++;
3636 while (ISDIGIT (*dbp));
3637 }
3638
3639 static void
3640 getit (inf)
3641 FILE *inf;
3642 {
3643 register char *cp;
3644
3645 dbp = skip_spaces (dbp);
3646 if (*dbp == '\0')
3647 {
3648 lineno++;
3649 linecharno = charno;
3650 charno += readline (&lb, inf);
3651 dbp = lb.buffer;
3652 if (dbp[5] != '&')
3653 return;
3654 dbp += 6;
3655 dbp = skip_spaces (dbp);
3656 }
3657 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3658 return;
3659 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3660 continue;
3661 pfnote (savenstr (dbp, cp-dbp), TRUE,
3662 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3663 }
3664
3665
3666 static void
3667 Fortran_functions (inf)
3668 FILE *inf;
3669 {
3670 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3671 {
3672 if (*dbp == '%')
3673 dbp++; /* Ratfor escape to fortran */
3674 dbp = skip_spaces (dbp);
3675 if (*dbp == '\0')
3676 continue;
3677 switch (lowcase (*dbp))
3678 {
3679 case 'i':
3680 if (tail ("integer"))
3681 takeprec ();
3682 break;
3683 case 'r':
3684 if (tail ("real"))
3685 takeprec ();
3686 break;
3687 case 'l':
3688 if (tail ("logical"))
3689 takeprec ();
3690 break;
3691 case 'c':
3692 if (tail ("complex") || tail ("character"))
3693 takeprec ();
3694 break;
3695 case 'd':
3696 if (tail ("double"))
3697 {
3698 dbp = skip_spaces (dbp);
3699 if (*dbp == '\0')
3700 continue;
3701 if (tail ("precision"))
3702 break;
3703 continue;
3704 }
3705 break;
3706 }
3707 dbp = skip_spaces (dbp);
3708 if (*dbp == '\0')
3709 continue;
3710 switch (lowcase (*dbp))
3711 {
3712 case 'f':
3713 if (tail ("function"))
3714 getit (inf);
3715 continue;
3716 case 's':
3717 if (tail ("subroutine"))
3718 getit (inf);
3719 continue;
3720 case 'e':
3721 if (tail ("entry"))
3722 getit (inf);
3723 continue;
3724 case 'b':
3725 if (tail ("blockdata") || tail ("block data"))
3726 {
3727 dbp = skip_spaces (dbp);
3728 if (*dbp == '\0') /* assume un-named */
3729 pfnote (savestr ("blockdata"), TRUE,
3730 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3731 else
3732 getit (inf); /* look for name */
3733 }
3734 continue;
3735 }
3736 }
3737 }
3738
3739 \f
3740 /*
3741 * Ada parsing
3742 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3743 */
3744
3745 static void adagetit P_((FILE *, char *));
3746
3747 /* Once we are positioned after an "interesting" keyword, let's get
3748 the real tag value necessary. */
3749 static void
3750 adagetit (inf, name_qualifier)
3751 FILE *inf;
3752 char *name_qualifier;
3753 {
3754 register char *cp;
3755 char *name;
3756 char c;
3757
3758 while (!feof (inf))
3759 {
3760 dbp = skip_spaces (dbp);
3761 if (*dbp == '\0'
3762 || (dbp[0] == '-' && dbp[1] == '-'))
3763 {
3764 lineno++;
3765 linecharno = charno;
3766 charno += readline (&lb, inf);
3767 dbp = lb.buffer;
3768 }
3769 switch (*dbp)
3770 {
3771 case 'b':
3772 case 'B':
3773 if (tail ("body"))
3774 {
3775 /* Skipping body of procedure body or package body or ....
3776 resetting qualifier to body instead of spec. */
3777 name_qualifier = "/b";
3778 continue;
3779 }
3780 break;
3781 case 't':
3782 case 'T':
3783 /* Skipping type of task type or protected type ... */
3784 if (tail ("type"))
3785 continue;
3786 break;
3787 }
3788 if (*dbp == '"')
3789 {
3790 dbp += 1;
3791 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3792 continue;
3793 }
3794 else
3795 {
3796 dbp = skip_spaces (dbp);
3797 for (cp = dbp;
3798 (*cp != '\0'
3799 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3800 cp++)
3801 continue;
3802 if (cp == dbp)
3803 return;
3804 }
3805 c = *cp;
3806 *cp = '\0';
3807 name = concat (dbp, name_qualifier, "");
3808 *cp = c;
3809 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3810 if (c == '"')
3811 dbp = cp + 1;
3812 return;
3813 }
3814 }
3815
3816 static void
3817 Ada_funcs (inf)
3818 FILE *inf;
3819 {
3820 bool inquote = FALSE;
3821
3822 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3823 {
3824 while (*dbp != '\0')
3825 {
3826 /* Skip a string i.e. "abcd". */
3827 if (inquote || (*dbp == '"'))
3828 {
3829 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3830 if (dbp != NULL)
3831 {
3832 inquote = FALSE;
3833 dbp += 1;
3834 continue; /* advance char */
3835 }
3836 else
3837 {
3838 inquote = TRUE;
3839 break; /* advance line */
3840 }
3841 }
3842
3843 /* Skip comments. */
3844 if (dbp[0] == '-' && dbp[1] == '-')
3845 break; /* advance line */
3846
3847 /* Skip character enclosed in single quote i.e. 'a'
3848 and skip single quote starting an attribute i.e. 'Image. */
3849 if (*dbp == '\'')
3850 {
3851 dbp++ ;
3852 if (*dbp != '\0')
3853 dbp++;
3854 continue;
3855 }
3856
3857 /* Search for beginning of a token. */
3858 if (!begtoken (*dbp))
3859 {
3860 dbp++;
3861 continue; /* advance char */
3862 }
3863
3864 /* We are at the beginning of a token. */
3865 switch (*dbp)
3866 {
3867 case 'f':
3868 case 'F':
3869 if (!packages_only && tail ("function"))
3870 adagetit (inf, "/f");
3871 else
3872 break; /* from switch */
3873 continue; /* advance char */
3874 case 'p':
3875 case 'P':
3876 if (!packages_only && tail ("procedure"))
3877 adagetit (inf, "/p");
3878 else if (tail ("package"))
3879 adagetit (inf, "/s");
3880 else if (tail ("protected")) /* protected type */
3881 adagetit (inf, "/t");
3882 else
3883 break; /* from switch */
3884 continue; /* advance char */
3885 case 't':
3886 case 'T':
3887 if (!packages_only && tail ("task"))
3888 adagetit (inf, "/k");
3889 else if (typedefs && !packages_only && tail ("type"))
3890 {
3891 adagetit (inf, "/t");
3892 while (*dbp != '\0')
3893 dbp += 1;
3894 }
3895 else
3896 break; /* from switch */
3897 continue; /* advance char */
3898 }
3899
3900 /* Look for the end of the token. */
3901 while (!endtoken (*dbp))
3902 dbp++;
3903
3904 } /* advance char */
3905 } /* advance line */
3906 }
3907
3908 \f
3909 /*
3910 * Bob Weiner, Motorola Inc., 4/3/94
3911 * Unix and microcontroller assembly tag handling
3912 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3913 */
3914 static void
3915 Asm_labels (inf)
3916 FILE *inf;
3917 {
3918 register char *cp;
3919
3920 LOOP_ON_INPUT_LINES (inf, lb, cp)
3921 {
3922 /* If first char is alphabetic or one of [_.$], test for colon
3923 following identifier. */
3924 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3925 {
3926 /* Read past label. */
3927 cp++;
3928 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3929 cp++;
3930 if (*cp == ':' || iswhite (*cp))
3931 {
3932 /* Found end of label, so copy it and add it to the table. */
3933 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3934 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3935 }
3936 }
3937 }
3938 }
3939
3940 \f
3941 /*
3942 * Perl support
3943 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3944 * Perl variable names: /^(my|local).../
3945 * Bart Robinson <lomew@cs.utah.edu> (1995)
3946 * Michael Ernst <mernst@alum.mit.edu> (1997)
3947 */
3948 static void
3949 Perl_functions (inf)
3950 FILE *inf;
3951 {
3952 register char *cp;
3953
3954 LOOP_ON_INPUT_LINES (inf, lb, cp)
3955 {
3956 if (*cp++ == 's'
3957 && *cp++ == 'u'
3958 && *cp++ == 'b' && iswhite (*cp++))
3959 {
3960 cp = skip_spaces (cp);
3961 if (*cp != '\0')
3962 {
3963 char *sp = cp;
3964 while (*cp != '\0'
3965 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3966 cp++;
3967 pfnote (savenstr (sp, cp-sp), TRUE,
3968 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3969 }
3970 }
3971 else if (globals /* only if tagging global vars is enabled */
3972 && ((cp = lb.buffer,
3973 *cp++ == 'm'
3974 && *cp++ == 'y')
3975 || (cp = lb.buffer,
3976 *cp++ == 'l'
3977 && *cp++ == 'o'
3978 && *cp++ == 'c'
3979 && *cp++ == 'a'
3980 && *cp++ == 'l'))
3981 && (*cp == '(' || iswhite (*cp)))
3982 {
3983 /* After "my" or "local", but before any following paren or space. */
3984 char *varname = NULL;
3985
3986 cp = skip_spaces (cp);
3987 if (*cp == '$' || *cp == '@' || *cp == '%')
3988 {
3989 char* varstart = ++cp;
3990 while (ISALNUM (*cp) || *cp == '_')
3991 cp++;
3992 varname = savenstr (varstart, cp-varstart);
3993 }
3994 else
3995 {
3996 /* Should be examining a variable list at this point;
3997 could insist on seeing an open parenthesis. */
3998 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
3999 cp++;
4000 }
4001
4002 /* Perhaps I should back cp up one character, so the TAGS table
4003 doesn't mention (and so depend upon) the following char. */
4004 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
4005 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4006 }
4007 }
4008 }
4009
4010 \f
4011 /*
4012 * Python support
4013 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4014 * Eric S. Raymond <esr@thyrsus.com> (1997)
4015 */
4016 static void
4017 Python_functions (inf)
4018 FILE *inf;
4019 {
4020 register char *cp;
4021
4022 LOOP_ON_INPUT_LINES (inf, lb, cp)
4023 {
4024 if (*cp++ == 'd'
4025 && *cp++ == 'e'
4026 && *cp++ == 'f' && iswhite (*cp++))
4027 {
4028 cp = skip_spaces (cp);
4029 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4030 cp++;
4031 pfnote (NULL, TRUE,
4032 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4033 }
4034
4035 cp = lb.buffer;
4036 if (*cp++ == 'c'
4037 && *cp++ == 'l'
4038 && *cp++ == 'a'
4039 && *cp++ == 's'
4040 && *cp++ == 's' && iswhite (*cp++))
4041 {
4042 cp = skip_spaces (cp);
4043 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4044 cp++;
4045 pfnote (NULL, TRUE,
4046 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4047 }
4048 }
4049 }
4050
4051 \f
4052 /* Idea by Corny de Souza
4053 * Cobol tag functions
4054 * We could look for anything that could be a paragraph name.
4055 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4056 */
4057 static void
4058 Cobol_paragraphs (inf)
4059 FILE *inf;
4060 {
4061 register char *bp, *ep;
4062
4063 LOOP_ON_INPUT_LINES (inf, lb, bp)
4064 {
4065 if (lb.len < 9)
4066 continue;
4067 bp += 8;
4068
4069 /* If eoln, compiler option or comment ignore whole line. */
4070 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4071 continue;
4072
4073 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4074 continue;
4075 if (*ep++ == '.')
4076 pfnote (savenstr (bp, ep-bp), TRUE,
4077 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4078 }
4079 }
4080
4081 \f
4082 /*
4083 * Makefile support
4084 * Idea by Assar Westerlund <assar@sics.se> (2001)
4085 */
4086 static void
4087 Makefile_targets (inf)
4088 FILE *inf;
4089 {
4090 register char *bp;
4091
4092 LOOP_ON_INPUT_LINES (inf, lb, bp)
4093 {
4094 if (*bp == '\t' || *bp == '#')
4095 continue;
4096 while (*bp != '\0' && *bp != '=' && *bp != ':')
4097 bp++;
4098 if (*bp == ':')
4099 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4100 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4101 }
4102 }
4103
4104 \f
4105 /* Added by Mosur Mohan, 4/22/88 */
4106 /* Pascal parsing */
4107
4108 /*
4109 * Locates tags for procedures & functions. Doesn't do any type- or
4110 * var-definitions. It does look for the keyword "extern" or
4111 * "forward" immediately following the procedure statement; if found,
4112 * the tag is skipped.
4113 */
4114 static void
4115 Pascal_functions (inf)
4116 FILE *inf;
4117 {
4118 linebuffer tline; /* mostly copied from C_entries */
4119 long save_lcno;
4120 int save_lineno, save_len;
4121 char c, *cp, *namebuf;
4122
4123 bool /* each of these flags is TRUE iff: */
4124 incomment, /* point is inside a comment */
4125 inquote, /* point is inside '..' string */
4126 get_tagname, /* point is after PROCEDURE/FUNCTION
4127 keyword, so next item = potential tag */
4128 found_tag, /* point is after a potential tag */
4129 inparms, /* point is within parameter-list */
4130 verify_tag; /* point has passed the parm-list, so the
4131 next token will determine whether this
4132 is a FORWARD/EXTERN to be ignored, or
4133 whether it is a real tag */
4134
4135 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4136 namebuf = NULL; /* keep compiler quiet */
4137 lineno = 0;
4138 charno = 0;
4139 dbp = lb.buffer;
4140 *dbp = '\0';
4141 initbuffer (&tline);
4142
4143 incomment = inquote = FALSE;
4144 found_tag = FALSE; /* have a proc name; check if extern */
4145 get_tagname = FALSE; /* have found "procedure" keyword */
4146 inparms = FALSE; /* found '(' after "proc" */
4147 verify_tag = FALSE; /* check if "extern" is ahead */
4148
4149
4150 while (!feof (inf)) /* long main loop to get next char */
4151 {
4152 c = *dbp++;
4153 if (c == '\0') /* if end of line */
4154 {
4155 lineno++;
4156 linecharno = charno;
4157 charno += readline (&lb, inf);
4158 dbp = lb.buffer;
4159 if (*dbp == '\0')
4160 continue;
4161 if (!((found_tag && verify_tag)
4162 || get_tagname))
4163 c = *dbp++; /* only if don't need *dbp pointing
4164 to the beginning of the name of
4165 the procedure or function */
4166 }
4167 if (incomment)
4168 {
4169 if (c == '}') /* within { } comments */
4170 incomment = FALSE;
4171 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4172 {
4173 dbp++;
4174 incomment = FALSE;
4175 }
4176 continue;
4177 }
4178 else if (inquote)
4179 {
4180 if (c == '\'')
4181 inquote = FALSE;
4182 continue;
4183 }
4184 else
4185 switch (c)
4186 {
4187 case '\'':
4188 inquote = TRUE; /* found first quote */
4189 continue;
4190 case '{': /* found open { comment */
4191 incomment = TRUE;
4192 continue;
4193 case '(':
4194 if (*dbp == '*') /* found open (* comment */
4195 {
4196 incomment = TRUE;
4197 dbp++;
4198 }
4199 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4200 inparms = TRUE;
4201 continue;
4202 case ')': /* end of parms list */
4203 if (inparms)
4204 inparms = FALSE;
4205 continue;
4206 case ';':
4207 if (found_tag && !inparms) /* end of proc or fn stmt */
4208 {
4209 verify_tag = TRUE;
4210 break;
4211 }
4212 continue;
4213 }
4214 if (found_tag && verify_tag && (*dbp != ' '))
4215 {
4216 /* check if this is an "extern" declaration */
4217 if (*dbp == '\0')
4218 continue;
4219 if (lowcase (*dbp == 'e'))
4220 {
4221 if (tail ("extern")) /* superfluous, really! */
4222 {
4223 found_tag = FALSE;
4224 verify_tag = FALSE;
4225 }
4226 }
4227 else if (lowcase (*dbp) == 'f')
4228 {
4229 if (tail ("forward")) /* check for forward reference */
4230 {
4231 found_tag = FALSE;
4232 verify_tag = FALSE;
4233 }
4234 }
4235 if (found_tag && verify_tag) /* not external proc, so make tag */
4236 {
4237 found_tag = FALSE;
4238 verify_tag = FALSE;
4239 pfnote (namebuf, TRUE,
4240 tline.buffer, save_len, save_lineno, save_lcno);
4241 continue;
4242 }
4243 }
4244 if (get_tagname) /* grab name of proc or fn */
4245 {
4246 if (*dbp == '\0')
4247 continue;
4248
4249 /* save all values for later tagging */
4250 linebuffer_setlen (&tline, lb.len);
4251 strcpy (tline.buffer, lb.buffer);
4252 save_lineno = lineno;
4253 save_lcno = linecharno;
4254
4255 /* grab block name */
4256 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4257 continue;
4258 namebuf = savenstr (dbp, cp-dbp);
4259 dbp = cp; /* set dbp to e-o-token */
4260 save_len = dbp - lb.buffer + 1;
4261 get_tagname = FALSE;
4262 found_tag = TRUE;
4263 continue;
4264
4265 /* and proceed to check for "extern" */
4266 }
4267 else if (!incomment && !inquote && !found_tag)
4268 {
4269 /* check for proc/fn keywords */
4270 switch (lowcase (c))
4271 {
4272 case 'p':
4273 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4274 get_tagname = TRUE;
4275 continue;
4276 case 'f':
4277 if (tail ("unction"))
4278 get_tagname = TRUE;
4279 continue;
4280 }
4281 }
4282 } /* while not eof */
4283
4284 free (tline.buffer);
4285 }
4286
4287 \f
4288 /*
4289 * Lisp tag functions
4290 * look for (def or (DEF, quote or QUOTE
4291 */
4292
4293 static int L_isdef P_((char *));
4294 static int L_isquote P_((char *));
4295 static void L_getit P_((void));
4296
/* Return 1 if the three characters following STRP[0] spell "def" in
   any mixture of upper and lower case, 0 otherwise.  STRP[0] itself
   (normally the open parenthesis) is not examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  if (strp[3] != 'f' && strp[3] != 'F')
    return 0;
  return 1;
}
4305
/* Return nonzero if the characters following STRP[0] spell "quote",
   in any mixture of upper and lower case, followed by a character
   that satisfies iswhite.  STRP[0] itself is not examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  return ((strp[1] == 'q' || strp[1] == 'Q')
          && (strp[2] == 'u' || strp[2] == 'U')
          && (strp[3] == 'o' || strp[3] == 'O')
          && (strp[4] == 't' || strp[4] == 'T')
          && (strp[5] == 'e' || strp[5] == 'E')
          && iswhite (strp[6]));
}
4317
4318 static void
4319 L_getit ()
4320 {
4321 register char *cp;
4322
4323 if (*dbp == '\'') /* Skip prefix quote */
4324 dbp++;
4325 else if (*dbp == '(')
4326 {
4327 if (L_isquote (dbp))
4328 dbp += 7; /* Skip "(quote " */
4329 else
4330 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4331 dbp = skip_spaces (dbp);
4332 }
4333
4334 for (cp = dbp /*+1*/;
4335 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4336 cp++)
4337 continue;
4338 if (cp == dbp)
4339 return;
4340
4341 pfnote (savenstr (dbp, cp-dbp), TRUE,
4342 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4343 }
4344
4345 static void
4346 Lisp_functions (inf)
4347 FILE *inf;
4348 {
4349 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4350 {
4351 if (dbp[0] == '(')
4352 {
4353 if (L_isdef (dbp))
4354 {
4355 dbp = skip_non_spaces (dbp);
4356 dbp = skip_spaces (dbp);
4357 L_getit ();
4358 }
4359 else
4360 {
4361 /* Check for (foo::defmumble name-defined ... */
4362 do
4363 dbp++;
4364 while (*dbp != '\0' && !iswhite (*dbp)
4365 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4366 if (*dbp == ':')
4367 {
4368 do
4369 dbp++;
4370 while (*dbp == ':');
4371
4372 if (L_isdef (dbp - 1))
4373 {
4374 dbp = skip_non_spaces (dbp);
4375 dbp = skip_spaces (dbp);
4376 L_getit ();
4377 }
4378 }
4379 }
4380 }
4381 }
4382 }
4383
4384 \f
4385 /*
4386 * Postscript tag functions
4387 * Just look for lines where the first character is '/'
4388 * Also look at "defineps" for PSWrap
4389 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4390 * Ideas by Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4391 */
4392 static void
4393 Postscript_functions (inf)
4394 FILE *inf;
4395 {
4396 register char *bp, *ep;
4397
4398 LOOP_ON_INPUT_LINES (inf, lb, bp)
4399 {
4400 if (bp[0] == '/')
4401 {
4402 for (ep = bp+1;
4403 *ep != '\0' && *ep != ' ' && *ep != '{';
4404 ep++)
4405 continue;
4406 pfnote (savenstr (bp, ep-bp), TRUE,
4407 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4408 }
4409 else if (strneq (bp, "defineps", 8))
4410 {
4411 bp = skip_non_spaces (bp);
4412 bp = skip_spaces (bp);
4413 get_tag (bp);
4414 }
4415 }
4416 }
4417
4418 \f
4419 /*
4420 * Scheme tag functions
4421 * look for (def... xyzzy
4422 * look for (def... (xyzzy
4423 * look for (def ... ((...(xyzzy ....
4424 * look for (set! xyzzy
4425 */
4426
4427 static void
4428 Scheme_functions (inf)
4429 FILE *inf;
4430 {
4431 register char *bp;
4432
4433 LOOP_ON_INPUT_LINES (inf, lb, bp)
4434 {
4435 if (bp[0] == '('
4436 && (bp[1] == 'D' || bp[1] == 'd')
4437 && (bp[2] == 'E' || bp[2] == 'e')
4438 && (bp[3] == 'F' || bp[3] == 'f'))
4439 {
4440 bp = skip_non_spaces (bp);
4441 /* Skip over open parens and white space */
4442 while (iswhite (*bp) || *bp == '(')
4443 bp++;
4444 get_tag (bp);
4445 }
4446 if (bp[0] == '('
4447 && (bp[1] == 'S' || bp[1] == 's')
4448 && (bp[2] == 'E' || bp[2] == 'e')
4449 && (bp[3] == 'T' || bp[3] == 't')
4450 && (bp[4] == '!' || bp[4] == '!')
4451 && (iswhite (bp[5])))
4452 {
4453 bp = skip_non_spaces (bp);
4454 bp = skip_spaces (bp);
4455 get_tag (bp);
4456 }
4457 }
4458 }
4459
4460 \f
4461 /* Find tags in TeX and LaTeX input files. */
4462
/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* the control sequence name */
  int len;                      /* length of NAME; 0 marks the table end */
};
4471
4472 struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4473
4474 /* Default set of control sequences to put into TEX_toktab.
4475 The value of environment var TEXTAGS is prepended to this. */
4476
4477 char *TEX_defenv = "\
4478 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4479 :part:appendix:entry:index";
4480
4481 static void TEX_mode P_((FILE *));
4482 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4483 static int TEX_Token P_((char *));
4484
4485 char TEX_esc = '\\';
4486 char TEX_opgrp = '{';
4487 char TEX_clgrp = '}';
4488
4489 /*
4490 * TeX/LaTeX scanning loop.
4491 */
4492 static void
4493 TeX_commands (inf)
4494 FILE *inf;
4495 {
4496 char *cp, *lasthit;
4497 register int i;
4498
4499 /* Select either \ or ! as escape character. */
4500 TEX_mode (inf);
4501
4502 /* Initialize token table once from environment. */
4503 if (!TEX_toktab)
4504 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4505
4506 LOOP_ON_INPUT_LINES (inf, lb, cp)
4507 {
4508 lasthit = cp;
4509 /* Look at each esc in line. */
4510 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4511 {
4512 if (*++cp == '\0')
4513 break;
4514 linecharno += cp - lasthit;
4515 lasthit = cp;
4516 i = TEX_Token (lasthit);
4517 if (i >= 0)
4518 {
4519 /* We seem to include the TeX command in the tag name.
4520 register char *p;
4521 for (p = lasthit + TEX_toktab[i].len;
4522 *p != '\0' && *p != TEX_clgrp;
4523 p++)
4524 continue; */
4525 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4526 lb.buffer, lb.len, lineno, linecharno);
4527 break; /* We only tag a line once */
4528 }
4529 }
4530 }
4531 }
4532
4533 #define TEX_LESC '\\'
4534 #define TEX_SESC '!'
4535 #define TEX_cmt '%'
4536
4537 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4538 chars accordingly. */
4539 static void
4540 TEX_mode (inf)
4541 FILE *inf;
4542 {
4543 int c;
4544
4545 while ((c = getc (inf)) != EOF)
4546 {
4547 /* Skip to next line if we hit the TeX comment char. */
4548 if (c == TEX_cmt)
4549 while (c != '\n')
4550 c = getc (inf);
4551 else if (c == TEX_LESC || c == TEX_SESC )
4552 break;
4553 }
4554
4555 if (c == TEX_LESC)
4556 {
4557 TEX_esc = TEX_LESC;
4558 TEX_opgrp = '{';
4559 TEX_clgrp = '}';
4560 }
4561 else
4562 {
4563 TEX_esc = TEX_SESC;
4564 TEX_opgrp = '<';
4565 TEX_clgrp = '>';
4566 }
4567 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4568 No attempt is made to correct the situation. */
4569 rewind (inf);
4570 }
4571
4572 /* Read environment and prepend it to the default string.
4573 Build token table. */
4574 static struct TEX_tabent *
4575 TEX_decode_env (evarname, defenv)
4576 char *evarname;
4577 char *defenv;
4578 {
4579 register char *env, *p;
4580
4581 struct TEX_tabent *tab;
4582 int size, i;
4583
4584 /* Append default string to environment. */
4585 env = getenv (evarname);
4586 if (!env)
4587 env = defenv;
4588 else
4589 {
4590 char *oldenv = env;
4591 env = concat (oldenv, defenv, "");
4592 }
4593
4594 /* Allocate a token table */
4595 for (size = 1, p = env; p;)
4596 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4597 size++;
4598 /* Add 1 to leave room for null terminator. */
4599 tab = xnew (size + 1, struct TEX_tabent);
4600
4601 /* Unpack environment string into token table. Be careful about */
4602 /* zero-length strings (leading ':', "::" and trailing ':') */
4603 for (i = 0; *env;)
4604 {
4605 p = etags_strchr (env, ':');
4606 if (!p) /* End of environment string. */
4607 p = env + strlen (env);
4608 if (p - env > 0)
4609 { /* Only non-zero strings. */
4610 tab[i].name = savenstr (env, p - env);
4611 tab[i].len = strlen (tab[i].name);
4612 i++;
4613 }
4614 if (*p)
4615 env = p + 1;
4616 else
4617 {
4618 tab[i].name = NULL; /* Mark end of table. */
4619 tab[i].len = 0;
4620 break;
4621 }
4622 }
4623 return tab;
4624 }
4625
4626 /* If the text at CP matches one of the tag-defining TeX command names,
4627 return the pointer to the first occurrence of that command in TEX_toktab.
4628 Otherwise return -1.
4629 Keep the capital `T' in `token' for dumb truncating compilers
4630 (this distinguishes it from `TEX_toktab' */
4631 static int
4632 TEX_Token (cp)
4633 char *cp;
4634 {
4635 int i;
4636
4637 for (i = 0; TEX_toktab[i].len > 0; i++)
4638 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4639 return i;
4640 return -1;
4641 }
4642
4643 \f
4644 /* Texinfo support. Dave Love, Mar. 2000. */
4645 static void
4646 Texinfo_nodes (inf)
4647 FILE * inf;
4648 {
4649 char *cp, *start;
4650 LOOP_ON_INPUT_LINES (inf, lb, cp)
4651 {
4652 if ((*cp++ == '@'
4653 && *cp++ == 'n'
4654 && *cp++ == 'o'
4655 && *cp++ == 'd'
4656 && *cp++ == 'e' && iswhite (*cp++)))
4657 {
4658 start = cp = skip_spaces(cp);
4659 while (*cp != '\0' && *cp != ',')
4660 cp++;
4661 pfnote (savenstr (start, cp - start), TRUE,
4662 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4663 }
4664 }
4665 }
4666
4667 \f
4668 /*
4669 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4670 *
4671 * Assumes that the predicate starts at column 0.
4672 * Only the first clause of a predicate is added.
4673 */
4674 static int prolog_pred P_((char *, char *));
4675 static void prolog_skip_comment P_((linebuffer *, FILE *));
4676 static int prolog_atom P_((char *, int));
4677
4678 static void
4679 Prolog_functions (inf)
4680 FILE *inf;
4681 {
4682 char *cp, *last;
4683 int len;
4684 int allocated;
4685
4686 allocated = 0;
4687 len = 0;
4688 last = NULL;
4689
4690 LOOP_ON_INPUT_LINES (inf, lb, cp)
4691 {
4692 if (cp[0] == '\0') /* Empty line */
4693 continue;
4694 else if (iswhite (cp[0])) /* Not a predicate */
4695 continue;
4696 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4697 prolog_skip_comment (&lb, inf);
4698 else if ((len = prolog_pred (cp, last)) > 0)
4699 {
4700 /* Predicate. Store the function name so that we only
4701 generate a tag for the first clause. */
4702 if (last == NULL)
4703 last = xnew(len + 1, char);
4704 else if (len + 1 > allocated)
4705 xrnew (last, len + 1, char);
4706 allocated = len + 1;
4707 strncpy (last, cp, len);
4708 last[len] = '\0';
4709 }
4710 }
4711 }
4712
4713
4714 static void
4715 prolog_skip_comment (plb, inf)
4716 linebuffer *plb;
4717 FILE *inf;
4718 {
4719 char *cp;
4720
4721 do
4722 {
4723 for (cp = plb->buffer; *cp != '\0'; cp++)
4724 if (cp[0] == '*' && cp[1] == '/')
4725 return;
4726 lineno++;
4727 linecharno += readline (plb, inf);
4728 }
4729 while (!feof(inf));
4730 }
4731
4732 /*
4733 * A predicate definition is added if it matches:
4734 * <beginning of line><Prolog Atom><whitespace>(
4735 *
4736 * It is added to the tags database if it doesn't match the
4737 * name of the previous clause header.
4738 *
4739 * Return the size of the name of the predicate, or 0 if no header
4740 * was found.
4741 */
4742 static int
4743 prolog_pred (s, last)
4744 char *s;
4745 char *last; /* Name of last clause. */
4746 {
4747 int pos;
4748 int len;
4749
4750 pos = prolog_atom (s, 0);
4751 if (pos < 1)
4752 return 0;
4753
4754 len = pos;
4755 pos = skip_spaces (s + pos) - s;
4756
4757 if ((s[pos] == '(') || (s[pos] == '.'))
4758 {
4759 if (s[pos] == '(')
4760 pos++;
4761
4762 /* Save only the first clause. */
4763 if (last == NULL
4764 || len != (int)strlen (last)
4765 || !strneq (s, last, len))
4766 {
4767 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4768 return len;
4769 }
4770 }
4771 return 0;
4772 }
4773
/*
 * Consume a Prolog atom starting at offset POS of S.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter.
 *   An underscore is accepted wherever a letter is.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* skip the quoted character as well */
        break;
      case '\'':
        p++;
        if (*p != '\'')
          return (int) (p - start); /* that was the closing quote */
        p++;                    /* A double quote */
        break;
      default:
        p++;
        break;
      }
}
4832
4833 \f
4834 /*
4835 * Support for Erlang -- Anders Lindgren, Feb 1996.
4836 *
4837 * Generates tags for functions, defines, and records.
4838 *
4839 * Assumes that Erlang functions start at column 0.
4840 */
4841 static int erlang_func P_((char *, char *));
4842 static void erlang_attribute P_((char *));
4843 static int erlang_atom P_((char *, int));
4844
4845 static void
4846 Erlang_functions (inf)
4847 FILE *inf;
4848 {
4849 char *cp, *last;
4850 int len;
4851 int allocated;
4852
4853 allocated = 0;
4854 len = 0;
4855 last = NULL;
4856
4857 LOOP_ON_INPUT_LINES (inf, lb, cp)
4858 {
4859 if (cp[0] == '\0') /* Empty line */
4860 continue;
4861 else if (iswhite (cp[0])) /* Not function nor attribute */
4862 continue;
4863 else if (cp[0] == '%') /* comment */
4864 continue;
4865 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4866 continue;
4867 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4868 {
4869 erlang_attribute (cp);
4870 last = NULL;
4871 }
4872 else if ((len = erlang_func (cp, last)) > 0)
4873 {
4874 /*
4875 * Function. Store the function name so that we only
4876 * generates a tag for the first clause.
4877 */
4878 if (last == NULL)
4879 last = xnew (len + 1, char);
4880 else if (len + 1 > allocated)
4881 xrnew (last, len + 1, char);
4882 allocated = len + 1;
4883 strncpy (last, cp, len);
4884 last[len] = '\0';
4885 }
4886 }
4887 }
4888
4889
4890 /*
4891 * A function definition is added if it matches:
4892 * <beginning of line><Erlang Atom><whitespace>(
4893 *
4894 * It is added to the tags database if it doesn't match the
4895 * name of the previous clause header.
4896 *
4897 * Return the size of the name of the function, or 0 if no function
4898 * was found.
4899 */
4900 static int
4901 erlang_func (s, last)
4902 char *s;
4903 char *last; /* Name of last clause. */
4904 {
4905 int pos;
4906 int len;
4907
4908 pos = erlang_atom (s, 0);
4909 if (pos < 1)
4910 return 0;
4911
4912 len = pos;
4913 pos = skip_spaces (s + pos) - s;
4914
4915 /* Save only the first clause. */
4916 if (s[pos++] == '('
4917 && (last == NULL
4918 || len != (int)strlen (last)
4919 || !strneq (s, last, len)))
4920 {
4921 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4922 return len;
4923 }
4924
4925 return 0;
4926 }
4927
4928
4929 /*
4930 * Handle attributes. Currently, tags are generated for defines
4931 * and records.
4932 *
4933 * They are on the form:
4934 * -define(foo, bar).
4935 * -define(Foo(M, N), M+N).
4936 * -record(graph, {vtab = notable, cyclic = true}).
4937 */
4938 static void
4939 erlang_attribute (s)
4940 char *s;
4941 {
4942 int pos;
4943 int len;
4944
4945 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4946 {
4947 pos = skip_spaces (s + 7) - s;
4948 if (s[pos++] == '(')
4949 {
4950 pos = skip_spaces (s + pos) - s;
4951 len = erlang_atom (s, pos);
4952 if (len != 0)
4953 pfnote (savenstr (& s[pos], len), TRUE,
4954 s, pos + len, lineno, linecharno);
4955 }
4956 }
4957 return;
4958 }
4959
4960
/*
 * Consume an Erlang atom (or variable) starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * An unquoted name is a letter or underscore followed by any number
 * of alphanumerics or underscores.  A quoted atom is text between
 * single quotes, where a backslash quotes the character that follows
 * it; a quoted atom not closed on this line yields -1.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int i = pos;

  if (ISALPHA (s[i]) || s[i] == '_')
    {
      /* Unquoted: swallow the identifier characters. */
      for (i++; ISALNUM (s[i]) || s[i] == '_'; i++)
        continue;
      return i - pos;
    }

  if (s[i] != '\'')
    return -1;

  /* Quoted: scan up to the closing quote. */
  i++;
  for (;;)
    switch (s[i])
      {
      case '\'':
        i++;
        return i - pos;
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (s[i+1] == '\0')
          return -1;
        i += 2;
        break;
      default:
        i++;
        break;
      }
}
5010
5011 \f
5012 #ifdef ETAGS_REGEXPS
5013
5014 static char *scan_separators P_((char *));
5015 static void analyse_regex P_((char *, bool));
5016 static void add_regex P_((char *, bool, language *));
5017 static char *substitute P_((char *, char *, struct re_registers *));
5018
/* Take a string like "/blah/" and turn it into "blah", handling
   quoted separator characters.  The first character of NAME is the
   separator; scanning stops at the first unquoted occurrence of it,
   or at the end of the string.  Works in place and null-terminates
   the resulting string, which starts at NAME.

   Backslash sequences: "\t" becomes a Tab, a quoted separator becomes
   the separator itself, and any other quoted character is kept
   together with its backslash.  A lone backslash at the very end of
   the string is dropped.

   Return a pointer to the terminating separator, or to the final null
   character if no unquoted separator was found.  An empty NAME is
   returned unchanged.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan; stepping past the terminator would run off the
     end of the string. */
  if (sep == '\0')
    return name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          if (name[1] == '\0')
            continue;           /* trailing backslash: drop it */
          ++name;
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5061
5062 /* Look at the argument of --regex or --no-regex and do the right
5063 thing. Same for each line of a regexp file. */
5064 static void
5065 analyse_regex (regex_arg, ignore_case)
5066 char *regex_arg;
5067 bool ignore_case;
5068 {
5069 if (regex_arg == NULL)
5070 free_patterns (); /* --no-regex: remove existing regexps */
5071
5072 /* A real --regexp option or a line in a regexp file. */
5073 switch (regex_arg[0])
5074 {
5075 /* Comments in regexp file or null arg to --regex. */
5076 case '\0':
5077 case ' ':
5078 case '\t':
5079 break;
5080
5081 /* Read a regex file. This is recursive and may result in a
5082 loop, which will stop when the file descriptors are exhausted. */
5083 case '@':
5084 {
5085 FILE *regexfp;
5086 linebuffer regexbuf;
5087 char *regexfile = regex_arg + 1;
5088
5089 /* regexfile is a file containing regexps, one per line. */
5090 regexfp = fopen (regexfile, "r");
5091 if (regexfp == NULL)
5092 {
5093 pfatal (regexfile);
5094 return;
5095 }
5096 initbuffer (&regexbuf);
5097 while (readline_internal (&regexbuf, regexfp) > 0)
5098 analyse_regex (regexbuf.buffer, ignore_case);
5099 free (regexbuf.buffer);
5100 fclose (regexfp);
5101 }
5102 break;
5103
5104 /* Regexp to be used for a specific language only. */
5105 case '{':
5106 {
5107 language *lang;
5108 char *lang_name = regex_arg + 1;
5109 char *cp;
5110
5111 for (cp = lang_name; *cp != '}'; cp++)
5112 if (*cp == '\0')
5113 {
5114 error ("unterminated language name in regex: %s", regex_arg);
5115 return;
5116 }
5117 *cp = '\0';
5118 lang = get_language_from_langname (lang_name);
5119 if (lang == NULL)
5120 return;
5121 add_regex (cp + 1, ignore_case, lang);
5122 }
5123 break;
5124
5125 /* Regexp to be used for any language. */
5126 default:
5127 add_regex (regex_arg, ignore_case, NULL);
5128 break;
5129 }
5130 }
5131
5132 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5133 expression, into a real regular expression by compiling it. */
5134 static void
5135 add_regex (regexp_pattern, ignore_case, lang)
5136 char *regexp_pattern;
5137 bool ignore_case;
5138 language *lang;
5139 {
5140 char *name;
5141 const char *err;
5142 struct re_pattern_buffer *patbuf;
5143 pattern *pp;
5144
5145
5146 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5147 {
5148 error ("%s: unterminated regexp", regexp_pattern);
5149 return;
5150 }
5151 name = scan_separators (regexp_pattern);
5152 if (regexp_pattern[0] == '\0')
5153 {
5154 error ("null regexp", (char *)NULL);
5155 return;
5156 }
5157 (void) scan_separators (name);
5158
5159 patbuf = xnew (1, struct re_pattern_buffer);
5160 /* Translation table to fold case if appropriate. */
5161 patbuf->translate = (ignore_case) ? lc_trans : NULL;
5162 patbuf->fastmap = NULL;
5163 patbuf->buffer = NULL;
5164 patbuf->allocated = 0;
5165
5166 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5167 if (err != NULL)
5168 {
5169 error ("%s while compiling pattern", err);
5170 return;
5171 }
5172
5173 pp = p_head;
5174 p_head = xnew (1, pattern);
5175 p_head->regex = savestr (regexp_pattern);
5176 p_head->p_next = pp;
5177 p_head->language = lang;
5178 p_head->pattern = patbuf;
5179 p_head->name_pattern = savestr (name);
5180 p_head->error_signaled = FALSE;
5181 }
5182
5183 /*
5184 * Do the substitutions indicated by the regular expression and
5185 * arguments.
5186 */
5187 static char *
5188 substitute (in, out, regs)
5189 char *in, *out;
5190 struct re_registers *regs;
5191 {
5192 char *result, *t;
5193 int size, dig, diglen;
5194
5195 result = NULL;
5196 size = strlen (out);
5197
5198 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5199 if (out[size - 1] == '\\')
5200 fatal ("pattern error in \"%s\"", out);
5201 for (t = etags_strchr (out, '\\');
5202 t != NULL;
5203 t = etags_strchr (t + 2, '\\'))
5204 if (ISDIGIT (t[1]))
5205 {
5206 dig = t[1] - '0';
5207 diglen = regs->end[dig] - regs->start[dig];
5208 size += diglen - 2;
5209 }
5210 else
5211 size -= 1;
5212
5213 /* Allocate space and do the substitutions. */
5214 result = xnew (size + 1, char);
5215
5216 for (t = result; *out != '\0'; out++)
5217 if (*out == '\\' && ISDIGIT (*++out))
5218 {
5219 dig = *out - '0';
5220 diglen = regs->end[dig] - regs->start[dig];
5221 strncpy (t, in + regs->start[dig], diglen);
5222 t += diglen;
5223 }
5224 else
5225 *t++ = *out;
5226 *t = '\0';
5227
5228 assert (t <= result + size && t - result == (int)strlen (result));
5229
5230 return result;
5231 }
5232
5233 /* Deallocate all patterns. */
5234 static void
5235 free_patterns ()
5236 {
5237 pattern *pp;
5238 while (p_head != NULL)
5239 {
5240 pp = p_head->p_next;
5241 free (p_head->regex);
5242 free (p_head->name_pattern);
5243 free (p_head);
5244 p_head = pp;
5245 }
5246 return;
5247 }
5248 #endif /* ETAGS_REGEXPS */
5249
5250 \f
5251 static void
5252 get_tag (bp)
5253 register char *bp;
5254 {
5255 register char *cp;
5256
5257 if (*bp == '\0')
5258 return;
5259 /* Go till you get to white space or a syntactic break */
5260 for (cp = bp + 1;
5261 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5262 cp++)
5263 continue;
5264 pfnote (savenstr (bp, cp-bp), TRUE,
5265 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5266 }
5267
5268 /* Initialize a linebuffer for use */
5269 static void
5270 initbuffer (lbp)
5271 linebuffer *lbp;
5272 {
5273 lbp->size = (DEBUG) ? 3 : 200;
5274 lbp->buffer = xnew (lbp->size, char);
5275 lbp->buffer[0] = '\0';
5276 lbp->len = 0;
5277 }
5278
5279 /*
5280 * Read a line of text from `stream' into `lbp', excluding the
5281 * newline or CR-NL, if any. Return the number of characters read from
5282 * `stream', which is the length of the line including the newline.
5283 *
5284 * On DOS or Windows we do not count the CR character, if any, before the
5285 * NL, in the returned length; this mirrors the behavior of emacs on those
5286 * platforms (for text files, it translates CR-NL to NL as it reads in the
5287 * file).
5288 */
5289 static long
5290 readline_internal (lbp, stream)
5291 linebuffer *lbp;
5292 register FILE *stream;
5293 {
5294 char *buffer = lbp->buffer;
5295 register char *p = lbp->buffer;
5296 register char *pend;
5297 int chars_deleted;
5298
5299 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5300
5301 while (1)
5302 {
5303 register int c = getc (stream);
5304 if (p == pend)
5305 {
5306 /* We're at the end of linebuffer: expand it. */
5307 lbp->size *= 2;
5308 xrnew (buffer, lbp->size, char);
5309 p += buffer - lbp->buffer;
5310 pend = buffer + lbp->size;
5311 lbp->buffer = buffer;
5312 }
5313 if (c == EOF)
5314 {
5315 *p = '\0';
5316 chars_deleted = 0;
5317 break;
5318 }
5319 if (c == '\n')
5320 {
5321 if (p > buffer && p[-1] == '\r')
5322 {
5323 p -= 1;
5324 #ifdef DOS_NT
5325 /* Assume CRLF->LF translation will be performed by Emacs
5326 when loading this file, so CRs won't appear in the buffer.
5327 It would be cleaner to compensate within Emacs;
5328 however, Emacs does not know how many CRs were deleted
5329 before any given point in the file. */
5330 chars_deleted = 1;
5331 #else
5332 chars_deleted = 2;
5333 #endif
5334 }
5335 else
5336 {
5337 chars_deleted = 1;
5338 }
5339 *p = '\0';
5340 break;
5341 }
5342 *p++ = c;
5343 }
5344 lbp->len = p - buffer;
5345
5346 return lbp->len + chars_deleted;
5347 }
5348
5349 /*
5350 * Like readline_internal, above, but in addition try to match the
5351 * input line against relevant regular expressions.
5352 */
5353 static long
5354 readline (lbp, stream)
5355 linebuffer *lbp;
5356 FILE *stream;
5357 {
5358 /* Read new line. */
5359 long result = readline_internal (lbp, stream);
5360 #ifdef ETAGS_REGEXPS
5361 int match;
5362 pattern *pp;
5363
5364 /* Match against relevant patterns. */
5365 if (lbp->len > 0)
5366 for (pp = p_head; pp != NULL; pp = pp->p_next)
5367 {
5368 /* Only use generic regexps or those for the current language. */
5369 if (pp->language != NULL && pp->language != curlang)
5370 continue;
5371
5372 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5373 switch (match)
5374 {
5375 case -2:
5376 /* Some error. */
5377 if (!pp->error_signaled)
5378 {
5379 error ("error while matching \"%s\"", pp->regex);
5380 pp->error_signaled = TRUE;
5381 }
5382 break;
5383 case -1:
5384 /* No match. */
5385 break;
5386 default:
5387 /* Match occurred. Construct a tag. */
5388 if (pp->name_pattern[0] != '\0')
5389 {
5390 /* Make a named tag. */
5391 char *name = substitute (lbp->buffer,
5392 pp->name_pattern, &pp->regs);
5393 if (name != NULL)
5394 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5395 }
5396 else
5397 {
5398 /* Make an unnamed tag. */
5399 pfnote ((char *)NULL, TRUE,
5400 lbp->buffer, match, lineno, linecharno);
5401 }
5402 break;
5403 }
5404 }
5405 #endif /* ETAGS_REGEXPS */
5406
5407 return result;
5408 }
5409
5410 \f
/*
 * Return a newly allocated copy of the null-terminated string CP.
 * The copy occupies strlen(cp)+1 bytes and is made by savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5421
5422 /*
5423 * Return a pointer to a space of size LEN+1 allocated with xnew where
5424 * the string CP has been copied for at most the first LEN characters.
5425 */
5426 static char *
5427 savenstr (cp, len)
5428 char *cp;
5429 int len;
5430 {
5431 register char *dp;
5432
5433 dp = xnew (len + 1, char);
5434 strncpy (dp, cp, len);
5435 dp[len] = '\0';
5436 return dp;
5437 }
5438
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null character is
 * part of the string, so searching for '\0' finds it.
 *
 * Identical to POSIX strrchr, included for portability.  As in
 * strrchr, C is converted to a char before comparing, so that
 * characters above 127 are found even where char is signed.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  register char ch;

  ch = (char) c;
  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
5460
5461
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null character is
 * part of the string, so searching for '\0' finds it.
 *
 * Identical to POSIX strchr, included for portability.  As in
 * strchr, C is converted to a char before comparing, so that
 * characters above 127 are found even where char is signed.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  register char ch;

  ch = (char) c;
  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
5480
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5490
/* Return a pointer to the first character at or after CP that is
   either the end of the string or one for which iswhite is true.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
5500
5501 /* Print error message and exit. */
5502 void
5503 fatal (s1, s2)
5504 char *s1, *s2;
5505 {
5506 error (s1, s2);
5507 exit (BAD);
5508 }
5509
5510 static void
5511 pfatal (s1)
5512 char *s1;
5513 {
5514 perror (s1);
5515 exit (BAD);
5516 }
5517
5518 static void
5519 suggest_asking_for_help ()
5520 {
5521 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5522 progname,
5523 #ifdef LONG_OPTIONS
5524 "--help"
5525 #else
5526 "-h"
5527 #endif
5528 );
5529 exit (BAD);
5530 }
5531
5532 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5533 static void
5534 error (s1, s2)
5535 const char *s1, *s2;
5536 {
5537 fprintf (stderr, "%s: ", progname);
5538 fprintf (stderr, s1, s2);
5539 fprintf (stderr, "\n");
5540 }
5541
5542 /* Return a newly-allocated string whose contents
5543 concatenate those of s1, s2, s3. */
5544 static char *
5545 concat (s1, s2, s3)
5546 char *s1, *s2, *s3;
5547 {
5548 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5549 char *result = xnew (len1 + len2 + len3 + 1, char);
5550
5551 strcpy (result, s1);
5552 strcpy (result + len1, s2);
5553 strcpy (result + len1 + len2, s3);
5554 result[len1 + len2 + len3] = '\0';
5555
5556 return result;
5557 }
5558
5559 \f
5560 /* Does the same work as the system V getcwd, but does not need to
5561 guess the buffer size in advance. */
5562 static char *
5563 etags_getcwd ()
5564 {
5565 #ifdef HAVE_GETCWD
5566 int bufsize = 200;
5567 char *path = xnew (bufsize, char);
5568
5569 while (getcwd (path, bufsize) == NULL)
5570 {
5571 if (errno != ERANGE)
5572 pfatal ("getcwd");
5573 bufsize *= 2;
5574 free (path);
5575 path = xnew (bufsize, char);
5576 }
5577
5578 canonicalize_filename (path);
5579 return path;
5580
5581 #else /* not HAVE_GETCWD */
5582 #if MSDOS
5583
5584 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5585
5586 getwd (path);
5587
5588 for (p = path; *p != '\0'; p++)
5589 if (*p == '\\')
5590 *p = '/';
5591 else
5592 *p = lowcase (*p);
5593
5594 return strdup (path);
5595 #else /* not MSDOS */
5596 linebuffer path;
5597 FILE *pipe;
5598
5599 initbuffer (&path);
5600 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5601 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5602 pfatal ("pwd");
5603 pclose (pipe);
5604
5605 return path.buffer;
5606 #endif /* not MSDOS */
5607 #endif /* not HAVE_GETCWD */
5608 }
5609
5610 /* Return a newly allocated string containing the file name of FILE
5611 relative to the absolute directory DIR (which should end with a slash). */
5612 static char *
5613 relative_filename (file, dir)
5614 char *file, *dir;
5615 {
5616 char *fp, *dp, *afn, *res;
5617 int i;
5618
5619 /* Find the common root of file and dir (with a trailing slash). */
5620 afn = absolute_filename (file, cwd);
5621 fp = afn;
5622 dp = dir;
5623 while (*fp++ == *dp++)
5624 continue;
5625 fp--, dp--; /* back to the first differing char */
5626 #ifdef DOS_NT
5627 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5628 return afn;
5629 #endif
5630 do /* look at the equal chars until '/' */
5631 fp--, dp--;
5632 while (*fp != '/');
5633
5634 /* Build a sequence of "../" strings for the resulting relative file name. */
5635 i = 0;
5636 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5637 i += 1;
5638 res = xnew (3*i + strlen (fp + 1) + 1, char);
5639 res[0] = '\0';
5640 while (i-- > 0)
5641 strcat (res, "../");
5642
5643 /* Add the file name relative to the common root of file and dir. */
5644 strcat (res, fp + 1);
5645 free (afn);
5646
5647 return res;
5648 }
5649
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  The "/dirname/.."
   and "/." components are removed from the result; if nothing is left,
   the result is "/".  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the
     string is moved down over the deleted part with memmove, since
     source and destination overlap. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the start of the preceding component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name denotes the root. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5710
5711 /* Return a newly allocated string containing the absolute
5712 file name of dir where FILE resides given DIR (which should
5713 end with a slash). */
5714 static char *
5715 absolute_dirname (file, dir)
5716 char *file, *dir;
5717 {
5718 char *slashp, *res;
5719 char save;
5720
5721 canonicalize_filename (file);
5722 slashp = etags_strrchr (file, '/');
5723 if (slashp == NULL)
5724 return savestr (dir);
5725 save = slashp[1];
5726 slashp[1] = '\0';
5727 res = absolute_filename (file, dir);
5728 slashp[1] = save;
5729
5730 return res;
5731 }
5732
/* Whether the argument string is an absolute file name, that is, one
   starting with a slash or, under DOS_NT, with a drive letter, a colon
   and a slash.  The argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  int absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
5745
/* Canonicalize the file name FN in place.  Under DOS_NT, a lower case
   drive letter is made upper case and backslashes are translated into
   slashes.  Elsewhere nothing is changed.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5764
5765 /* Set the minimum size of a string contained in a linebuffer. */
5766 static void
5767 linebuffer_setlen (lbp, toksize)
5768 linebuffer *lbp;
5769 int toksize;
5770 {
5771 while (lbp->size <= toksize)
5772 {
5773 lbp->size *= 2;
5774 xrnew (lbp->buffer, lbp->size, char);
5775 }
5776 lbp->len = toksize;
5777 }
5778
/* Like malloc, but a NULL result from malloc is reported through
   `fatal' as exhausted memory.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *mem = (long *) malloc (size);

  if (mem != NULL)
    return mem;
  fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
5789
/* Like realloc, but a NULL result from realloc is reported through
   `fatal' as exhausted memory.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *mem = (long *) realloc (ptr, size);

  if (mem != NULL)
    return mem;
  fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}