Many changes to the parsing capabilities of etags.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000, 2001
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 199x Sam Kendall added C++.
28 * 1993 Francesco Potortì reorganised C and C++ based on work by Joe Wells.
29 * 1994 Regexp tags by Tom Tromey.
30 * 2001 Nested classes by Francesco Potortì based on work by Mykola Dzyuba.
31 *
32 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
33 */
34
35 char pot_etags_version[] = "@(#) pot revision number is 14.11";
36
37 #define TRUE 1
38 #define FALSE 0
39
40 #ifdef DEBUG
41 # undef DEBUG
42 # define DEBUG TRUE
43 #else
44 # define DEBUG FALSE
45 # define NDEBUG /* disable assert */
46 #endif
47
48 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
49 # define P_(proto) proto
50 #else
51 # define P_(proto) ()
52 #endif
53
54 #ifdef HAVE_CONFIG_H
55 # include <config.h>
56 /* On some systems, Emacs defines static as nothing for the sake
57 of unexec. We don't want that here since we don't use unexec. */
58 # undef static
59 # define ETAGS_REGEXPS /* use the regexp features */
60 # define LONG_OPTIONS /* accept long options */
61 #endif /* HAVE_CONFIG_H */
62
63 #ifndef _GNU_SOURCE
64 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
65 #endif
66
67 #ifdef MSDOS
68 # undef MSDOS
69 # define MSDOS TRUE
70 # include <fcntl.h>
71 # include <sys/param.h>
72 # include <io.h>
73 # ifndef HAVE_CONFIG_H
74 # define DOS_NT
75 # include <sys/config.h>
76 # endif
77 #else
78 # define MSDOS FALSE
79 #endif /* MSDOS */
80
81 #ifdef WINDOWSNT
82 # include <stdlib.h>
83 # include <fcntl.h>
84 # include <string.h>
85 # include <direct.h>
86 # include <io.h>
87 # define MAXPATHLEN _MAX_PATH
88 # ifdef HAVE_CONFIG_H
89 # undef HAVE_NTGUI
90 # else
91 # define DOS_NT
92 # endif /* not HAVE_CONFIG_H */
93 # ifndef HAVE_GETCWD
94 # define HAVE_GETCWD
95 # endif /* undef HAVE_GETCWD */
96 #else /* !WINDOWSNT */
97 # ifdef STDC_HEADERS
98 # include <stdlib.h>
99 # include <string.h>
100 # else
101 extern char *getenv ();
102 # endif
103 #endif /* !WINDOWSNT */
104
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#else
/* No <unistd.h>: declare getcwd by hand where the system has it.
   WINDOWSNT is tested with `defined' because the rest of the file only
   ever checks it with #ifdef; if it is defined with an empty value,
   `!WINDOWSNT' would not be a valid #if expression.  */
# if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
extern char *getcwd (char *buf, size_t size);
# endif
#endif /* HAVE_UNISTD_H */
112
113 #include <stdio.h>
114 #include <ctype.h>
115 #include <errno.h>
116 #ifndef errno
117 extern int errno;
118 #endif
119 #include <assert.h>
120 #include <sys/types.h>
121 #include <sys/stat.h>
122
123 #if !defined (S_ISREG) && defined (S_IFREG)
124 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
125 #endif
126
127 #ifdef LONG_OPTIONS
128 # include <getopt.h>
129 #else
130 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
131 extern char *optarg;
132 extern int optind, opterr;
133 #endif /* LONG_OPTIONS */
134
135 #ifdef ETAGS_REGEXPS
136 # include <regex.h>
137 #endif /* ETAGS_REGEXPS */
138
139 /* Define CTAGS to make the program "ctags" compatible with the usual one.
140 Leave it undefined to make the program "etags", which makes emacs-style
141 tag tables and tags typedefs, #defines and struct/union/enum by default. */
142 #ifdef CTAGS
143 # undef CTAGS
144 # define CTAGS TRUE
145 #else
146 # define CTAGS FALSE
147 #endif
148
149 /* Exit codes for success and failure. */
150 #ifdef VMS
151 # define GOOD 1
152 # define BAD 0
153 #else
154 # define GOOD 0
155 # define BAD 1
156 #endif
157
158 /* C extensions. */
159 #define C_EXT 0x00fff /* C extensions */
160 #define C_PLAIN 0x00000 /* C */
161 #define C_PLPL 0x00001 /* C++ */
162 #define C_STAR 0x00003 /* C* */
163 #define C_JAVA 0x00005 /* JAVA */
164 #define C_AUTO 0x01000 /* C, switch to C++ if `class' is met */
165 #define YACC 0x10000 /* yacc file */
166
/* String equality helpers: non-zero when S and T are equal (streq) or
   equal in their first N characters (strneq).  Both arguments are
   handed straight to strcmp/strncmp, so BOTH must be non-NULL; when
   assertions are enabled this is checked before the comparison.  */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
169
/* Character-class predicates: each one looks its argument up in a
   CHARS-entry boolean table (the tables are filled in by init). */
170 #define CHARS 256 /* number of table entries, one per 8-bit char value (2^8) */
171 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) /* x masked to a table index in 0..CHARS-1 */
172 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
173 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
174 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
175 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
176 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
177
178 #define ISALNUM(c) isalnum (CHAR(c))
179 #define ISALPHA(c) isalpha (CHAR(c))
180 #define ISDIGIT(c) isdigit (CHAR(c))
181 #define ISLOWER(c) islower (CHAR(c))
182
183 #define lowcase(c) tolower (CHAR(c))
184 #define upcase(c) toupper (CHAR(c))
185
186
187 /*
188 * xnew, xrnew -- allocate, reallocate storage
189 *
190 * SYNOPSIS: Type *xnew (int n, Type);
191 * void xrnew (OldPointer, int n, Type);
192 */
193 #if DEBUG
194 # include "chkmalloc.h"
195 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
196 (n) * sizeof (Type)))
197 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
198 (char *) (op), (n) * sizeof (Type)))
199 #else
200 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
201 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
202 (char *) (op), (n) * sizeof (Type)))
203 #endif
204
205 typedef int bool;
206
207 typedef void Lang_function P_((FILE *));
208
209 typedef struct
210 {
211 char *suffix;
212 char *command; /* Takes one arg and decompresses to stdout */
213 } compressor;
214
/* Description of one supported language, as listed in lang_names.
   The table initializers are positional, so field order matters. */
215 typedef struct
216 {
217 char *name; /* language name, e.g. "ada"; NULL ends the table */
218 Lang_function *function; /* tag-finding routine taking a FILE *; NULL for "auto" */
219 char **filenames; /* NULL-terminated list of whole file names, or NULL */
220 char **suffixes; /* NULL-terminated list of file name suffixes (without the dot), or NULL */
221 char **interpreters; /* NULL-terminated list of interpreter names, or NULL */
222 } language;
223
224 typedef struct node_st
225 { /* sorting structure */
226 char *name; /* function or type name */
227 char *file; /* file name */
228 bool is_func; /* use pattern or line no */
229 bool been_warned; /* set if noticed dup */
230 int lno; /* line number tag is on */
231 long cno; /* character number line starts on */
232 char *pat; /* search pattern */
233 struct node_st *left, *right; /* left and right sons */
234 } node;
235
236 /*
237 * A `linebuffer' is a structure which holds a line of text.
238 * `readline_internal' reads a line from a stream into a linebuffer
239 * and works regardless of the length of the line.
240 * SIZE is the size of BUFFER, LEN is the length of the string in
241 * BUFFER after readline reads it.
242 */
243 typedef struct
244 {
245 long size;
246 int len;
247 char *buffer;
248 } linebuffer;
249
250 /* Many compilers barf on this:
251 Lang_function Ada_funcs;
252 so let's write it this way */
253 static void Ada_funcs P_((FILE *));
254 static void Asm_labels P_((FILE *));
255 static void C_entries P_((int c_ext, FILE *));
256 static void default_C_entries P_((FILE *));
257 static void plain_C_entries P_((FILE *));
258 static void Cjava_entries P_((FILE *));
259 static void Cobol_paragraphs P_((FILE *));
260 static void Cplusplus_entries P_((FILE *));
261 static void Cstar_entries P_((FILE *));
262 static void Erlang_functions P_((FILE *));
263 static void Fortran_functions P_((FILE *));
264 static void Yacc_entries P_((FILE *));
265 static void Lisp_functions P_((FILE *));
266 static void Makefile_targets P_((FILE *));
267 static void Pascal_functions P_((FILE *));
268 static void Perl_functions P_((FILE *));
269 static void Postscript_functions P_((FILE *));
270 static void Prolog_functions P_((FILE *));
271 static void Python_functions P_((FILE *));
272 static void Scheme_functions P_((FILE *));
273 static void TeX_commands P_((FILE *));
274 static void Texinfo_nodes P_((FILE *));
275 static void just_read_file P_((FILE *));
276
277 static void print_language_names P_((void));
278 static void print_version P_((void));
279 static void print_help P_((void));
280 int main P_((int, char **));
281 static int number_len P_((long));
282
283 static compressor *get_compressor_from_suffix P_((char *, char **));
284 static language *get_language_from_langname P_((char *));
285 static language *get_language_from_interpreter P_((char *));
286 static language *get_language_from_filename P_((char *));
287 static int total_size_of_entries P_((node *));
288 static long readline P_((linebuffer *, FILE *));
289 static long readline_internal P_((linebuffer *, FILE *));
290 static void get_tag P_((char *));
291
292 #ifdef ETAGS_REGEXPS
293 static void analyse_regex P_((char *, bool));
294 static void add_regex P_((char *, bool, language *));
295 static void free_patterns P_((void));
296 #endif /* ETAGS_REGEXPS */
297 static void error P_((const char *, const char *));
298 static void suggest_asking_for_help P_((void));
299 void fatal P_((char *, char *));
300 static void pfatal P_((char *));
301 static void add_node P_((node *, node **));
302
303 static void init P_((void));
304 static void initbuffer P_((linebuffer *));
305 static void find_entries P_((char *, FILE *));
306 static void free_tree P_((node *));
307 static void pfnote P_((char *, bool, char *, int, int, long));
308 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
309 static void process_file P_((char *));
310 static void put_entries P_((node *));
311 static void takeprec P_((void));
312
313 static char *concat P_((char *, char *, char *));
314 static char *skip_spaces P_((char *));
315 static char *skip_non_spaces P_((char *));
316 static char *savenstr P_((char *, int));
317 static char *savestr P_((char *));
318 static char *etags_strchr P_((const char *, int));
319 static char *etags_strrchr P_((const char *, int));
320 static char *etags_getcwd P_((void));
321 static char *relative_filename P_((char *, char *));
322 static char *absolute_filename P_((char *, char *));
323 static char *absolute_dirname P_((char *, char *));
324 static bool filename_is_absolute P_((char *f));
325 static void canonicalize_filename P_((char *));
326 static void linebuffer_setlen P_((linebuffer *, int));
327 long *xmalloc P_((unsigned int));
328 long *xrealloc P_((char *, unsigned int));
329
330 \f
331 char searchar = '/'; /* use /.../ searches */
332
333 char *tagfile; /* output file */
334 char *progname; /* name this program was invoked with */
335 char *cwd; /* current working directory */
336 char *tagfiledir; /* directory of tagfile */
337 FILE *tagf; /* ioptr for tags file */
338
339 char *curfile; /* current input file name */
340 language *curlang; /* current language */
341
342 int lineno; /* line number of current line */
343 long charno; /* current character number */
344 long linecharno; /* charno of start of current line */
345 char *dbp; /* pointer to start of current tag */
346
347 node *head; /* the head of the binary tree of tags */
348
349 linebuffer lb; /* the current line */
350
351 /* boolean "functions" (see init) */
352 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
353 char
354 /* white chars */
355 *white = " \f\t\n\r\v",
356 /* not in a name */
357 *nonam = " \f\t\n\r(=,[;",
358 /* token ending chars */
359 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
360 /* token starting chars */
361 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
362 /* valid in-token chars */
363 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
364
365 bool append_to_tagfile; /* -a: append to tags */
366 /* typedefs, typedefs_or_cplusplus, constantypedefs and globals default to TRUE for etags, but to FALSE for ctags. */
367 bool typedefs; /* -t: create tags for C and Ada typedefs */
368 bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
369 /* 0 struct/enum/union decls, and C++ */
370 /* member functions. */
371 bool constantypedefs; /* -d: create tags for C #define, enum */
372 /* constants and variables. */
373 /* -D: opposite of -d. Default under ctags. */
374 bool declarations; /* --declarations: tag them and extern in C&Co*/
375 bool globals; /* create tags for global variables */
376 bool members; /* create tags for C member variables */
377 bool update; /* -u: update tags */
378 bool vgrind_style; /* -v: create vgrind style index output */
379 bool no_warnings; /* -w: suppress warnings */
380 bool cxref_style; /* -x: create cxref style output */
381 bool cplusplus; /* .[hc] means C++, not C */
382 bool noindentypedefs; /* -I: ignore indentation in C */
383 bool packages_only; /* --packages-only: in Ada, only tag packages*/
384
385 #ifdef LONG_OPTIONS
386 struct option longopts[] =
387 {
388 { "packages-only", no_argument, &packages_only, TRUE },
389 { "append", no_argument, NULL, 'a' },
390 { "backward-search", no_argument, NULL, 'B' },
391 { "c++", no_argument, NULL, 'C' },
392 { "cxref", no_argument, NULL, 'x' },
393 { "defines", no_argument, NULL, 'd' },
394 { "declarations", no_argument, &declarations, TRUE },
395 { "no-defines", no_argument, NULL, 'D' },
396 { "globals", no_argument, &globals, TRUE },
397 { "no-globals", no_argument, &globals, FALSE },
398 { "help", no_argument, NULL, 'h' },
399 { "help", no_argument, NULL, 'H' },
400 { "ignore-indentation", no_argument, NULL, 'I' },
401 { "include", required_argument, NULL, 'i' },
402 { "language", required_argument, NULL, 'l' },
403 { "members", no_argument, &members, TRUE },
404 { "no-members", no_argument, &members, FALSE },
405 { "no-warn", no_argument, NULL, 'w' },
406 { "output", required_argument, NULL, 'o' },
407 #ifdef ETAGS_REGEXPS
408 { "regex", required_argument, NULL, 'r' },
409 { "no-regex", no_argument, NULL, 'R' },
410 { "ignore-case-regex", required_argument, NULL, 'c' },
411 #endif /* ETAGS_REGEXPS */
412 { "typedefs", no_argument, NULL, 't' },
413 { "typedefs-and-c++", no_argument, NULL, 'T' },
414 { "update", no_argument, NULL, 'u' },
415 { "version", no_argument, NULL, 'V' },
416 { "vgrind", no_argument, NULL, 'v' },
417 { NULL }
418 };
419 #endif /* LONG_OPTIONS */
420
421 #ifdef ETAGS_REGEXPS
422 /* Structure defining a regular expression. Elements are
423 the compiled pattern, and the name string. */
/* One element of the list of regexps (the list head is p_head). */
424 typedef struct pattern
425 {
426 struct pattern *p_next; /* link to another pattern in the list */
427 language *language; /* presumably the language the regexp is restricted to -- confirm in add_regex */
428 char *regex; /* presumably the regexp source text -- confirm in add_regex */
429 struct re_pattern_buffer *pattern; /* the compiled pattern */
430 struct re_registers regs; /* NOTE(review): match registers; their use is outside this view */
431 char *name_pattern; /* the name string */
432 bool error_signaled; /* NOTE(review): set and tested outside this view */
433 } pattern;
434
435 /* List of all regexps. */
436 pattern *p_head = NULL;
437
438 /* How many characters in the character set. (From regex.c.) */
439 #define CHAR_SET_SIZE 256
440 /* Translation table for case-insensitive matching. */
441 char lc_trans[CHAR_SET_SIZE];
442 #endif /* ETAGS_REGEXPS */
443
444 compressor compressors[] =
445 {
446 { "z", "gzip -d -c"},
447 { "Z", "gzip -d -c"},
448 { "gz", "gzip -d -c"},
449 { "GZ", "gzip -d -c"},
450 { "bz2", "bzip2 -d -c" },
451 { NULL }
452 };
453
454 /*
455 * Language stuff.
456 */
457
458 /* Non-NULL if language fixed. */
459 language *forced_lang = NULL;
460
461 /* Ada code */
462 char *Ada_suffixes [] =
463 { "ads", "adb", "ada", NULL };
464
465 /* Assembly code */
466 char *Asm_suffixes [] = { "a", /* Unix assembler */
467 "asm", /* Microcontroller assembly */
468 "def", /* BSO/Tasking definition includes */
469 "inc", /* Microcontroller include files */
470 "ins", /* Microcontroller include files */
471 "s", "sa", /* Unix assembler */
472 "S", /* cpp-processed Unix assembler */
473 "src", /* BSO/Tasking C compiler output */
474 NULL
475 };
476
477 /* Note that .c and .h can be considered C++, if the --c++ flag was
478 given. That is why default_C_entries is called here. */
479 char *default_C_suffixes [] =
480 { "c", "h", NULL };
481
482 char *Cplusplus_suffixes [] =
483 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
484 "M", /* Objective C++ */
485 "pdb", /* Postscript with C syntax */
486 NULL };
487
488 char *Cjava_suffixes [] =
489 { "java", NULL };
490
491 char *Cobol_suffixes [] =
492 { "COB", "cob", NULL };
493
494 char *Cstar_suffixes [] =
495 { "cs", "hs", NULL };
496
497 char *Erlang_suffixes [] =
498 { "erl", "hrl", NULL };
499
500 char *Fortran_suffixes [] =
501 { "F", "f", "f90", "for", NULL };
502
503 char *Lisp_suffixes [] =
504 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
505
506 char *Makefile_filenames [] =
507 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
508
509 char *Pascal_suffixes [] =
510 { "p", "pas", NULL };
511
512 char *Perl_suffixes [] =
513 { "pl", "pm", NULL };
514 char *Perl_interpreters [] =
515 { "perl", "@PERL@", NULL };
516
517 char *plain_C_suffixes [] =
518 { "lm", /* Objective lex file */
519 "m", /* Objective C file */
520 "pc", /* Pro*C file */
521 NULL };
522
523 char *Postscript_suffixes [] =
524 { "ps", "psw", NULL }; /* .psw is for PSWrap */
525
526 char *Prolog_suffixes [] =
527 { "prolog", NULL };
528
529 char *Python_suffixes [] =
530 { "py", NULL };
531
532 /* Can't do the `SCM' or `scm' prefix with a version number. */
533 char *Scheme_suffixes [] =
534 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
535
536 char *TeX_suffixes [] =
537 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
538
539 char *Texinfo_suffixes [] =
540 { "texi", "texinfo", "txi", NULL };
541
542 char *Yacc_suffixes [] =
543 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
544
545 /*
546 * Table of languages.
547 *
548 * It is ok for a given function to be listed under more than one
549 * name. I just didn't.
550 */
551
552 language lang_names [] =
553 {
554 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
555 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
556 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
557 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
558 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
559 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
560 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
561 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
562 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
563 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
564 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
565 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
566 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
567 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
568 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
569 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
570 { "python", Python_functions, NULL, Python_suffixes, NULL },
571 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
572 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
573 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
574 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
575 { "auto", NULL }, /* default guessing scheme */
576 { "none", just_read_file }, /* regexp matching only */
577 { NULL, NULL } /* end of list */
578 };
579
580 \f
581 static void
582 print_language_names ()
583 {
584 language *lang;
585 char **name, **ext;
586
587 puts ("\nThese are the currently supported languages, along with the\n\
588 default file names and dot suffixes:");
589 for (lang = lang_names; lang->name != NULL; lang++)
590 {
591 printf (" %-*s", 10, lang->name);
592 if (lang->filenames != NULL)
593 for (name = lang->filenames; *name != NULL; name++)
594 printf (" %s", *name);
595 if (lang->suffixes != NULL)
596 for (ext = lang->suffixes; *ext != NULL; ext++)
597 printf (" .%s", *ext);
598 puts ("");
599 }
600 puts ("Where `auto' means use default language for files based on file\n\
601 name suffix, and `none' means only do regexp processing on files.\n\
602 If no language is specified and no matching suffix is found,\n\
603 the first line of the file is read for a sharp-bang (#!) sequence\n\
604 followed by the name of an interpreter. If no such sequence is found,\n\
605 Fortran is tried first; if no tags are found, C is tried next.\n\
606 When parsing any C file, a \"class\" keyword switches to C++.\n\
607 Compressed files are supported using gzip and bzip2.");
608 }
609
610 #ifndef EMACS_NAME
611 # define EMACS_NAME "GNU Emacs"
612 #endif
613 #ifndef VERSION
614 # define VERSION "21"
615 #endif
616 static void
617 print_version ()
618 {
619 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
620 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
621 puts ("This program is distributed under the same terms as Emacs");
622
623 exit (GOOD);
624 }
625
/* Print the usage message on stdout: the synopsis, every option that
   applies to this build (etags or ctags, with or without regexps and
   long options), the language table, and the bug-report address; then
   exit with status GOOD.  Does not return. */
626 static void
627 print_help ()
628 {
629 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
630 \n\
631 These are the options accepted by %s.\n", progname, progname);
632 #ifdef LONG_OPTIONS
633 puts ("You may use unambiguous abbreviations for the long option names.");
634 #else
635 puts ("Long option names do not work with this executable, as it is not\n\
636 linked with GNU getopt.");
637 #endif /* LONG_OPTIONS */
638 puts ("A - as file name means read names from stdin (one per line).");
639 if (!CTAGS)
640 printf (" Absolute names are stored in the output file as they are.\n\
641 Relative ones are stored relative to the output file's directory.");
642 puts ("\n");
643
644 puts ("-a, --append\n\
645 Append tag entries to existing tags file.");
646
647 puts ("--packages-only\n\
648 For Ada files, only generate tags for packages .");
649
/* -B is documented for ctags only. */
650 if (CTAGS)
651 puts ("-B, --backward-search\n\
652 Write the search commands for the tag entries using '?', the\n\
653 backward-search command instead of '/', the forward-search command.");
654
655 /* This option is mostly obsolete, because etags can now automatically
656 detect C++. Retained for backward compatibility and for debugging and
657 experimentation. In principle, we could want to tag as C++ even
658 before any "class" keyword.
659 puts ("-C, --c++\n\
660 Treat files whose name suffix defaults to C language as C++ files.");
661 */
662
663 puts ("--declarations\n\
664 In C and derived languages, create tags for function declarations,");
665 if (CTAGS)
666 puts ("\tand create tags for extern variables if --globals is used.");
667 else
668 puts
669 ("\tand create tags for extern variables unless --no-globals is used.");
670
/* Each program documents only the switch that changes its own default. */
671 if (CTAGS)
672 puts ("-d, --defines\n\
673 Create tag entries for C #define constants and enum constants, too.");
674 else
675 puts ("-D, --no-defines\n\
676 Don't create tag entries for C #define constants and enum constants.\n\
677 This makes the tags file smaller.");
678
/* -i and -l are documented for etags only. */
679 if (!CTAGS)
680 {
681 puts ("-i FILE, --include=FILE\n\
682 Include a note in tag file indicating that, when searching for\n\
683 a tag, one should also consult the tags file FILE after\n\
684 checking the current file.");
685 puts ("-l LANG, --language=LANG\n\
686 Force the following files to be considered as written in the\n\
687 named language up to the next --language=LANG option.");
688 }
689
690 if (CTAGS)
691 puts ("--globals\n\
692 Create tag entries for global variables in some languages.");
693 else
694 puts ("--no-globals\n\
695 Do not create tag entries for global variables in some\n\
696 languages. This makes the tags file smaller.");
697 puts ("--members\n\
698 Create tag entries for member variables in C and derived languages.");
699
700 #ifdef ETAGS_REGEXPS
701 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
702 Make a tag for each line matching pattern REGEXP in the following\n\
703 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
704 regexfile is a file containing one REGEXP per line.\n\
705 REGEXP is anchored (as if preceded by ^).\n\
706 The form /REGEXP/NAME/ creates a named tag.\n\
707 For example Tcl named tags can be created with:\n\
708 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
709 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
710 Like -r, --regex but ignore case when matching expressions.");
711 puts ("-R, --no-regex\n\
712 Don't create tags from regexps for the following files.");
713 #endif /* ETAGS_REGEXPS */
714 puts ("-o FILE, --output=FILE\n\
715 Write the tags to FILE.");
716 puts ("-I, --ignore-indentation\n\
717 Don't rely on indentation quite as much as normal. Currently,\n\
718 this means not to assume that a closing brace in the first\n\
719 column is the final brace of a function or structure\n\
720 definition in C and C++.");
721
/* Options documented for ctags only. */
722 if (CTAGS)
723 {
724 puts ("-t, --typedefs\n\
725 Generate tag entries for C and Ada typedefs.");
726 puts ("-T, --typedefs-and-c++\n\
727 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
728 and C++ member functions.");
729 puts ("-u, --update\n\
730 Update the tag entries for the given files, leaving tag\n\
731 entries for other files in place. Currently, this is\n\
732 implemented by deleting the existing entries for the given\n\
733 files and then rewriting the new entries at the end of the\n\
734 tags file. It is often faster to simply rebuild the entire\n\
735 tag file than to use this.");
736 puts ("-v, --vgrind\n\
737 Generates an index of items intended for human consumption,\n\
738 similar to the output of vgrind. The index is sorted, and\n\
739 gives the page number of each item.");
740 puts ("-w, --no-warn\n\
741 Suppress warning messages about entries defined in multiple\n\
742 files.");
743 puts ("-x, --cxref\n\
744 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
745 The output uses line numbers instead of page numbers, but\n\
746 beyond that the differences are cosmetic; try both to see\n\
747 which you like.");
748 }
749
750 puts ("-V, --version\n\
751 Print the version of the program.\n\
752 -h, --help\n\
753 Print this help message.");
754
755 print_language_names ();
756
757 puts ("");
758 puts ("Report bugs to bug-gnu-emacs@gnu.org");
759
760 exit (GOOD);
761 }
762
763 \f
/* Kinds of entries main records in its argument buffer while it scans
   the command line. */
764 enum argument_type
765 {
766 at_language, /* -l: a language selection */
767 at_regexp, /* -r: a regexp; also -R, recorded with a NULL text */
768 at_filename, /* a file name */
769 at_icregexp /* -c: a regexp matched ignoring case */
770 };
771
772 /* This structure helps us allow mixing of --lang and file names. */
773 typedef struct
774 {
775 enum argument_type arg_type; /* which kind of entry this is */
776 char *what; /* the option argument: file name or regexp text; NULL for -R */
777 language *lang; /* language of the regexp; main stores the -l language here */
778 } argument;
779
780 #ifdef VMS /* VMS specific functions */
781
782 #define EOS '\0'
783
784 /* This is a BUG! ANY arbitrary limit is a BUG!
785 Won't someone please fix this? */
786 #define MAX_FILE_SPEC_LEN 255
/* File spec buffer that fn_exp passes to lib$find_file through a
   DSC$K_CLASS_VS descriptor pointing at the whole structure. */
787 typedef struct {
788 short curlen; /* current length of body; fn_exp terminates the string at body[curlen] */
789 char body[MAX_FILE_SPEC_LEN + 1]; /* the file spec text, with room for a final EOS */
790 } vspec;
791
792 /*
793 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
794 returning in each successive call the next file name matching the input
795 spec. The function expects that each in_spec passed
796 to it will be processed to completion; in particular, up to and
797 including the call following that in which the last matching name
798 is returned, the function ignores the value of in_spec, and will
799 only start processing a new spec with the following call.
800 If an error occurs, on return out_spec contains the value
801 of in_spec when the error occurred.
802
803 With each successive file name returned in out_spec, the
804 function's return value is one. When there are no more matching
805 names the function returns zero. If on the first call no file
806 matches in_spec, or there is any other error, -1 is returned.
807 */
808
809 #include <rmsdef.h>
810 #include <descrip.h>
811 #define OUTSIZE MAX_FILE_SPEC_LEN
812 static short
813 fn_exp (out, in)
814 vspec *out;
815 char *in;
816 {
817 static long context = 0;
818 static struct dsc$descriptor_s o;
819 static struct dsc$descriptor_s i;
820 static bool pass1 = TRUE;
821 long status;
822 short retval;
823
824 if (pass1)
825 {
826 pass1 = FALSE;
827 o.dsc$a_pointer = (char *) out;
828 o.dsc$w_length = (short)OUTSIZE;
829 i.dsc$a_pointer = in;
830 i.dsc$w_length = (short)strlen(in);
831 i.dsc$b_dtype = DSC$K_DTYPE_T;
832 i.dsc$b_class = DSC$K_CLASS_S;
833 o.dsc$b_dtype = DSC$K_DTYPE_VT;
834 o.dsc$b_class = DSC$K_CLASS_VS;
835 }
836 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
837 {
838 out->body[out->curlen] = EOS;
839 return 1;
840 }
841 else if (status == RMS$_NMF)
842 retval = 0;
843 else
844 {
845 strcpy(out->body, in);
846 retval = -1;
847 }
848 lib$find_file_end(&context);
849 pass1 = TRUE;
850 return retval;
851 }
852
853 /*
854 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
855 name of each file specified by the provided arg expanding wildcards.
856 */
857 static char *
858 gfnames (arg, p_error)
859 char *arg;
860 bool *p_error;
861 {
862 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
863
864 switch (fn_exp (&filename, arg))
865 {
866 case 1:
867 *p_error = FALSE;
868 return filename.body;
869 case 0:
870 *p_error = FALSE;
871 return NULL;
872 default:
873 *p_error = TRUE;
874 return filename.body;
875 }
876 }
877
878 #ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Stub for system() under VMS: CMD is ignored, an error message is
   issued, and -1 is returned so that a caller checking the status sees
   a failure rather than an indeterminate value.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
884 #endif
885
886 #define VERSION_DELIM ';'
887 char *massage_name (s)
888 char *s;
889 {
890 char *start = s;
891
892 for ( ; *s; s++)
893 if (*s == VERSION_DELIM)
894 {
895 *s = EOS;
896 break;
897 }
898 else
899 *s = lowcase (*s);
900 return start;
901 }
902 #endif /* VMS */
903
904 \f
905 int
906 main (argc, argv)
907 int argc;
908 char *argv[];
909 {
910 int i;
911 unsigned int nincluded_files;
912 char **included_files;
913 char *this_file;
914 argument *argbuffer;
915 int current_arg, file_count;
916 linebuffer filename_lb;
917 #ifdef VMS
918 bool got_err;
919 #endif
920
921 #ifdef DOS_NT
922 _fmode = O_BINARY; /* all of files are treated as binary files */
923 #endif /* DOS_NT */
924
925 progname = argv[0];
926 nincluded_files = 0;
927 included_files = xnew (argc, char *);
928 current_arg = 0;
929 file_count = 0;
930
931 /* Allocate enough no matter what happens. Overkill, but each one
932 is small. */
933 argbuffer = xnew (argc, argument);
934
935 #ifdef ETAGS_REGEXPS
936 /* Set syntax for regular expression routines. */
937 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
938 /* Translation table for case-insensitive search. */
939 for (i = 0; i < CHAR_SET_SIZE; i++)
940 lc_trans[i] = lowcase (i);
941 #endif /* ETAGS_REGEXPS */
942
943 /*
944 * If etags, always find typedefs and structure tags. Why not?
945 * Also default to find macro constants, enum constants and
946 * global variables.
947 */
948 if (!CTAGS)
949 {
950 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
951 globals = TRUE;
952 declarations = FALSE;
953 members = FALSE;
954 }
955
956 while (1)
957 {
958 int opt;
959 char *optstring;
960
961 #ifdef ETAGS_REGEXPS
962 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
963 #else
964 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
965 #endif /* ETAGS_REGEXPS */
966
967 #ifndef LONG_OPTIONS
968 optstring = optstring + 1;
969 #endif /* LONG_OPTIONS */
970
971 opt = getopt_long (argc, argv, optstring, longopts, 0);
972 if (opt == EOF)
973 break;
974
975 switch (opt)
976 {
977 case 0:
978 /* If getopt returns 0, then it has already processed a
979 long-named option. We should do nothing. */
980 break;
981
982 case 1:
983 /* This means that a file name has been seen. Record it. */
984 argbuffer[current_arg].arg_type = at_filename;
985 argbuffer[current_arg].what = optarg;
986 ++current_arg;
987 ++file_count;
988 break;
989
990 /* Common options. */
991 case 'a': append_to_tagfile = TRUE; break;
992 case 'C': cplusplus = TRUE; break;
993 case 'd': constantypedefs = TRUE; break;
994 case 'D': constantypedefs = FALSE; break;
995 case 'f': /* for compatibility with old makefiles */
996 case 'o':
997 if (tagfile)
998 {
999 error ("-o option may only be given once.", (char *)NULL);
1000 suggest_asking_for_help ();
1001 }
1002 tagfile = optarg;
1003 break;
1004 case 'I':
1005 case 'S': /* for backward compatibility */
1006 noindentypedefs = TRUE;
1007 break;
1008 case 'l':
1009 {
1010 language *lang = get_language_from_langname (optarg);
1011 if (lang != NULL)
1012 {
1013 argbuffer[current_arg].lang = lang;
1014 argbuffer[current_arg].arg_type = at_language;
1015 ++current_arg;
1016 }
1017 }
1018 break;
1019 #ifdef ETAGS_REGEXPS
1020 case 'r':
1021 argbuffer[current_arg].arg_type = at_regexp;
1022 argbuffer[current_arg].what = optarg;
1023 ++current_arg;
1024 break;
1025 case 'R':
1026 argbuffer[current_arg].arg_type = at_regexp;
1027 argbuffer[current_arg].what = NULL;
1028 ++current_arg;
1029 break;
1030 case 'c':
1031 argbuffer[current_arg].arg_type = at_icregexp;
1032 argbuffer[current_arg].what = optarg;
1033 ++current_arg;
1034 break;
1035 #endif /* ETAGS_REGEXPS */
1036 case 'V':
1037 print_version ();
1038 break;
1039 case 'h':
1040 case 'H':
1041 print_help ();
1042 break;
1043 case 't':
1044 typedefs = TRUE;
1045 break;
1046 case 'T':
1047 typedefs = typedefs_or_cplusplus = TRUE;
1048 break;
1049 #if (!CTAGS)
1050 /* Etags options */
1051 case 'i':
1052 included_files[nincluded_files++] = optarg;
1053 break;
1054 #else /* CTAGS */
1055 /* Ctags options. */
1056 case 'B': searchar = '?'; break;
1057 case 'u': update = TRUE; break;
1058 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1059 case 'x': cxref_style = TRUE; break;
1060 case 'w': no_warnings = TRUE; break;
1061 #endif /* CTAGS */
1062 default:
1063 suggest_asking_for_help ();
1064 }
1065 }
1066
1067 for (; optind < argc; ++optind)
1068 {
1069 argbuffer[current_arg].arg_type = at_filename;
1070 argbuffer[current_arg].what = argv[optind];
1071 ++current_arg;
1072 ++file_count;
1073 }
1074
1075 if (nincluded_files == 0 && file_count == 0)
1076 {
1077 error ("no input files specified.", (char *)NULL);
1078 suggest_asking_for_help ();
1079 }
1080
1081 if (tagfile == NULL)
1082 tagfile = CTAGS ? "tags" : "TAGS";
1083 cwd = etags_getcwd (); /* the current working directory */
1084 if (cwd[strlen (cwd) - 1] != '/')
1085 {
1086 char *oldcwd = cwd;
1087 cwd = concat (oldcwd, "/", "");
1088 free (oldcwd);
1089 }
1090 if (streq (tagfile, "-"))
1091 tagfiledir = cwd;
1092 else
1093 tagfiledir = absolute_dirname (tagfile, cwd);
1094
1095 init (); /* set up boolean "functions" */
1096
1097 initbuffer (&lb);
1098 initbuffer (&filename_lb);
1099
1100 if (!CTAGS)
1101 {
1102 if (streq (tagfile, "-"))
1103 {
1104 tagf = stdout;
1105 #ifdef DOS_NT
1106 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1107 doesn't take effect until after `stdout' is already open). */
1108 if (!isatty (fileno (stdout)))
1109 setmode (fileno (stdout), O_BINARY);
1110 #endif /* DOS_NT */
1111 }
1112 else
1113 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1114 if (tagf == NULL)
1115 pfatal (tagfile);
1116 }
1117
1118 /*
1119 * Loop through files finding functions.
1120 */
1121 for (i = 0; i < current_arg; ++i)
1122 {
1123 switch (argbuffer[i].arg_type)
1124 {
1125 case at_language:
1126 forced_lang = argbuffer[i].lang;
1127 break;
1128 #ifdef ETAGS_REGEXPS
1129 case at_regexp:
1130 analyse_regex (argbuffer[i].what, FALSE);
1131 break;
1132 case at_icregexp:
1133 analyse_regex (argbuffer[i].what, TRUE);
1134 break;
1135 #endif
1136 case at_filename:
1137 #ifdef VMS
1138 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1139 {
1140 if (got_err)
1141 {
1142 error ("can't find file %s\n", this_file);
1143 argc--, argv++;
1144 }
1145 else
1146 {
1147 this_file = massage_name (this_file);
1148 }
1149 #else
1150 this_file = argbuffer[i].what;
1151 #endif
1152 /* Input file named "-" means read file names from stdin
1153 (one per line) and use them. */
1154 if (streq (this_file, "-"))
1155 while (readline_internal (&filename_lb, stdin) > 0)
1156 process_file (filename_lb.buffer);
1157 else
1158 process_file (this_file);
1159 #ifdef VMS
1160 }
1161 #endif
1162 break;
1163 }
1164 }
1165
1166 #ifdef ETAGS_REGEXPS
1167 free_patterns ();
1168 #endif /* ETAGS_REGEXPS */
1169
1170 if (!CTAGS)
1171 {
1172 while (nincluded_files-- > 0)
1173 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1174
1175 fclose (tagf);
1176 exit (GOOD);
1177 }
1178
1179 /* If CTAGS, we are here. process_file did not write the tags yet,
1180 because we want them ordered. Let's do it now. */
1181 if (cxref_style)
1182 {
1183 put_entries (head);
1184 free_tree (head);
1185 head = NULL;
1186 exit (GOOD);
1187 }
1188
1189 if (update)
1190 {
1191 char cmd[BUFSIZ];
1192 for (i = 0; i < current_arg; ++i)
1193 {
1194 if (argbuffer[i].arg_type != at_filename)
1195 continue;
1196 sprintf (cmd,
1197 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1198 tagfile, argbuffer[i].what, tagfile);
1199 if (system (cmd) != GOOD)
1200 fatal ("failed to execute shell command", (char *)NULL);
1201 }
1202 append_to_tagfile = TRUE;
1203 }
1204
1205 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1206 if (tagf == NULL)
1207 pfatal (tagfile);
1208 put_entries (head);
1209 free_tree (head);
1210 head = NULL;
1211 fclose (tagf);
1212
1213 if (update)
1214 {
1215 char cmd[BUFSIZ];
1216 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1217 exit (system (cmd));
1218 }
1219 return GOOD;
1220 }
1221
1222
1223
1224 /*
1225 * Return a compressor given the file name. If EXTPTR is non-zero,
1226 * return a pointer into FILE where the compressor-specific
1227 * extension begins. If no compressor is found, NULL is returned
1228 * and EXTPTR is not significant.
1229 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1230 */
1231 static compressor *
1232 get_compressor_from_suffix (file, extptr)
1233 char *file;
1234 char **extptr;
1235 {
1236 compressor *compr;
1237 char *slash, *suffix;
1238
1239 /* This relies on FN to be after canonicalize_filename,
1240 so we don't need to consider backslashes on DOS_NT. */
1241 slash = etags_strrchr (file, '/');
1242 suffix = etags_strrchr (file, '.');
1243 if (suffix == NULL || suffix < slash)
1244 return NULL;
1245 if (extptr != NULL)
1246 *extptr = suffix;
1247 suffix += 1;
1248 /* Let those poor souls who live with DOS 8+3 file name limits get
1249 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1250 Only the first do loop is run if not MSDOS */
1251 do
1252 {
1253 for (compr = compressors; compr->suffix != NULL; compr++)
1254 if (streq (compr->suffix, suffix))
1255 return compr;
1256 if (!MSDOS)
1257 break; /* do it only once: not really a loop */
1258 if (extptr != NULL)
1259 *extptr = ++suffix;
1260 } while (*suffix != '\0');
1261 return NULL;
1262 }
1263
1264
1265
1266 /*
1267 * Return a language given the name.
1268 */
1269 static language *
1270 get_language_from_langname (name)
1271 char *name;
1272 {
1273 language *lang;
1274
1275 if (name == NULL)
1276 error ("empty language name", (char *)NULL);
1277 else
1278 {
1279 for (lang = lang_names; lang->name != NULL; lang++)
1280 if (streq (name, lang->name))
1281 return lang;
1282 error ("unknown language \"%s\"", name);
1283 }
1284
1285 return NULL;
1286 }
1287
1288
1289 /*
1290 * Return a language given the interpreter name.
1291 */
1292 static language *
1293 get_language_from_interpreter (interpreter)
1294 char *interpreter;
1295 {
1296 language *lang;
1297 char **iname;
1298
1299 if (interpreter == NULL)
1300 return NULL;
1301 for (lang = lang_names; lang->name != NULL; lang++)
1302 if (lang->interpreters != NULL)
1303 for (iname = lang->interpreters; *iname != NULL; iname++)
1304 if (streq (*iname, interpreter))
1305 return lang;
1306
1307 return NULL;
1308 }
1309
1310
1311
1312 /*
1313 * Return a language given the file name.
1314 */
1315 static language *
1316 get_language_from_filename (file)
1317 char *file;
1318 {
1319 language *lang;
1320 char **name, **ext, *suffix;
1321
1322 /* Try whole file name first. */
1323 for (lang = lang_names; lang->name != NULL; lang++)
1324 if (lang->filenames != NULL)
1325 for (name = lang->filenames; *name != NULL; name++)
1326 if (streq (*name, file))
1327 return lang;
1328
1329 /* If not found, try suffix after last dot. */
1330 suffix = etags_strrchr (file, '.');
1331 if (suffix == NULL)
1332 return NULL;
1333 suffix += 1;
1334 for (lang = lang_names; lang->name != NULL; lang++)
1335 if (lang->suffixes != NULL)
1336 for (ext = lang->suffixes; *ext != NULL; ext++)
1337 if (streq (*ext, suffix))
1338 return lang;
1339 return NULL;
1340 }
1341
1342
1343
1344 /*
1345 * This routine is called on each file argument.
1346 */
1347 static void
1348 process_file (file)
1349 char *file;
1350 {
1351 struct stat stat_buf;
1352 FILE *inf;
1353 compressor *compr;
1354 char *compressed_name, *uncompressed_name;
1355 char *ext, *real_name;
1356
1357
1358 canonicalize_filename (file);
1359 if (streq (file, tagfile) && !streq (tagfile, "-"))
1360 {
1361 error ("skipping inclusion of %s in self.", file);
1362 return;
1363 }
1364 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1365 {
1366 compressed_name = NULL;
1367 real_name = uncompressed_name = savestr (file);
1368 }
1369 else
1370 {
1371 real_name = compressed_name = savestr (file);
1372 uncompressed_name = savenstr (file, ext - file);
1373 }
1374
1375 /* If the canonicalised uncompressed name has already be dealt with,
1376 skip it silently, else add it to the list. */
1377 {
1378 typedef struct processed_file
1379 {
1380 char *filename;
1381 struct processed_file *next;
1382 } processed_file;
1383 static processed_file *pf_head = NULL;
1384 register processed_file *fnp;
1385
1386 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1387 if (streq (uncompressed_name, fnp->filename))
1388 goto exit;
1389 fnp = pf_head;
1390 pf_head = xnew (1, struct processed_file);
1391 pf_head->filename = savestr (uncompressed_name);
1392 pf_head->next = fnp;
1393 }
1394
1395 if (stat (real_name, &stat_buf) != 0)
1396 {
1397 /* Reset real_name and try with a different name. */
1398 real_name = NULL;
1399 if (compressed_name != NULL) /* try with the given suffix */
1400 {
1401 if (stat (uncompressed_name, &stat_buf) == 0)
1402 real_name = uncompressed_name;
1403 }
1404 else /* try all possible suffixes */
1405 {
1406 for (compr = compressors; compr->suffix != NULL; compr++)
1407 {
1408 compressed_name = concat (file, ".", compr->suffix);
1409 if (stat (compressed_name, &stat_buf) != 0)
1410 {
1411 if (MSDOS)
1412 {
1413 char *suf = compressed_name + strlen (file);
1414 size_t suflen = strlen (compr->suffix) + 1;
1415 for ( ; suf[1]; suf++, suflen--)
1416 {
1417 memmove (suf, suf + 1, suflen);
1418 if (stat (compressed_name, &stat_buf) == 0)
1419 {
1420 real_name = compressed_name;
1421 break;
1422 }
1423 }
1424 if (real_name != NULL)
1425 break;
1426 } /* MSDOS */
1427 free (compressed_name);
1428 compressed_name = NULL;
1429 }
1430 else
1431 {
1432 real_name = compressed_name;
1433 break;
1434 }
1435 }
1436 }
1437 if (real_name == NULL)
1438 {
1439 perror (file);
1440 goto exit;
1441 }
1442 } /* try with a different name */
1443
1444 if (!S_ISREG (stat_buf.st_mode))
1445 {
1446 error ("skipping %s: it is not a regular file.", real_name);
1447 goto exit;
1448 }
1449 if (real_name == compressed_name)
1450 {
1451 char *cmd = concat (compr->command, " ", real_name);
1452 inf = (FILE *) popen (cmd, "r");
1453 free (cmd);
1454 }
1455 else
1456 inf = fopen (real_name, "r");
1457 if (inf == NULL)
1458 {
1459 perror (real_name);
1460 goto exit;
1461 }
1462
1463 find_entries (uncompressed_name, inf);
1464
1465 if (real_name == compressed_name)
1466 pclose (inf);
1467 else
1468 fclose (inf);
1469
1470 if (!CTAGS)
1471 {
1472 char *filename;
1473
1474 if (filename_is_absolute (uncompressed_name))
1475 {
1476 /* file is an absolute file name. Canonicalise it. */
1477 filename = absolute_filename (uncompressed_name, cwd);
1478 }
1479 else
1480 {
1481 /* file is a file name relative to cwd. Make it relative
1482 to the directory of the tags file. */
1483 filename = relative_filename (uncompressed_name, tagfiledir);
1484 }
1485 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1486 free (filename);
1487 put_entries (head);
1488 free_tree (head);
1489 head = NULL;
1490 }
1491
1492 exit:
1493 if (compressed_name) free(compressed_name);
1494 if (uncompressed_name) free(uncompressed_name);
1495 return;
1496 }
1497
1498 /*
1499 * This routine sets up the boolean pseudo-functions which work
1500 * by setting boolean flags dependent upon the corresponding character.
1501 * Every char which is NOT in that string is not a white char. Therefore,
1502 * all of the array "_wht" is set to FALSE, and then the elements
1503 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1504 * of a char is TRUE if it is the string "white", else FALSE.
1505 */
1506 static void
1507 init ()
1508 {
1509 register char *sp;
1510 register int i;
1511
1512 for (i = 0; i < CHARS; i++)
1513 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1514 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1515 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1516 notinname('\0') = notinname('\n');
1517 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1518 begtoken('\0') = begtoken('\n');
1519 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1520 intoken('\0') = intoken('\n');
1521 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1522 endtoken('\0') = endtoken('\n');
1523 }
1524
1525 /*
1526 * This routine opens the specified file and calls the function
1527 * which finds the function and type definitions.
1528 */
1529 node *last_node = NULL;
1530
1531 static void
1532 find_entries (file, inf)
1533 char *file;
1534 FILE *inf;
1535 {
1536 char *cp;
1537 language *lang;
1538 node *old_last_node;
1539
1540 /* Memory leakage here: the string pointed by curfile is
1541 never released, because curfile is copied into np->file
1542 for each node, to be used in CTAGS mode. The amount of
1543 memory leaked here is the sum of the lengths of the
1544 file names. */
1545 curfile = savestr (file);
1546
1547 /* If user specified a language, use it. */
1548 lang = forced_lang;
1549 if (lang != NULL && lang->function != NULL)
1550 {
1551 curlang = lang;
1552 lang->function (inf);
1553 return;
1554 }
1555
1556 /* Try to guess the language given the file name. */
1557 lang = get_language_from_filename (file);
1558 if (lang != NULL && lang->function != NULL)
1559 {
1560 curlang = lang;
1561 lang->function (inf);
1562 return;
1563 }
1564
1565 /* Look for sharp-bang as the first two characters. */
1566 if (readline_internal (&lb, inf) > 0
1567 && lb.len >= 2
1568 && lb.buffer[0] == '#'
1569 && lb.buffer[1] == '!')
1570 {
1571 char *lp;
1572
1573 /* Set lp to point at the first char after the last slash in the
1574 line or, if no slashes, at the first nonblank. Then set cp to
1575 the first successive blank and terminate the string. */
1576 lp = etags_strrchr (lb.buffer+2, '/');
1577 if (lp != NULL)
1578 lp += 1;
1579 else
1580 lp = skip_spaces (lb.buffer + 2);
1581 cp = skip_non_spaces (lp);
1582 *cp = '\0';
1583
1584 if (strlen (lp) > 0)
1585 {
1586 lang = get_language_from_interpreter (lp);
1587 if (lang != NULL && lang->function != NULL)
1588 {
1589 curlang = lang;
1590 lang->function (inf);
1591 return;
1592 }
1593 }
1594 }
1595 /* We rewind here, even if inf may be a pipe. We fail if the
1596 length of the first line is longer than the pipe block size,
1597 which is unlikely. */
1598 rewind (inf);
1599
1600 /* Try Fortran. */
1601 old_last_node = last_node;
1602 curlang = get_language_from_langname ("fortran");
1603 Fortran_functions (inf);
1604
1605 /* No Fortran entries found. Try C. */
1606 if (old_last_node == last_node)
1607 {
1608 /* We do not tag if rewind fails.
1609 Only the file name will be recorded in the tags file. */
1610 rewind (inf);
1611 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1612 default_C_entries (inf);
1613 }
1614 return;
1615 }
1616
1617 \f
1618 /* Record a tag. */
1619 static void
1620 pfnote (name, is_func, linestart, linelen, lno, cno)
1621 char *name; /* tag name, or NULL if unnamed */
1622 bool is_func; /* tag is a function */
1623 char *linestart; /* start of the line where tag is */
1624 int linelen; /* length of the line where tag is */
1625 int lno; /* line number */
1626 long cno; /* character number */
1627 {
1628 register node *np;
1629
1630 if (CTAGS && name == NULL)
1631 return;
1632
1633 np = xnew (1, node);
1634
1635 /* If ctags mode, change name "main" to M<thisfilename>. */
1636 if (CTAGS && !cxref_style && streq (name, "main"))
1637 {
1638 register char *fp = etags_strrchr (curfile, '/');
1639 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1640 fp = etags_strrchr (np->name, '.');
1641 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1642 fp[0] = '\0';
1643 }
1644 else
1645 np->name = name;
1646 np->been_warned = FALSE;
1647 np->file = curfile;
1648 np->is_func = is_func;
1649 np->lno = lno;
1650 /* Our char numbers are 0-base, because of C language tradition?
1651 ctags compatibility? old versions compatibility? I don't know.
1652 Anyway, since emacs's are 1-base we expect etags.el to take care
1653 of the difference. If we wanted to have 1-based numbers, we would
1654 uncomment the +1 below. */
1655 np->cno = cno /* + 1 */ ;
1656 np->left = np->right = NULL;
1657 if (CTAGS && !cxref_style)
1658 {
1659 if (strlen (linestart) < 50)
1660 np->pat = concat (linestart, "$", "");
1661 else
1662 np->pat = savenstr (linestart, 50);
1663 }
1664 else
1665 np->pat = savenstr (linestart, linelen);
1666
1667 add_node (np, &head);
1668 }
1669
1670 /*
1671 * TAGS format specification
1672 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1673 *
1674 * pfnote should emit the optimized form [unnamed tag] only if:
1675 * 1. name does not contain any of the characters " \t\r\n(),;";
1676 * 2. linestart contains name as either a rightmost, or rightmost but
1677 * one character, substring;
1678 * 3. the character, if any, immediately before name in linestart must
1679 * be one of the characters " \t(),;";
1680 * 4. the character, if any, immediately after name in linestart must
1681 * also be one of the characters " \t(),;".
1682 *
1683 * The real implementation uses the notinname() macro, which recognises
1684 * characters slightly different form " \t\r\n(),;". See the variable
1685 * `nonam'.
1686 */
1687 #define traditional_tag_style TRUE
1688 static void
1689 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1690 char *name; /* tag name, or NULL if unnamed */
1691 int namelen; /* tag length */
1692 bool is_func; /* tag is a function */
1693 char *linestart; /* start of the line where tag is */
1694 int linelen; /* length of the line where tag is */
1695 int lno; /* line number */
1696 long cno; /* character number */
1697 {
1698 register char *cp;
1699 bool named;
1700
1701 named = TRUE;
1702 if (!CTAGS)
1703 {
1704 for (cp = name; !notinname (*cp); cp++)
1705 continue;
1706 if (*cp == '\0') /* rule #1 */
1707 {
1708 cp = linestart + linelen - namelen;
1709 if (notinname (linestart[linelen-1]))
1710 cp -= 1; /* rule #4 */
1711 if (cp >= linestart /* rule #2 */
1712 && (cp == linestart
1713 || notinname (cp[-1])) /* rule #3 */
1714 && strneq (name, cp, namelen)) /* rule #2 */
1715 named = FALSE; /* use unnamed tag */
1716 }
1717 }
1718
1719 if (named)
1720 name = savenstr (name, namelen);
1721 else
1722 name = NULL;
1723 pfnote (name, is_func, linestart, linelen, lno, cno);
1724 }
1725
1726 /*
1727 * free_tree ()
1728 * recurse on left children, iterate on right children.
1729 */
1730 static void
1731 free_tree (np)
1732 register node *np;
1733 {
1734 while (np)
1735 {
1736 register node *node_right = np->right;
1737 free_tree (np->left);
1738 if (np->name != NULL)
1739 free (np->name);
1740 free (np->pat);
1741 free (np);
1742 np = node_right;
1743 }
1744 }
1745
1746 /*
1747 * add_node ()
1748 * Adds a node to the tree of nodes. In etags mode, we don't keep
1749 * it sorted; we just keep a linear list. In ctags mode, maintain
1750 * an ordered tree, with no attempt at balancing.
1751 *
1752 * add_node is the only function allowed to add nodes, so it can
1753 * maintain state.
1754 */
1755 static void
1756 add_node (np, cur_node_p)
1757 node *np, **cur_node_p;
1758 {
1759 register int dif;
1760 register node *cur_node = *cur_node_p;
1761
1762 if (cur_node == NULL)
1763 {
1764 *cur_node_p = np;
1765 last_node = np;
1766 return;
1767 }
1768
1769 if (!CTAGS)
1770 {
1771 /* Etags Mode */
1772 if (last_node == NULL)
1773 fatal ("internal error in add_node", (char *)NULL);
1774 last_node->right = np;
1775 last_node = np;
1776 }
1777 else
1778 {
1779 /* Ctags Mode */
1780 dif = strcmp (np->name, cur_node->name);
1781
1782 /*
1783 * If this tag name matches an existing one, then
1784 * do not add the node, but maybe print a warning.
1785 */
1786 if (!dif)
1787 {
1788 if (streq (np->file, cur_node->file))
1789 {
1790 if (!no_warnings)
1791 {
1792 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1793 np->file, lineno, np->name);
1794 fprintf (stderr, "Second entry ignored\n");
1795 }
1796 }
1797 else if (!cur_node->been_warned && !no_warnings)
1798 {
1799 fprintf
1800 (stderr,
1801 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1802 np->file, cur_node->file, np->name);
1803 cur_node->been_warned = TRUE;
1804 }
1805 return;
1806 }
1807
1808 /* Actually add the node */
1809 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1810 }
1811 }
1812
1813 \f
1814 static void
1815 put_entries (np)
1816 register node *np;
1817 {
1818 register char *sp;
1819
1820 if (np == NULL)
1821 return;
1822
1823 /* Output subentries that precede this one */
1824 put_entries (np->left);
1825
1826 /* Output this entry */
1827
1828 if (!CTAGS)
1829 {
1830 if (np->name != NULL)
1831 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1832 np->pat, np->name, np->lno, np->cno);
1833 else
1834 fprintf (tagf, "%s\177%d,%ld\n",
1835 np->pat, np->lno, np->cno);
1836 }
1837 else
1838 {
1839 if (np->name == NULL)
1840 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1841
1842 if (cxref_style)
1843 {
1844 if (vgrind_style)
1845 fprintf (stdout, "%s %s %d\n",
1846 np->name, np->file, (np->lno + 63) / 64);
1847 else
1848 fprintf (stdout, "%-16s %3d %-16s %s\n",
1849 np->name, np->lno, np->file, np->pat);
1850 }
1851 else
1852 {
1853 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1854
1855 if (np->is_func)
1856 { /* a function */
1857 putc (searchar, tagf);
1858 putc ('^', tagf);
1859
1860 for (sp = np->pat; *sp; sp++)
1861 {
1862 if (*sp == '\\' || *sp == searchar)
1863 putc ('\\', tagf);
1864 putc (*sp, tagf);
1865 }
1866 putc (searchar, tagf);
1867 }
1868 else
1869 { /* a typedef; text pattern inadequate */
1870 fprintf (tagf, "%d", np->lno);
1871 }
1872 putc ('\n', tagf);
1873 }
1874 }
1875
1876 /* Output subentries that follow this one */
1877 put_entries (np->right);
1878 }
1879
/*
 * Length of a number's decimal representation, i.e. the number of
 * characters that printing NUM with "%ld" produces: one per digit,
 * plus one for the minus sign when NUM is negative.
 */
static int
number_len (num)
     long num;
{
  unsigned long magnitude;
  int len;

  if (num < 0)
    {
      /* Negate in unsigned arithmetic so the most negative value is
         handled as well. */
      magnitude = - (unsigned long) num;
      len = 2;                  /* the sign and the first digit */
    }
  else
    {
      magnitude = (unsigned long) num;
      len = 1;
    }
  while ((magnitude /= 10) > 0)
    len += 1;
  return len;
}
1890
1891 /*
1892 * Return total number of characters that put_entries will output for
1893 * the nodes in the subtree of the specified node. Works only if
1894 * we are not ctags, but called only in that case. This count
1895 * is irrelevant with the new tags.el, but is still supplied for
1896 * backward compatibility.
1897 */
1898 static int
1899 total_size_of_entries (np)
1900 register node *np;
1901 {
1902 register int total;
1903
1904 if (np == NULL)
1905 return 0;
1906
1907 for (total = 0; np != NULL; np = np->right)
1908 {
1909 /* Count left subentries. */
1910 total += total_size_of_entries (np->left);
1911
1912 /* Count this entry */
1913 total += strlen (np->pat) + 1;
1914 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1915 if (np->name != NULL)
1916 total += 1 + strlen (np->name); /* \001name */
1917 }
1918
1919 return total;
1920 }
1921
1922 \f
/*
 * The C symbol tables.
 *
 * Classification of the keywords known to the C-like language
 * parsers.  The keywords belonging to each class are listed in the
 * gperf input that follows.
 */
enum sym_type
{
  st_none,                      /* not a known keyword */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, return, friend, ... */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define */
  st_C_typedef,                 /* typedef */
  st_C_typespec                 /* int, char, static, const, ... */
};
1937
/* Forward declarations of the keyword lookup functions defined
   below.  P_ keeps the parameter lists for ANSI compilers and
   reduces them to () otherwise. */
1938 static unsigned int hash P_((const char *, unsigned int));
1939 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1940 static enum sym_type C_symtype P_((char *, int, int));
1941
1942 /* Feed stuff between (but not including) %[ and %] lines to:
1943 gperf -c -k 1,3 -o -p -r -t
1944 %[
1945 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1946 %%
1947 if, 0, st_C_ignore
1948 for, 0, st_C_ignore
1949 while, 0, st_C_ignore
1950 switch, 0, st_C_ignore
1951 return, 0, st_C_ignore
1952 @interface, 0, st_C_objprot
1953 @protocol, 0, st_C_objprot
1954 @implementation,0, st_C_objimpl
1955 @end, 0, st_C_objend
1956 import, C_JAVA, st_C_ignore
1957 package, C_JAVA, st_C_ignore
1958 friend, C_PLPL, st_C_ignore
1959 extends, C_JAVA, st_C_javastruct
1960 implements, C_JAVA, st_C_javastruct
1961 interface, C_JAVA, st_C_struct
1962 class, 0, st_C_class
1963 namespace, C_PLPL, st_C_struct
1964 domain, C_STAR, st_C_struct
1965 union, 0, st_C_struct
1966 struct, 0, st_C_struct
1967 extern, 0, st_C_extern
1968 enum, 0, st_C_enum
1969 typedef, 0, st_C_typedef
1970 define, 0, st_C_define
1971 operator, C_PLPL, st_C_operator
1972 bool, C_PLPL, st_C_typespec
1973 long, 0, st_C_typespec
1974 short, 0, st_C_typespec
1975 int, 0, st_C_typespec
1976 char, 0, st_C_typespec
1977 float, 0, st_C_typespec
1978 double, 0, st_C_typespec
1979 signed, 0, st_C_typespec
1980 unsigned, 0, st_C_typespec
1981 auto, 0, st_C_typespec
1982 void, 0, st_C_typespec
1983 static, 0, st_C_typespec
1984 const, 0, st_C_typespec
1985 volatile, 0, st_C_typespec
1986 explicit, C_PLPL, st_C_typespec
1987 mutable, C_PLPL, st_C_typespec
1988 typename, C_PLPL, st_C_typespec
1989 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1990 DEFUN, 0, st_C_gnumacro
1991 SYSCALL, 0, st_C_gnumacro
1992 ENTRY, 0, st_C_gnumacro
1993 PSEUDO, 0, st_C_gnumacro
1994 # These are defined inside C functions, so currently they are not met.
1995 # EXFUN used in glibc, DEFVAR_* in emacs.
1996 #EXFUN, 0, st_C_gnumacro
1997 #DEFVAR_, 0, st_C_gnumacro
1998 %]
1999 and replace lines between %< and %> with its output,
2000 then make in_word_set static. */
2001 /*%<*/
2002 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2003 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
/* One entry of the keyword table: NAME is the keyword text, C_EXT is
   zero or a mask of the language extensions the keyword is limited to
   (see how C_symtype tests it), TYPE is the keyword's classification. */
2004 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2005
/* Properties of the generated table: number of keywords, shortest and
   longest keyword, smallest and largest hash value in use. */
2006 #define TOTAL_KEYWORDS 46
2007 #define MIN_WORD_LENGTH 2
2008 #define MAX_WORD_LENGTH 15
2009 #define MIN_HASH_VALUE 13
2010 #define MAX_HASH_VALUE 121
2011 /* maximum key range = 109, duplicates = 0 */
2012
/* Hash function for the keyword table (gperf output; regenerate it
   rather than editing by hand).  The value is LEN plus the
   asso_values entry of the first character of STR and, when LEN is
   not 1 or 2, of its third character as well.  STR[2] is read for any
   LEN other than 1 or 2; in_word_set only calls this with LEN of at
   least MIN_WORD_LENGTH. */
2013 #ifdef __GNUC__
2014 __inline
2015 #endif
2016 static unsigned int
2017 hash (str, len)
2018 register const char *str;
2019 register unsigned int len;
2020 {
/* Per-character weights, indexed by character code.  Characters that
   carry 122 push the sum above MAX_HASH_VALUE. */
2021 static unsigned char asso_values[] =
2022 {
2023 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2024 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2025 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2026 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2027 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2028 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2029 122, 122, 122, 122, 57, 122, 122, 122, 55, 6,
2030 60, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2031 51, 122, 122, 10, 2, 122, 122, 122, 122, 122,
2032 122, 122, 122, 122, 122, 122, 122, 2, 52, 59,
2033 49, 38, 56, 41, 122, 22, 122, 122, 9, 32,
2034 33, 60, 26, 122, 1, 28, 46, 59, 44, 51,
2035 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2036 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2037 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2038 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2039 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2040 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2041 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2042 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2043 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2044 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2045 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2046 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2047 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
2048 122, 122, 122, 122, 122, 122
2049 };
2050 register int hval = len;
2051
/* The cases fall through on purpose: the third character is added
   first, then the first character. */
2052 switch (hval)
2053 {
2054 default:
2055 case 3:
2056 hval += asso_values[(unsigned char)str[2]];
2057 case 2:
2058 case 1:
2059 hval += asso_values[(unsigned char)str[0]];
2060 break;
2061 }
2062 return hval;
2063 }
2064
/* Look up the LEN characters starting at STR in the keyword table
   (gperf output; regenerate it rather than editing by hand).
   Returns a pointer to the matching table entry, or 0 if LEN is
   outside the range of keyword lengths, the hash value is outside the
   table, or the slot selected by the hash does not hold the same
   text. */
2065 #ifdef __GNUC__
2066 __inline
2067 #endif
2068 struct C_stab_entry *
2069 in_word_set (str, len)
2070 register const char *str;
2071 register unsigned int len;
2072 {
/* The table is indexed by hash value; unused slots hold an empty
   name. */
2073 static struct C_stab_entry wordlist[] =
2074 {
2075 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2076 {""}, {""}, {""}, {""},
2077 {"ENTRY", 0, st_C_gnumacro},
2078 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2079 {""},
2080 {"if", 0, st_C_ignore},
2081 {""}, {""},
2082 {"SYSCALL", 0, st_C_gnumacro},
2083 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2084 {"struct", 0, st_C_struct},
2085 {"static", 0, st_C_typespec},
2086 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2087 {"long", 0, st_C_typespec},
2088 {""}, {""}, {""}, {""}, {""},
2089 {"auto", 0, st_C_typespec},
2090 {"return", 0, st_C_ignore},
2091 {"import", C_JAVA, st_C_ignore},
2092 {""},
2093 {"switch", 0, st_C_ignore},
2094 {""},
2095 {"implements", C_JAVA, st_C_javastruct},
2096 {""},
2097 {"for", 0, st_C_ignore},
2098 {"volatile", 0, st_C_typespec},
2099 {""},
2100 {"PSEUDO", 0, st_C_gnumacro},
2101 {""},
2102 {"char", 0, st_C_typespec},
2103 {"class", 0, st_C_class},
2104 {"@protocol", 0, st_C_objprot},
2105 {""}, {""},
2106 {"void", 0, st_C_typespec},
2107 {"int", 0, st_C_typespec},
2108 {"explicit", C_PLPL, st_C_typespec},
2109 {""},
2110 {"namespace", C_PLPL, st_C_struct},
2111 {"signed", 0, st_C_typespec},
2112 {""},
2113 {"interface", C_JAVA, st_C_struct},
2114 {"while", 0, st_C_ignore},
2115 {"typedef", 0, st_C_typedef},
2116 {"typename", C_PLPL, st_C_typespec},
2117 {""}, {""}, {""},
2118 {"friend", C_PLPL, st_C_ignore},
2119 {"mutable", C_PLPL, st_C_typespec},
2120 {"union", 0, st_C_struct},
2121 {"domain", C_STAR, st_C_struct},
2122 {""}, {""},
2123 {"extern", 0, st_C_extern},
2124 {"extends", C_JAVA, st_C_javastruct},
2125 {"package", C_JAVA, st_C_ignore},
2126 {"short", 0, st_C_typespec},
2127 {"@end", 0, st_C_objend},
2128 {"unsigned", 0, st_C_typespec},
2129 {""},
2130 {"const", 0, st_C_typespec},
2131 {""}, {""},
2132 {"@interface", 0, st_C_objprot},
2133 {"enum", 0, st_C_enum},
2134 {""}, {""},
2135 {"@implementation",0, st_C_objimpl},
2136 {""},
2137 {"operator", C_PLPL, st_C_operator},
2138 {""}, {""}, {""}, {""},
2139 {"define", 0, st_C_define},
2140 {""}, {""},
2141 {"double", 0, st_C_typespec},
2142 {""},
2143 {"bool", C_PLPL, st_C_typespec},
2144 {""}, {""}, {""},
2145 {"DEFUN", 0, st_C_gnumacro},
2146 {"float", 0, st_C_typespec}
2147 };
2148
2149 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2150 {
2151 register int key = hash (str, len);
2152
2153 if (key <= MAX_HASH_VALUE && key >= 0)
2154 {
2155 register const char *s = wordlist[key].name;
2156
/* Compare the first character directly, then the remaining LEN - 1
   characters. */
2157 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2158 return &wordlist[key];
2159 }
2160 }
2161 return 0;
2162 }
2163 /*%>*/
2164
2165 static enum sym_type
2166 C_symtype (str, len, c_ext)
2167 char *str;
2168 int len;
2169 int c_ext;
2170 {
2171 register struct C_stab_entry *se = in_word_set (str, len);
2172
2173 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2174 return st_none;
2175 return se->type;
2176 }
2177
2178 \f
2179 /*
2180 * C functions and variables are recognized using a simple
2181 * finite automaton. fvdef is its state variable.
 * consider_token advances it when a token ends; C_entries advances it
 * on punctuation such as '(', ')', '{', ';', ',' and '='.
2182 */
2183 enum
2184 {
2185 fvnone, /* nothing seen */
2186 fdefunkey, /* GNU macro keyword (e.g. DEFUN, SYSCALL, PSEUDO) seen */
2187 fdefunname, /* name following a GNU macro keyword seen */
2188 foperator, /* func: operator keyword seen (cplpl) */
2189 fvnameseen, /* function or variable name seen */
2190 fstartlist, /* func: just after open parenthesis */
2191 finlist, /* func: in parameter list */
2192 flistseen, /* func: after parameter list */
2193 fignore, /* func: before open brace */
2194 vignore /* var-like: ignore until ';' */
2195 } fvdef;
2196
2197 bool fvextern; /* func or var: extern keyword seen; set by consider_token */
2198
2199 /*
2200 * typedefs are recognized using a simple finite automaton.
2201 * typdef is its state variable.
2202 */
2203 enum
2204 {
2205 tnone, /* nothing seen */
2206 tkeyseen, /* typedef keyword seen (entered only when `typedefs' is set) */
2207 ttypeseen, /* defined type seen */
2208 tinbody, /* inside typedef body: from the '{' seen in ttypeseen to its matching '}' */
2209 tend, /* just before typedef tag */
2210 tignore /* junk after typedef tag: tokens are not considered in this state */
2211 } typdef;
2212
2213 /*
2214 * struct-like structures (enum, struct and union) are recognized
2215 * using another simple finite automaton. `structdef' is its state
2216 * variable.
 * The keyword table also classifies class, namespace, interface and
 * domain as struct-like keywords.
2217 */
2218 enum
2219 {
2220 snone, /* nothing seen yet,
2221 or in struct body if cblev > 0 */
2222 skeyseen, /* struct-like keyword seen */
2223 stagseen, /* struct-like tag seen */
2224 sintemplate, /* inside template (ignore): between '<' and '>' after the tag */
2225 scolonseen /* colon (or Java extends/implements) seen after struct-like tag */
2226 } structdef;
2227
2228 /*
2229 * When objdef is different from onone, objtag is the name of the class.
 * It is assigned by consider_token (via savenstr) once the class name
 * token has been read, and the saved string is deliberately never freed.
2230 */
2231 char *objtag = "<uninited>";
2232
2233 /*
2234 * Yet another little state machine to deal with preprocessor lines.
 * The CNL macro resets it to dnone at the end of each line that is not
 * continued; CNL_SAVE_DEFINEDEF leaves it untouched.
2235 */
2236 enum
2237 {
2238 dnone, /* nothing seen */
2239 dsharpseen, /* '#' seen as first char on line (blanks and comments may precede it) */
2240 ddefineseen, /* '#' and 'define' seen */
2241 dignorerest /* ignore rest of line */
2242 } definedef;
2243
2244 /*
2245 * State machine for Objective C protocols and implementations.
2246 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2247 */
2248 enum
2249 {
2250 onone, /* nothing seen */
2251 oprotocol, /* @interface or @protocol seen */
2252 oimplementation, /* @implementation seen */
2253 otagseen, /* class name seen */
2254 oparenseen, /* parenthesis before category seen */
2255 ocatseen, /* category name seen */
2256 oinbody, /* in @implementation body */
2257 omethodsign, /* in @implementation body, after +/- */
2258 omethodtag, /* after method name */
2259 omethodcolon, /* after method colon */
2260 omethodparm, /* after method parameter */
2261 oignore /* wait for @end */
2262 } objdef;
2263
2264
2265 /*
2266 * Use this structure to keep info about the token read, and how it
2267 * should be tagged. Used by the make_C_tag function to build a tag.
2268 */
2269 struct tok
2270 {
2271 bool valid; /* TRUE while the token is a pending tag candidate; cleared by make_C_tag */
2272 bool named; /* TRUE if the tag should carry the explicit name held in token_name */
2273 int offset; /* offset of the token start within `line' */
2274 int length; /* token length in characters */
2275 int lineno; /* value of lineno when the token was recorded */
2276 long linepos; /* value of charno before `line' was read */
2277 char *line; /* buffer holding the line that contains the token */
2278 } token; /* latest token read */
2279 linebuffer token_name; /* its name */
2280
2281 /*
2282 * Variables and functions for dealing with nested structures.
2283 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2284 */
2285 static void pushclass_above P_((int, char *, int));
2286 static void popclass_above P_((int));
2287 static void write_classname P_((linebuffer *, char *qualifier));
2288
2289 struct {
2290 char **cname; /* nested class names (NULL entry for an unnamed struct) */
2291 int *cblev; /* nested class curly brace level */
2292 int nl; /* class nesting level (elements used) */
2293 int size; /* length of the array */
2294 } cstack; /* stack for nested declaration tags */
2295 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2296 #define nestlev (cstack.nl)
2297 /* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that uses a variable named cblev from the
   scope where the macro is used. */
2298 #define instruct (structdef == snone && nestlev > 0 \
2299 && cblev == cstack.cblev[nestlev-1] + 1)
2300
2301 static void
2302 pushclass_above (cblev, str, len)
2303 int cblev;
2304 char *str;
2305 int len;
2306 {
2307 int nl;
2308
2309 popclass_above (cblev);
2310 nl = cstack.nl;
2311 if (nl >= cstack.size)
2312 {
2313 int size = cstack.size *= 2;
2314 xrnew (cstack.cname, size, char *);
2315 xrnew (cstack.cblev, size, int);
2316 }
2317 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2318 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2319 cstack.cblev[nl] = cblev;
2320 cstack.nl = nl + 1;
2321 }
2322
2323 static void
2324 popclass_above (cblev)
2325 int cblev;
2326 {
2327 int nl;
2328
2329 for (nl = cstack.nl - 1;
2330 nl >= 0 && cstack.cblev[nl] >= cblev;
2331 nl--)
2332 {
2333 if (cstack.cname[nl] != NULL)
2334 free (cstack.cname[nl]);
2335 cstack.nl = nl;
2336 }
2337 }
2338
2339 static void
2340 write_classname (cn, qualifier)
2341 linebuffer *cn;
2342 char *qualifier;
2343 {
2344 int i, len;
2345 int qlen = strlen (qualifier);
2346
2347 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2348 {
2349 len = 0;
2350 cn->len = 0;
2351 cn->buffer[0] = '\0';
2352 }
2353 else
2354 {
2355 len = strlen (cstack.cname[0]);
2356 linebuffer_setlen (cn, len);
2357 strcpy (cn->buffer, cstack.cname[0]);
2358 }
2359 for (i = 1; i < cstack.nl; i++)
2360 {
2361 char *s;
2362 int slen;
2363
2364 s = cstack.cname[i];
2365 if (s == NULL)
2366 continue;
2367 slen = strlen (s);
2368 len += slen + qlen;
2369 linebuffer_setlen (cn, len);
2370 strncat (cn->buffer, qualifier, qlen);
2371 strncat (cn->buffer, s, slen);
2372 }
2373 }
2374
2375 \f
2376 static bool consider_token P_((char *, int, int, int *, int, int, bool *));
2377 static void make_C_tag P_((bool));
2378
2379 /*
2380 * consider_token ()
2381 * checks to see if the current token is at the start of a
2382 * function or variable, or corresponds to a typedef, or
2383 * is a struct/union/enum tag, or #define, or an enum constant.
2384 *
2385 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2386 * with args. C_EXTP points to which language we are looking at.
2387 *
2388 * Globals
2389 * fvdef IN OUT
2390 * structdef IN OUT
2391 * definedef IN OUT
2392 * typdef IN OUT
2393 * objdef IN OUT
2394 */
2395
2396 static bool
2397 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2398 register char *str; /* IN: token pointer */
2399 register int len; /* IN: token length */
2400 register int c; /* IN: first char after the token */
2401 int *c_extp; /* IN, OUT: C extensions mask */
2402 int cblev; /* IN: curly brace level */
2403 int parlev; /* IN: parenthesis level */
2404 bool *is_func_or_var; /* OUT: function or variable found */
2405 {
2406 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2407 structtype is the type of the preceding struct-like keyword, and
2408 structcblev is the curly brace level where it has been seen. */
2409 static enum sym_type structtype;
2410 static int structcblev;
2411 static enum sym_type toktype;
2412
2413
2414 toktype = C_symtype (str, len, *c_extp);
2415
2416 /*
2417 * Advance the definedef state machine.
2418 */
2419 switch (definedef)
2420 {
2421 case dnone:
2422 /* We're not on a preprocessor line. */
2423 if (toktype == st_C_gnumacro)
2424 {
2425 fvdef = fdefunkey;
2426 return FALSE;
2427 }
2428 break;
2429 case dsharpseen:
2430 if (toktype == st_C_define)
2431 {
2432 definedef = ddefineseen;
2433 }
2434 else
2435 {
2436 definedef = dignorerest;
2437 }
2438 return FALSE;
2439 case ddefineseen:
2440 /*
2441 * Make a tag for any macro, unless it is a constant
2442 * and constantypedefs is FALSE.
2443 */
2444 definedef = dignorerest;
2445 *is_func_or_var = (c == '(');
2446 if (!*is_func_or_var && !constantypedefs)
2447 return FALSE;
2448 else
2449 return TRUE;
2450 case dignorerest:
2451 return FALSE;
2452 default:
2453 error ("internal error: definedef value.", (char *)NULL);
2454 }
2455
2456 /*
2457 * Now typedefs
2458 */
2459 switch (typdef)
2460 {
2461 case tnone:
2462 if (toktype == st_C_typedef)
2463 {
2464 if (typedefs)
2465 typdef = tkeyseen;
2466 fvextern = FALSE;
2467 fvdef = fvnone;
2468 return FALSE;
2469 }
2470 break;
2471 case tkeyseen:
2472 switch (toktype)
2473 {
2474 case st_none:
2475 case st_C_typespec:
2476 case st_C_class:
2477 case st_C_struct:
2478 case st_C_enum:
2479 typdef = ttypeseen;
2480 break;
2481 }
2482 break;
2483 case ttypeseen:
2484 if (structdef == snone && fvdef == fvnone)
2485 {
2486 fvdef = fvnameseen;
2487 return TRUE;
2488 }
2489 break;
2490 case tend:
2491 switch (toktype)
2492 {
2493 case st_C_typespec:
2494 case st_C_class:
2495 case st_C_struct:
2496 case st_C_enum:
2497 return FALSE;
2498 }
2499 return TRUE;
2500 }
2501
2502 /*
2503 * This structdef business is NOT invoked when we are ctags and the
2504 * file is plain C. This is because a struct tag may have the same
2505 * name as another tag, and this loses with ctags.
2506 */
2507 switch (toktype)
2508 {
2509 case st_C_javastruct:
2510 if (structdef == stagseen)
2511 structdef = scolonseen;
2512 return FALSE;
2513 case st_C_class:
2514 if (cblev == 0
2515 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2516 && definedef == dnone && structdef == snone
2517 && typdef == tnone && fvdef == fvnone)
2518 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2519 /* FALLTHRU */
2520 case st_C_struct:
2521 case st_C_enum:
2522 if (parlev == 0
2523 && fvdef != vignore
2524 && (typdef == tkeyseen
2525 || (typedefs_or_cplusplus && structdef == snone)))
2526 {
2527 structdef = skeyseen;
2528 structtype = toktype;
2529 structcblev = cblev;
2530 }
2531 return FALSE;
2532 }
2533
2534 if (structdef == skeyseen)
2535 {
2536 structdef = stagseen;
2537 return TRUE;
2538 }
2539
2540 if (typdef != tnone)
2541 definedef = dnone;
2542
2543 /* Detect Objective C constructs. */
2544 switch (objdef)
2545 {
2546 case onone:
2547 switch (toktype)
2548 {
2549 case st_C_objprot:
2550 objdef = oprotocol;
2551 return FALSE;
2552 case st_C_objimpl:
2553 objdef = oimplementation;
2554 return FALSE;
2555 }
2556 break;
2557 case oimplementation:
2558 /* Save the class tag for functions or variables defined inside. */
2559 objtag = savenstr (str, len);
2560 objdef = oinbody;
2561 return FALSE;
2562 case oprotocol:
2563 /* Save the class tag for categories. */
2564 objtag = savenstr (str, len);
2565 objdef = otagseen;
2566 *is_func_or_var = TRUE;
2567 return TRUE;
2568 case oparenseen:
2569 objdef = ocatseen;
2570 *is_func_or_var = TRUE;
2571 return TRUE;
2572 case oinbody:
2573 break;
2574 case omethodsign:
2575 if (parlev == 0)
2576 {
2577 objdef = omethodtag;
2578 linebuffer_setlen (&token_name, len);
2579 strncpy (token_name.buffer, str, len);
2580 token_name.buffer[len] = '\0';
2581 return TRUE;
2582 }
2583 return FALSE;
2584 case omethodcolon:
2585 if (parlev == 0)
2586 objdef = omethodparm;
2587 return FALSE;
2588 case omethodparm:
2589 if (parlev == 0)
2590 {
2591 objdef = omethodtag;
2592 linebuffer_setlen (&token_name, token_name.len + len);
2593 strncat (token_name.buffer, str, len);
2594 return TRUE;
2595 }
2596 return FALSE;
2597 case oignore:
2598 if (toktype == st_C_objend)
2599 {
2600 /* Memory leakage here: the string pointed by objtag is
2601 never released, because many tests would be needed to
2602 avoid breaking on incorrect input code. The amount of
2603 memory leaked here is the sum of the lengths of the
2604 class tags.
2605 free (objtag); */
2606 objdef = onone;
2607 }
2608 return FALSE;
2609 }
2610
2611 /* A function, variable or enum constant? */
2612 switch (toktype)
2613 {
2614 case st_C_extern:
2615 fvextern = TRUE;
2616 /* FALLTHRU */
2617 case st_C_typespec:
2618 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2619 fvdef = fvnone; /* should be useless */
2620 return FALSE;
2621 case st_C_ignore:
2622 fvextern = FALSE;
2623 fvdef = vignore;
2624 return FALSE;
2625 case st_C_operator:
2626 fvdef = foperator;
2627 *is_func_or_var = TRUE;
2628 return TRUE;
2629 case st_none:
2630 if (constantypedefs
2631 && structdef == snone
2632 && structtype == st_C_enum && cblev > structcblev)
2633 return TRUE; /* enum constant */
2634 switch (fvdef)
2635 {
2636 case fdefunkey:
2637 if (cblev > 0)
2638 break;
2639 fvdef = fdefunname; /* GNU macro */
2640 *is_func_or_var = TRUE;
2641 return TRUE;
2642 case fvnone:
2643 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2644 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2645 {
2646 fvdef = vignore;
2647 return FALSE;
2648 }
2649 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2650 {
2651 fvdef = foperator;
2652 *is_func_or_var = TRUE;
2653 return TRUE;
2654 }
2655 if (cblev > 0 && !instruct)
2656 break;
2657 fvdef = fvnameseen; /* function or variable */
2658 *is_func_or_var = TRUE;
2659 return TRUE;
2660 }
2661 break;
2662 }
2663
2664 return FALSE;
2665 }
2666
2667 \f
2668 /*
2669 * C_entries often keeps pointers to tokens or lines which are older than
2670 * the line currently read. By keeping two line buffers, and switching
2671 * them at end of line, it is possible to use those pointers.
2672 */
2673 struct
2674 {
2675 long linepos; /* value of charno before the line was read */
2676 linebuffer lb; /* text of the line */
2677 } lbs[2];
2678
/* The macros below are meant to be expanded inside C_entries: they refer
   to its locals (curndx, newndx, lp, quotednl, inf, savetoken) by name. */
2679 #define current_lb_is_new (newndx == curndx)
2680 #define switch_line_buffers() (curndx = 1 - curndx)
2681
2682 #define curlb (lbs[curndx].lb)
2683 #define newlb (lbs[newndx].lb)
2684 #define curlinepos (lbs[curndx].linepos)
2685 #define newlinepos (lbs[newndx].linepos)
2686
/* Read the next line into curlb, update lineno, linecharno and charno,
   point lp at the new text and make curlb the new buffer as well.
   definedef is left as it is. */
2687 #define CNL_SAVE_DEFINEDEF() \
2688 do { \
2689 curlinepos = charno; \
2690 lineno++; \
2691 linecharno = charno; \
2692 charno += readline (&curlb, inf); \
2693 lp = curlb.buffer; \
2694 quotednl = FALSE; \
2695 newndx = curndx; \
2696 } while (0)
2697
/* Like CNL_SAVE_DEFINEDEF, but also restore the token saved when a
   preprocessor line began (if any) and reset definedef to dnone. */
2698 #define CNL() \
2699 do { \
2700 CNL_SAVE_DEFINEDEF(); \
2701 if (savetoken.valid) \
2702 { \
2703 token = savetoken; \
2704 savetoken.valid = FALSE; \
2705 } \
2706 definedef = dnone; \
2707 } while (0)
2708
2709
2710 static void
2711 make_C_tag (isfun)
2712 bool isfun;
2713 {
2714 /* This function should never be called when token.valid is FALSE, but
2715 we must protect against invalid input or internal errors. */
2716 if (DEBUG || token.valid)
2717 {
2718 if (traditional_tag_style)
2719 {
2720 /* This was the original code. Now we call new_pfnote instead,
2721 which uses the new method for naming tags (see new_pfnote). */
2722 char *name = NULL;
2723
2724 if (CTAGS || token.named)
2725 name = savestr (token_name.buffer);
2726 if (DEBUG && !token.valid)
2727 {
2728 if (token.named)
2729 name = concat (name, "##invalid##", "");
2730 else
2731 name = savestr ("##invalid##");
2732 }
2733 pfnote (name, isfun, token.line,
2734 token.offset+token.length+1, token.lineno, token.linepos);
2735 }
2736 else
2737 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2738 token.offset+token.length+1, token.lineno, token.linepos);
2739 token.valid = FALSE;
2740 }
2741 }
2742
2743
2744 /*
2745 * C_entries ()
2746 * This routine finds functions, variables, typedefs,
2747 * #define's, enum constants and struct/union/enum definitions in
2748 * C syntax and adds them to the list.
2749 */
2750 static void
2751 C_entries (c_ext, inf)
2752 int c_ext; /* extension of C */
2753 FILE *inf; /* input file */
2754 {
2755 register char c; /* latest char read; '\0' for end of line */
2756 register char *lp; /* pointer one beyond the character `c' */
2757 int curndx, newndx; /* indices for current and new lb */
2758 register int tokoff; /* offset in line of start of current token */
2759 register int toklen; /* length of current token */
2760 char *qualifier; /* string used to qualify names */
2761 int qlen; /* length of qualifier */
2762 int cblev; /* current curly brace level */
2763 int parlev; /* current parenthesis level */
2764 int typdefcblev; /* cblev where a typedef struct body begun */
2765 bool incomm, inquote, inchar, quotednl, midtoken;
2766 bool cplpl, cjava;
2767 bool yacc_rules; /* in the rules part of a yacc file */
2768 struct tok savetoken; /* token saved during preprocessor handling */
2769
2770
2771 initbuffer (&token_name);
2772 initbuffer (&lbs[0].lb);
2773 initbuffer (&lbs[1].lb);
2774 if (cstack.size == 0)
2775 {
2776 cstack.size = (DEBUG) ? 1 : 4;
2777 cstack.nl = 0;
2778 cstack.cname = xnew (cstack.size, char *);
2779 cstack.cblev = xnew (cstack.size, int);
2780 }
2781
2782 tokoff = toklen = 0; /* keep compiler quiet */
2783 curndx = newndx = 0;
2784 lineno = 0;
2785 charno = 0;
2786 lp = curlb.buffer;
2787 *lp = 0;
2788
2789 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2790 structdef = snone; definedef = dnone; objdef = onone;
2791 yacc_rules = FALSE;
2792 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2793 token.valid = savetoken.valid = FALSE;
2794 cblev = 0;
2795 parlev = 0;
2796 cplpl = (c_ext & C_PLPL) == C_PLPL;
2797 cjava = (c_ext & C_JAVA) == C_JAVA;
2798 if (cjava)
2799 { qualifier = "."; qlen = 1; }
2800 else
2801 { qualifier = "::"; qlen = 2; }
2802
2803
2804 while (!feof (inf))
2805 {
2806 c = *lp++;
2807 if (c == '\\')
2808 {
2809 /* If we're at the end of the line, the next character is a
2810 '\0'; don't skip it, because it's the thing that tells us
2811 to read the next line. */
2812 if (*lp == '\0')
2813 {
2814 quotednl = TRUE;
2815 continue;
2816 }
2817 lp++;
2818 c = ' ';
2819 }
2820 else if (incomm)
2821 {
2822 switch (c)
2823 {
2824 case '*':
2825 if (*lp == '/')
2826 {
2827 c = *lp++;
2828 incomm = FALSE;
2829 }
2830 break;
2831 case '\0':
2832 /* Newlines inside comments do not end macro definitions in
2833 traditional cpp. */
2834 CNL_SAVE_DEFINEDEF ();
2835 break;
2836 }
2837 continue;
2838 }
2839 else if (inquote)
2840 {
2841 switch (c)
2842 {
2843 case '"':
2844 inquote = FALSE;
2845 break;
2846 case '\0':
2847 /* Newlines inside strings do not end macro definitions
2848 in traditional cpp, even though compilers don't
2849 usually accept them. */
2850 CNL_SAVE_DEFINEDEF ();
2851 break;
2852 }
2853 continue;
2854 }
2855 else if (inchar)
2856 {
2857 switch (c)
2858 {
2859 case '\0':
2860 /* Hmmm, something went wrong. */
2861 CNL ();
2862 /* FALLTHRU */
2863 case '\'':
2864 inchar = FALSE;
2865 break;
2866 }
2867 continue;
2868 }
2869 else
2870 switch (c)
2871 {
2872 case '"':
2873 inquote = TRUE;
2874 switch (fvdef)
2875 {
2876 case fdefunkey:
2877 case fstartlist:
2878 case finlist:
2879 case fignore:
2880 case vignore:
2881 break;
2882 default:
2883 fvextern = FALSE;
2884 fvdef = fvnone;
2885 }
2886 continue;
2887 case '\'':
2888 inchar = TRUE;
2889 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2890 {
2891 fvextern = FALSE;
2892 fvdef = fvnone;
2893 }
2894 continue;
2895 case '/':
2896 if (*lp == '*')
2897 {
2898 lp++;
2899 incomm = TRUE;
2900 continue;
2901 }
2902 else if (/* cplpl && */ *lp == '/')
2903 {
2904 c = '\0';
2905 break;
2906 }
2907 else
2908 break;
2909 case '%':
2910 if ((c_ext & YACC) && *lp == '%')
2911 {
2912 /* Entering or exiting rules section in yacc file. */
2913 lp++;
2914 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2915 typdef = tnone; structdef = snone;
2916 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2917 cblev = 0;
2918 yacc_rules = !yacc_rules;
2919 continue;
2920 }
2921 else
2922 break;
2923 case '#':
2924 if (definedef == dnone)
2925 {
2926 char *cp;
2927 bool cpptoken = TRUE;
2928
2929 /* Look back on this line. If all blanks, or nonblanks
2930 followed by an end of comment, this is a preprocessor
2931 token. */
2932 for (cp = newlb.buffer; cp < lp-1; cp++)
2933 if (!iswhite (*cp))
2934 {
2935 if (*cp == '*' && *(cp+1) == '/')
2936 {
2937 cp++;
2938 cpptoken = TRUE;
2939 }
2940 else
2941 cpptoken = FALSE;
2942 }
2943 if (cpptoken)
2944 definedef = dsharpseen;
2945 } /* if (definedef == dnone) */
2946
2947 continue;
2948 } /* switch (c) */
2949
2950
2951 /* Consider token only if some involved conditions are satisfied. */
2952 if (typdef != tignore
2953 && definedef != dignorerest
2954 && fvdef != finlist
2955 && structdef != sintemplate
2956 && (definedef != dnone
2957 || structdef != scolonseen))
2958 {
2959 if (midtoken)
2960 {
2961 if (endtoken (c))
2962 {
2963 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2964 {
2965 /*
2966 * This handles :: in the middle, but not at the
2967 * beginning of an identifier. Also, space-separated
2968 * :: is not recognised.
2969 */
2970 lp += 2;
2971 toklen += 2;
2972 c = lp[-1];
2973 goto intoken;
2974 }
2975 else
2976 {
2977 bool funorvar = FALSE;
2978
2979 if (yacc_rules
2980 || consider_token (newlb.buffer + tokoff, toklen, c,
2981 &c_ext, cblev, parlev, &funorvar))
2982 {
2983 if (fvdef == foperator)
2984 {
2985 char *oldlp = lp;
2986 lp = skip_spaces (lp-1);
2987 if (*lp != '\0')
2988 lp += 1;
2989 while (*lp != '\0'
2990 && !iswhite (*lp) && *lp != '(')
2991 lp += 1;
2992 c = *lp++;
2993 toklen += lp - oldlp;
2994 }
2995 token.named = FALSE;
2996 if ((c_ext & C_EXT) /* not pure C */
2997 && nestlev > 0 && definedef == dnone)
2998 /* in struct body */
2999 {
3000 write_classname (&token_name, qualifier);
3001 linebuffer_setlen (&token_name,
3002 token_name.len+qlen+toklen);
3003 strcat (token_name.buffer, qualifier);
3004 strncat (token_name.buffer,
3005 newlb.buffer + tokoff, toklen);
3006 token.named = TRUE;
3007 }
3008 else if (objdef == ocatseen)
3009 /* Objective C category */
3010 {
3011 int len = strlen (objtag) + 2 + toklen;
3012 linebuffer_setlen (&token_name, len);
3013 strcpy (token_name.buffer, objtag);
3014 strcat (token_name.buffer, "(");
3015 strncat (token_name.buffer,
3016 newlb.buffer + tokoff, toklen);
3017 strcat (token_name.buffer, ")");
3018 token.named = TRUE;
3019 }
3020 else if (objdef == omethodtag
3021 || objdef == omethodparm)
3022 /* Objective C method */
3023 {
3024 token.named = TRUE;
3025 }
3026 else if (fvdef == fdefunname)
3027 /* GNU DEFUN and similar macros */
3028 {
3029 bool defun = (newlb.buffer[tokoff] == 'F');
3030 int off = tokoff;
3031 int len = toklen;
3032
3033 /* Rewrite the tag so that emacs lisp DEFUNs
3034 can be found by their elisp name */
3035 if (defun)
3036 {
3037 off += 1;
3038 len -= 1;
3039 }
3040 len = toklen;
3041 linebuffer_setlen (&token_name, len);
3042 strncpy (token_name.buffer,
3043 newlb.buffer + off, len);
3044 token_name.buffer[len] = '\0';
3045 if (defun)
3046 while (--len >= 0)
3047 if (token_name.buffer[len] == '_')
3048 token_name.buffer[len] = '-';
3049 token.named = defun;
3050 }
3051 else
3052 {
3053 linebuffer_setlen (&token_name, toklen);
3054 strncpy (token_name.buffer,
3055 newlb.buffer + tokoff, toklen);
3056 token_name.buffer[toklen] = '\0';
3057 /* Name macros and members. */
3058 token.named = (structdef == stagseen
3059 || typdef == ttypeseen
3060 || typdef == tend
3061 || (funorvar
3062 && definedef == dignorerest)
3063 || (funorvar
3064 && definedef == dnone
3065 && structdef == snone
3066 && cblev > 0));
3067 }
3068 token.lineno = lineno;
3069 token.offset = tokoff;
3070 token.length = toklen;
3071 token.line = newlb.buffer;
3072 token.linepos = newlinepos;
3073 token.valid = TRUE;
3074
3075 if (definedef == dnone
3076 && (fvdef == fvnameseen
3077 || fvdef == foperator
3078 || structdef == stagseen
3079 || typdef == tend
3080 || typdef == ttypeseen
3081 || objdef != onone))
3082 {
3083 if (current_lb_is_new)
3084 switch_line_buffers ();
3085 }
3086 else if (definedef != dnone
3087 || fvdef == fdefunname
3088 || instruct)
3089 make_C_tag (funorvar);
3090 }
3091 midtoken = FALSE;
3092 }
3093 } /* if (endtoken (c)) */
3094 else if (intoken (c))
3095 intoken:
3096 {
3097 toklen++;
3098 continue;
3099 }
3100 } /* if (midtoken) */
3101 else if (begtoken (c))
3102 {
3103 switch (definedef)
3104 {
3105 case dnone:
3106 switch (fvdef)
3107 {
3108 case fstartlist:
3109 fvdef = finlist;
3110 continue;
3111 case flistseen:
3112 make_C_tag (TRUE); /* a function */
3113 fvdef = fignore;
3114 break;
3115 case fvnameseen:
3116 fvdef = fvnone;
3117 break;
3118 }
3119 if (structdef == stagseen && !cjava)
3120 {
3121 popclass_above (cblev);
3122 structdef = snone;
3123 }
3124 break;
3125 case dsharpseen:
3126 savetoken = token;
3127 }
3128 if (!yacc_rules || lp == newlb.buffer + 1)
3129 {
3130 tokoff = lp - 1 - newlb.buffer;
3131 toklen = 1;
3132 midtoken = TRUE;
3133 }
3134 continue;
3135 } /* if (begtoken) */
3136 } /* if must look at token */
3137
3138
3139 /* Detect end of line, colon, comma, semicolon and various braces
3140 after having handled a token.*/
3141 switch (c)
3142 {
3143 case ':':
3144 if (yacc_rules && token.offset == 0 && token.valid)
3145 {
3146 make_C_tag (FALSE); /* a yacc function */
3147 break;
3148 }
3149 if (definedef != dnone)
3150 break;
3151 switch (objdef)
3152 {
3153 case otagseen:
3154 objdef = oignore;
3155 make_C_tag (TRUE); /* an Objective C class */
3156 break;
3157 case omethodtag:
3158 case omethodparm:
3159 objdef = omethodcolon;
3160 linebuffer_setlen (&token_name, token_name.len + 1);
3161 strcat (token_name.buffer, ":");
3162 break;
3163 }
3164 if (structdef == stagseen)
3165 structdef = scolonseen;
3166 break;
3167 case ';':
3168 if (definedef != dnone)
3169 break;
3170 switch (typdef)
3171 {
3172 case tend:
3173 case ttypeseen:
3174 make_C_tag (FALSE); /* a typedef */
3175 typdef = tnone;
3176 fvdef = fvnone;
3177 break;
3178 case tnone:
3179 case tinbody:
3180 case tignore:
3181 switch (fvdef)
3182 {
3183 case fignore:
3184 if (typdef == tignore)
3185 fvdef = fvnone;
3186 break;
3187 case fvnameseen:
3188 if ((globals && cblev == 0 && (!fvextern || declarations))
3189 || (members && instruct))
3190 make_C_tag (FALSE); /* a variable */
3191 fvextern = FALSE;
3192 fvdef = fvnone;
3193 token.valid = FALSE;
3194 break;
3195 case flistseen:
3196 if ((declarations && typdef == tnone && !instruct)
3197 || (members && typdef != tignore && instruct))
3198 make_C_tag (TRUE); /* a function declaration */
3199 /* FALLTHRU */
3200 default:
3201 fvextern = FALSE;
3202 fvdef = fvnone;
3203 if (declarations
3204 && structdef == stagseen && (c_ext & C_PLPL))
3205 make_C_tag (FALSE); /* forward declaration */
3206 else
3207 /* The following instruction invalidates the token.
3208 Probably the token should be invalidated in all other
3209 cases where some state machine is reset prematurely. */
3210 token.valid = FALSE;
3211 } /* switch (fvdef) */
3212 /* FALLTHRU */
3213 default:
3214 if (!instruct)
3215 typdef = tnone;
3216 }
3217 if (structdef == stagseen)
3218 structdef = snone;
3219 break;
3220 case ',':
3221 if (definedef != dnone)
3222 break;
3223 switch (objdef)
3224 {
3225 case omethodtag:
3226 case omethodparm:
3227 make_C_tag (TRUE); /* an Objective C method */
3228 objdef = oinbody;
3229 break;
3230 }
3231 switch (fvdef)
3232 {
3233 case fdefunkey:
3234 case foperator:
3235 case fstartlist:
3236 case finlist:
3237 case fignore:
3238 case vignore:
3239 break;
3240 case fdefunname:
3241 fvdef = fignore;
3242 break;
3243 case fvnameseen: /* a variable */
3244 if ((globals && cblev == 0 && (!fvextern || declarations))
3245 || (members && instruct))
3246 make_C_tag (FALSE);
3247 break;
3248 case flistseen: /* a function */
3249 if ((declarations && typdef == tnone && !instruct)
3250 || (members && typdef != tignore && instruct))
3251 {
3252 make_C_tag (TRUE); /* a function declaration */
3253 fvdef = fvnameseen;
3254 }
3255 else if (!declarations)
3256 fvdef = fvnone;
3257 token.valid = FALSE;
3258 break;
3259 default:
3260 fvdef = fvnone;
3261 }
3262 if (structdef == stagseen)
3263 structdef = snone;
3264 break;
3265 case '[':
3266 if (definedef != dnone)
3267 break;
3268 if (structdef == stagseen)
3269 structdef = snone;
3270 switch (typdef)
3271 {
3272 case ttypeseen:
3273 case tend:
3274 typdef = tignore;
3275 make_C_tag (FALSE); /* a typedef */
3276 break;
3277 case tnone:
3278 case tinbody:
3279 switch (fvdef)
3280 {
3281 case foperator:
3282 case finlist:
3283 case fignore:
3284 case vignore:
3285 break;
3286 case fvnameseen:
3287 if ((members && cblev == 1)
3288 || (globals && cblev == 0
3289 && (!fvextern || declarations)))
3290 make_C_tag (FALSE); /* a variable */
3291 /* FALLTHRU */
3292 default:
3293 fvdef = fvnone;
3294 }
3295 break;
3296 }
3297 break;
3298 case '(':
3299 if (definedef != dnone)
3300 break;
3301 if (objdef == otagseen && parlev == 0)
3302 objdef = oparenseen;
3303 switch (fvdef)
3304 {
3305 case fvnameseen:
3306 if (typdef == ttypeseen
3307 && *lp != '*'
3308 && !instruct)
3309 {
3310 /* This handles constructs like:
3311 typedef void OperatorFun (int fun); */
3312 make_C_tag (FALSE);
3313 typdef = tignore;
3314 fvdef = fignore;
3315 break;
3316 }
3317 /* FALLTHRU */
3318 case foperator:
3319 fvdef = fstartlist;
3320 break;
3321 case flistseen:
3322 fvdef = finlist;
3323 break;
3324 }
3325 parlev++;
3326 break;
3327 case ')':
3328 if (definedef != dnone)
3329 break;
3330 if (objdef == ocatseen && parlev == 1)
3331 {
3332 make_C_tag (TRUE); /* an Objective C category */
3333 objdef = oignore;
3334 }
3335 if (--parlev == 0)
3336 {
3337 switch (fvdef)
3338 {
3339 case fstartlist:
3340 case finlist:
3341 fvdef = flistseen;
3342 break;
3343 }
3344 if (!instruct
3345 && (typdef == tend
3346 || typdef == ttypeseen))
3347 {
3348 typdef = tignore;
3349 make_C_tag (FALSE); /* a typedef */
3350 }
3351 }
3352 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3353 parlev = 0;
3354 break;
3355 case '{':
3356 if (definedef != dnone)
3357 break;
3358 if (typdef == ttypeseen)
3359 {
3360 typdefcblev = cblev;
3361 typdef = tinbody;
3362 }
3363 switch (fvdef)
3364 {
3365 case flistseen:
3366 make_C_tag (TRUE); /* a function */
3367 /* FALLTHRU */
3368 case fignore:
3369 fvdef = fvnone;
3370 break;
3371 case fvnone:
3372 switch (objdef)
3373 {
3374 case otagseen:
3375 make_C_tag (TRUE); /* an Objective C class */
3376 objdef = oignore;
3377 break;
3378 case omethodtag:
3379 case omethodparm:
3380 make_C_tag (TRUE); /* an Objective C method */
3381 objdef = oinbody;
3382 break;
3383 default:
3384 /* Neutralize `extern "C" {' grot. */
3385 if (cblev == 0 && structdef == snone && nestlev == 0
3386 && typdef == tnone)
3387 cblev = -1;
3388 }
3389 }
3390 switch (structdef)
3391 {
3392 case skeyseen: /* unnamed struct */
3393 pushclass_above (cblev, NULL, 0);
3394 structdef = snone;
3395 break;
3396 case stagseen: /* named struct or enum */
3397 case scolonseen: /* a class */
3398 pushclass_above (cblev, token.line+token.offset, token.length);
3399 structdef = snone;
3400 make_C_tag (FALSE); /* a struct or enum */
3401 break;
3402 }
3403 cblev++;
3404 break;
3405 case '*':
3406 if (definedef != dnone)
3407 break;
3408 if (fvdef == fstartlist)
3409 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3410 break;
3411 case '}':
3412 if (definedef != dnone)
3413 break;
3414 if (!noindentypedefs && lp == newlb.buffer + 1)
3415 {
3416 cblev = 0; /* reset curly brace level if first column */
3417 parlev = 0; /* also reset paren level, just in case... */
3418 }
3419 else if (cblev > 0)
3420 cblev--;
3421 popclass_above (cblev);
3422 structdef = snone;
3423 if (typdef == tinbody && cblev <= typdefcblev)
3424 {
3425 assert (cblev == typdefcblev);
3426 typdef = tend;
3427 }
3428 break;
3429 case '=':
3430 if (definedef != dnone)
3431 break;
3432 switch (fvdef)
3433 {
3434 case foperator:
3435 case finlist:
3436 case fignore:
3437 case vignore:
3438 break;
3439 case fvnameseen:
3440 if ((members && cblev == 1)
3441 || (globals && cblev == 0 && (!fvextern || declarations)))
3442 make_C_tag (FALSE); /* a variable */
3443 /* FALLTHRU */
3444 default:
3445 fvdef = vignore;
3446 }
3447 break;
3448 case '<':
3449 if (cplpl && structdef == stagseen)
3450 {
3451 structdef = sintemplate;
3452 break;
3453 }
3454 goto resetfvdef;
3455 case '>':
3456 if (structdef == sintemplate)
3457 {
3458 structdef = stagseen;
3459 break;
3460 }
3461 goto resetfvdef;
3462 case '+':
3463 case '-':
3464 if (objdef == oinbody && cblev == 0)
3465 {
3466 objdef = omethodsign;
3467 break;
3468 }
3469 /* FALLTHRU */
3470 resetfvdef:
3471 case '#': case '~': case '&': case '%': case '/': case '|':
3472 case '^': case '!': case '.': case '?': case ']':
3473 if (definedef != dnone)
3474 break;
3475 /* These surely cannot follow a function tag in C. */
3476 switch (fvdef)
3477 {
3478 case foperator:
3479 case finlist:
3480 case fignore:
3481 case vignore:
3482 break;
3483 default:
3484 fvdef = fvnone;
3485 }
3486 break;
3487 case '\0':
3488 if (objdef == otagseen)
3489 {
3490 make_C_tag (TRUE); /* an Objective C class */
3491 objdef = oignore;
3492 }
3493 /* If a macro spans multiple lines don't reset its state. */
3494 if (quotednl)
3495 CNL_SAVE_DEFINEDEF ();
3496 else
3497 CNL ();
3498 break;
3499 } /* switch (c) */
3500
3501 } /* while not eof */
3502
3503 free (token_name.buffer);
3504 free (lbs[0].lb.buffer);
3505 free (lbs[1].lb.buffer);
3506 }
3507
3508 /*
3509 * Process either a C++ file or a C file depending on the setting
3510 * of a global flag.
3511 */
3512 static void
3513 default_C_entries (inf)
3514 FILE *inf;
3515 {
3516 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3517 }
3518
/* Always do plain C: C_entries is called with 0 as its first argument.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;

  C_entries (c_ext, inf);
}
3526
3527 /* Always do C++. */
3528 static void
3529 Cplusplus_entries (inf)
3530 FILE *inf;
3531 {
3532 C_entries (C_PLPL, inf);
3533 }
3534
3535 /* Always do Java. */
3536 static void
3537 Cjava_entries (inf)
3538 FILE *inf;
3539 {
3540 C_entries (C_JAVA, inf);
3541 }
3542
3543 /* Always do C*. */
3544 static void
3545 Cstar_entries (inf)
3546 FILE *inf;
3547 {
3548 C_entries (C_STAR, inf);
3549 }
3550
3551 /* Always do Yacc. */
3552 static void
3553 Yacc_entries (inf)
3554 FILE *inf;
3555 {
3556 C_entries (YACC, inf);
3557 }
3558
3559 \f
/* A useful macro: iterate over the lines of FILE_POINTER.

   The global counters lineno and charno are reset to 0 when the loop
   starts.  Before each execution of the loop body, lineno is
   incremented, linecharno takes the previous value of charno, the next
   line is read into LINE_BUFFER with readline (whose return value is
   added to charno), and CHAR_POINTER is set to the start of
   LINE_BUFFER's text.  The loop ends as soon as feof reports end of
   file on FILE_POINTER.

   CHAR_POINTER is taken from the LINE_BUFFER argument itself rather
   than from the global `lb', so the macro also works when a different
   line buffer is passed.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)   \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           char_pointer = (line_buffer).buffer,                         \
           TRUE);                                                       \
      )
3570
3571
3572 /*
3573 * Read a file, but do no processing. This is used to do regexp
3574 * matching on files that have no language defined.
3575 */
3576 static void
3577 just_read_file (inf)
3578 FILE *inf;
3579 {
3580 register char *dummy;
3581
3582 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3583 continue;
3584 }
3585
3586 \f
3587 /* Fortran parsing */
3588
3589 static bool tail P_((char *));
3590 static void takeprec P_((void));
3591 static void getit P_((FILE *));
3592
3593 static bool
3594 tail (cp)
3595 char *cp;
3596 {
3597 register int len = 0;
3598
3599 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3600 cp++, len++;
3601 if (*cp == '\0' && !intoken (dbp[len]))
3602 {
3603 dbp += len;
3604 return TRUE;
3605 }
3606 return FALSE;
3607 }
3608
3609 static void
3610 takeprec ()
3611 {
3612 dbp = skip_spaces (dbp);
3613 if (*dbp != '*')
3614 return;
3615 dbp++;
3616 dbp = skip_spaces (dbp);
3617 if (strneq (dbp, "(*)", 3))
3618 {
3619 dbp += 3;
3620 return;
3621 }
3622 if (!ISDIGIT (*dbp))
3623 {
3624 --dbp; /* force failure */
3625 return;
3626 }
3627 do
3628 dbp++;
3629 while (ISDIGIT (*dbp));
3630 }
3631
3632 static void
3633 getit (inf)
3634 FILE *inf;
3635 {
3636 register char *cp;
3637
3638 dbp = skip_spaces (dbp);
3639 if (*dbp == '\0')
3640 {
3641 lineno++;
3642 linecharno = charno;
3643 charno += readline (&lb, inf);
3644 dbp = lb.buffer;
3645 if (dbp[5] != '&')
3646 return;
3647 dbp += 6;
3648 dbp = skip_spaces (dbp);
3649 }
3650 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3651 return;
3652 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3653 continue;
3654 pfnote (savenstr (dbp, cp-dbp), TRUE,
3655 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3656 }
3657
3658
3659 static void
3660 Fortran_functions (inf)
3661 FILE *inf;
3662 {
3663 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3664 {
3665 if (*dbp == '%')
3666 dbp++; /* Ratfor escape to fortran */
3667 dbp = skip_spaces (dbp);
3668 if (*dbp == '\0')
3669 continue;
3670 switch (lowcase (*dbp))
3671 {
3672 case 'i':
3673 if (tail ("integer"))
3674 takeprec ();
3675 break;
3676 case 'r':
3677 if (tail ("real"))
3678 takeprec ();
3679 break;
3680 case 'l':
3681 if (tail ("logical"))
3682 takeprec ();
3683 break;
3684 case 'c':
3685 if (tail ("complex") || tail ("character"))
3686 takeprec ();
3687 break;
3688 case 'd':
3689 if (tail ("double"))
3690 {
3691 dbp = skip_spaces (dbp);
3692 if (*dbp == '\0')
3693 continue;
3694 if (tail ("precision"))
3695 break;
3696 continue;
3697 }
3698 break;
3699 }
3700 dbp = skip_spaces (dbp);
3701 if (*dbp == '\0')
3702 continue;
3703 switch (lowcase (*dbp))
3704 {
3705 case 'f':
3706 if (tail ("function"))
3707 getit (inf);
3708 continue;
3709 case 's':
3710 if (tail ("subroutine"))
3711 getit (inf);
3712 continue;
3713 case 'e':
3714 if (tail ("entry"))
3715 getit (inf);
3716 continue;
3717 case 'b':
3718 if (tail ("blockdata") || tail ("block data"))
3719 {
3720 dbp = skip_spaces (dbp);
3721 if (*dbp == '\0') /* assume un-named */
3722 pfnote (savestr ("blockdata"), TRUE,
3723 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3724 else
3725 getit (inf); /* look for name */
3726 }
3727 continue;
3728 }
3729 }
3730 }
3731
3732 \f
3733 /*
3734 * Ada parsing
3735 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3736 */
3737
3738 static void adagetit P_((FILE *, char *));
3739
3740 /* Once we are positioned after an "interesting" keyword, let's get
3741 the real tag value necessary. */
3742 static void
3743 adagetit (inf, name_qualifier)
3744 FILE *inf;
3745 char *name_qualifier;
3746 {
3747 register char *cp;
3748 char *name;
3749 char c;
3750
3751 while (!feof (inf))
3752 {
3753 dbp = skip_spaces (dbp);
3754 if (*dbp == '\0'
3755 || (dbp[0] == '-' && dbp[1] == '-'))
3756 {
3757 lineno++;
3758 linecharno = charno;
3759 charno += readline (&lb, inf);
3760 dbp = lb.buffer;
3761 }
3762 switch (*dbp)
3763 {
3764 case 'b':
3765 case 'B':
3766 if (tail ("body"))
3767 {
3768 /* Skipping body of procedure body or package body or ....
3769 resetting qualifier to body instead of spec. */
3770 name_qualifier = "/b";
3771 continue;
3772 }
3773 break;
3774 case 't':
3775 case 'T':
3776 /* Skipping type of task type or protected type ... */
3777 if (tail ("type"))
3778 continue;
3779 break;
3780 }
3781 if (*dbp == '"')
3782 {
3783 dbp += 1;
3784 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3785 continue;
3786 }
3787 else
3788 {
3789 dbp = skip_spaces (dbp);
3790 for (cp = dbp;
3791 (*cp != '\0'
3792 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3793 cp++)
3794 continue;
3795 if (cp == dbp)
3796 return;
3797 }
3798 c = *cp;
3799 *cp = '\0';
3800 name = concat (dbp, name_qualifier, "");
3801 *cp = c;
3802 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3803 if (c == '"')
3804 dbp = cp + 1;
3805 return;
3806 }
3807 }
3808
3809 static void
3810 Ada_funcs (inf)
3811 FILE *inf;
3812 {
3813 bool inquote = FALSE;
3814
3815 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3816 {
3817 while (*dbp != '\0')
3818 {
3819 /* Skip a string i.e. "abcd". */
3820 if (inquote || (*dbp == '"'))
3821 {
3822 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3823 if (dbp != NULL)
3824 {
3825 inquote = FALSE;
3826 dbp += 1;
3827 continue; /* advance char */
3828 }
3829 else
3830 {
3831 inquote = TRUE;
3832 break; /* advance line */
3833 }
3834 }
3835
3836 /* Skip comments. */
3837 if (dbp[0] == '-' && dbp[1] == '-')
3838 break; /* advance line */
3839
3840 /* Skip character enclosed in single quote i.e. 'a'
3841 and skip single quote starting an attribute i.e. 'Image. */
3842 if (*dbp == '\'')
3843 {
3844 dbp++ ;
3845 if (*dbp != '\0')
3846 dbp++;
3847 continue;
3848 }
3849
3850 /* Search for beginning of a token. */
3851 if (!begtoken (*dbp))
3852 {
3853 dbp++;
3854 continue; /* advance char */
3855 }
3856
3857 /* We are at the beginning of a token. */
3858 switch (*dbp)
3859 {
3860 case 'f':
3861 case 'F':
3862 if (!packages_only && tail ("function"))
3863 adagetit (inf, "/f");
3864 else
3865 break; /* from switch */
3866 continue; /* advance char */
3867 case 'p':
3868 case 'P':
3869 if (!packages_only && tail ("procedure"))
3870 adagetit (inf, "/p");
3871 else if (tail ("package"))
3872 adagetit (inf, "/s");
3873 else if (tail ("protected")) /* protected type */
3874 adagetit (inf, "/t");
3875 else
3876 break; /* from switch */
3877 continue; /* advance char */
3878 case 't':
3879 case 'T':
3880 if (!packages_only && tail ("task"))
3881 adagetit (inf, "/k");
3882 else if (typedefs && !packages_only && tail ("type"))
3883 {
3884 adagetit (inf, "/t");
3885 while (*dbp != '\0')
3886 dbp += 1;
3887 }
3888 else
3889 break; /* from switch */
3890 continue; /* advance char */
3891 }
3892
3893 /* Look for the end of the token. */
3894 while (!endtoken (*dbp))
3895 dbp++;
3896
3897 } /* advance char */
3898 } /* advance line */
3899 }
3900
3901 \f
3902 /*
3903 * Bob Weiner, Motorola Inc., 4/3/94
3904 * Unix and microcontroller assembly tag handling
3905 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3906 */
3907 static void
3908 Asm_labels (inf)
3909 FILE *inf;
3910 {
3911 register char *cp;
3912
3913 LOOP_ON_INPUT_LINES (inf, lb, cp)
3914 {
3915 /* If first char is alphabetic or one of [_.$], test for colon
3916 following identifier. */
3917 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3918 {
3919 /* Read past label. */
3920 cp++;
3921 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3922 cp++;
3923 if (*cp == ':' || iswhite (*cp))
3924 {
3925 /* Found end of label, so copy it and add it to the table. */
3926 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3927 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3928 }
3929 }
3930 }
3931 }
3932
3933 \f
3934 /*
3935 * Perl support
3936 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3937 * Perl variable names: /^(my|local).../
3938 * Bart Robinson <lomew@cs.utah.edu> (1995)
3939 * Michael Ernst <mernst@alum.mit.edu> (1997)
3940 */
3941 static void
3942 Perl_functions (inf)
3943 FILE *inf;
3944 {
3945 register char *cp;
3946
3947 LOOP_ON_INPUT_LINES (inf, lb, cp)
3948 {
3949 if (*cp++ == 's'
3950 && *cp++ == 'u'
3951 && *cp++ == 'b' && iswhite (*cp++))
3952 {
3953 cp = skip_spaces (cp);
3954 if (*cp != '\0')
3955 {
3956 char *sp = cp;
3957 while (*cp != '\0'
3958 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3959 cp++;
3960 pfnote (savenstr (sp, cp-sp), TRUE,
3961 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3962 }
3963 }
3964 else if (globals /* only if tagging global vars is enabled */
3965 && ((cp = lb.buffer,
3966 *cp++ == 'm'
3967 && *cp++ == 'y')
3968 || (cp = lb.buffer,
3969 *cp++ == 'l'
3970 && *cp++ == 'o'
3971 && *cp++ == 'c'
3972 && *cp++ == 'a'
3973 && *cp++ == 'l'))
3974 && (*cp == '(' || iswhite (*cp)))
3975 {
3976 /* After "my" or "local", but before any following paren or space. */
3977 char *varname = NULL;
3978
3979 cp = skip_spaces (cp);
3980 if (*cp == '$' || *cp == '@' || *cp == '%')
3981 {
3982 char* varstart = ++cp;
3983 while (ISALNUM (*cp) || *cp == '_')
3984 cp++;
3985 varname = savenstr (varstart, cp-varstart);
3986 }
3987 else
3988 {
3989 /* Should be examining a variable list at this point;
3990 could insist on seeing an open parenthesis. */
3991 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
3992 cp++;
3993 }
3994
3995 /* Perhaps I should back cp up one character, so the TAGS table
3996 doesn't mention (and so depend upon) the following char. */
3997 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3998 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3999 }
4000 }
4001 }
4002
4003 \f
4004 /*
4005 * Python support
4006 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4007 * Eric S. Raymond <esr@thyrsus.com> (1997)
4008 */
4009 static void
4010 Python_functions (inf)
4011 FILE *inf;
4012 {
4013 register char *cp;
4014
4015 LOOP_ON_INPUT_LINES (inf, lb, cp)
4016 {
4017 if (*cp++ == 'd'
4018 && *cp++ == 'e'
4019 && *cp++ == 'f' && iswhite (*cp++))
4020 {
4021 cp = skip_spaces (cp);
4022 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4023 cp++;
4024 pfnote (NULL, TRUE,
4025 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4026 }
4027
4028 cp = lb.buffer;
4029 if (*cp++ == 'c'
4030 && *cp++ == 'l'
4031 && *cp++ == 'a'
4032 && *cp++ == 's'
4033 && *cp++ == 's' && iswhite (*cp++))
4034 {
4035 cp = skip_spaces (cp);
4036 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4037 cp++;
4038 pfnote (NULL, TRUE,
4039 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4040 }
4041 }
4042 }
4043
4044 \f
4045 /* Idea by Corny de Souza
4046 * Cobol tag functions
4047 * We could look for anything that could be a paragraph name.
4048 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4049 */
4050 static void
4051 Cobol_paragraphs (inf)
4052 FILE *inf;
4053 {
4054 register char *bp, *ep;
4055
4056 LOOP_ON_INPUT_LINES (inf, lb, bp)
4057 {
4058 if (lb.len < 9)
4059 continue;
4060 bp += 8;
4061
4062 /* If eoln, compiler option or comment ignore whole line. */
4063 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4064 continue;
4065
4066 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4067 continue;
4068 if (*ep++ == '.')
4069 pfnote (savenstr (bp, ep-bp), TRUE,
4070 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4071 }
4072 }
4073
4074 \f
4075 /*
4076 * Makefile support
4077 * Idea by Assar Westerlund <assar@sics.se> (2001)
4078 */
4079 static void
4080 Makefile_targets (inf)
4081 FILE *inf;
4082 {
4083 register char *bp;
4084
4085 LOOP_ON_INPUT_LINES (inf, lb, bp)
4086 {
4087 if (*bp == '\t' || *bp == '#')
4088 continue;
4089 while (*bp != '\0' && *bp != '=' && *bp != ':')
4090 bp++;
4091 if (*bp == ':')
4092 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4093 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4094 }
4095 }
4096
4097 \f
4098 /* Added by Mosur Mohan, 4/22/88 */
4099 /* Pascal parsing */
4100
4101 /*
4102 * Locates tags for procedures & functions. Doesn't do any type- or
4103 * var-definitions. It does look for the keyword "extern" or
4104 * "forward" immediately following the procedure statement; if found,
4105 * the tag is skipped.
4106 */
4107 static void
4108 Pascal_functions (inf)
4109 FILE *inf;
4110 {
4111 linebuffer tline; /* mostly copied from C_entries */
4112 long save_lcno;
4113 int save_lineno, save_len;
4114 char c, *cp, *namebuf;
4115
4116 bool /* each of these flags is TRUE iff: */
4117 incomment, /* point is inside a comment */
4118 inquote, /* point is inside '..' string */
4119 get_tagname, /* point is after PROCEDURE/FUNCTION
4120 keyword, so next item = potential tag */
4121 found_tag, /* point is after a potential tag */
4122 inparms, /* point is within parameter-list */
4123 verify_tag; /* point has passed the parm-list, so the
4124 next token will determine whether this
4125 is a FORWARD/EXTERN to be ignored, or
4126 whether it is a real tag */
4127
4128 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4129 namebuf = NULL; /* keep compiler quiet */
4130 lineno = 0;
4131 charno = 0;
4132 dbp = lb.buffer;
4133 *dbp = '\0';
4134 initbuffer (&tline);
4135
4136 incomment = inquote = FALSE;
4137 found_tag = FALSE; /* have a proc name; check if extern */
4138 get_tagname = FALSE; /* have found "procedure" keyword */
4139 inparms = FALSE; /* found '(' after "proc" */
4140 verify_tag = FALSE; /* check if "extern" is ahead */
4141
4142
4143 while (!feof (inf)) /* long main loop to get next char */
4144 {
4145 c = *dbp++;
4146 if (c == '\0') /* if end of line */
4147 {
4148 lineno++;
4149 linecharno = charno;
4150 charno += readline (&lb, inf);
4151 dbp = lb.buffer;
4152 if (*dbp == '\0')
4153 continue;
4154 if (!((found_tag && verify_tag)
4155 || get_tagname))
4156 c = *dbp++; /* only if don't need *dbp pointing
4157 to the beginning of the name of
4158 the procedure or function */
4159 }
4160 if (incomment)
4161 {
4162 if (c == '}') /* within { } comments */
4163 incomment = FALSE;
4164 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4165 {
4166 dbp++;
4167 incomment = FALSE;
4168 }
4169 continue;
4170 }
4171 else if (inquote)
4172 {
4173 if (c == '\'')
4174 inquote = FALSE;
4175 continue;
4176 }
4177 else
4178 switch (c)
4179 {
4180 case '\'':
4181 inquote = TRUE; /* found first quote */
4182 continue;
4183 case '{': /* found open { comment */
4184 incomment = TRUE;
4185 continue;
4186 case '(':
4187 if (*dbp == '*') /* found open (* comment */
4188 {
4189 incomment = TRUE;
4190 dbp++;
4191 }
4192 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4193 inparms = TRUE;
4194 continue;
4195 case ')': /* end of parms list */
4196 if (inparms)
4197 inparms = FALSE;
4198 continue;
4199 case ';':
4200 if (found_tag && !inparms) /* end of proc or fn stmt */
4201 {
4202 verify_tag = TRUE;
4203 break;
4204 }
4205 continue;
4206 }
4207 if (found_tag && verify_tag && (*dbp != ' '))
4208 {
4209 /* check if this is an "extern" declaration */
4210 if (*dbp == '\0')
4211 continue;
4212 if (lowcase (*dbp == 'e'))
4213 {
4214 if (tail ("extern")) /* superfluous, really! */
4215 {
4216 found_tag = FALSE;
4217 verify_tag = FALSE;
4218 }
4219 }
4220 else if (lowcase (*dbp) == 'f')
4221 {
4222 if (tail ("forward")) /* check for forward reference */
4223 {
4224 found_tag = FALSE;
4225 verify_tag = FALSE;
4226 }
4227 }
4228 if (found_tag && verify_tag) /* not external proc, so make tag */
4229 {
4230 found_tag = FALSE;
4231 verify_tag = FALSE;
4232 pfnote (namebuf, TRUE,
4233 tline.buffer, save_len, save_lineno, save_lcno);
4234 continue;
4235 }
4236 }
4237 if (get_tagname) /* grab name of proc or fn */
4238 {
4239 if (*dbp == '\0')
4240 continue;
4241
4242 /* save all values for later tagging */
4243 linebuffer_setlen (&tline, lb.len);
4244 strcpy (tline.buffer, lb.buffer);
4245 save_lineno = lineno;
4246 save_lcno = linecharno;
4247
4248 /* grab block name */
4249 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4250 continue;
4251 namebuf = savenstr (dbp, cp-dbp);
4252 dbp = cp; /* set dbp to e-o-token */
4253 save_len = dbp - lb.buffer + 1;
4254 get_tagname = FALSE;
4255 found_tag = TRUE;
4256 continue;
4257
4258 /* and proceed to check for "extern" */
4259 }
4260 else if (!incomment && !inquote && !found_tag)
4261 {
4262 /* check for proc/fn keywords */
4263 switch (lowcase (c))
4264 {
4265 case 'p':
4266 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4267 get_tagname = TRUE;
4268 continue;
4269 case 'f':
4270 if (tail ("unction"))
4271 get_tagname = TRUE;
4272 continue;
4273 }
4274 }
4275 } /* while not eof */
4276
4277 free (tline.buffer);
4278 }
4279
4280 \f
4281 /*
4282 * Lisp tag functions
4283 * look for (def or (DEF, quote or QUOTE
4284 */
4285
4286 static int L_isdef P_((char *));
4287 static int L_isquote P_((char *));
4288 static void L_getit P_((void));
4289
/* Return 1 if the three characters that follow STRP[0] spell "def"
   in any mixture of upper and lower case, 0 otherwise.  STRP[0] itself
   is not examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  register char *p = strp + 1;

  if (*p != 'd' && *p != 'D')
    return 0;
  p++;
  if (*p != 'e' && *p != 'E')
    return 0;
  p++;
  return (*p == 'f' || *p == 'F');
}
4298
/* Return non-zero if the characters that follow STRP[0] spell "quote"
   in any mixture of upper and lower case and are followed by a
   character for which iswhite holds.  STRP[0] itself is not
   examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  /* Each character is looked at only if all the previous ones
     matched, so nothing past the end of a short string is read.  */
  return ((strp[1] == 'q' || strp[1] == 'Q')
          && (strp[2] == 'u' || strp[2] == 'U')
          && (strp[3] == 'o' || strp[3] == 'O')
          && (strp[4] == 't' || strp[4] == 'T')
          && (strp[5] == 'e' || strp[5] == 'E')
          && iswhite (strp[6]));
}
4310
4311 static void
4312 L_getit ()
4313 {
4314 register char *cp;
4315
4316 if (*dbp == '\'') /* Skip prefix quote */
4317 dbp++;
4318 else if (*dbp == '(')
4319 {
4320 if (L_isquote (dbp))
4321 dbp += 7; /* Skip "(quote " */
4322 else
4323 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4324 dbp = skip_spaces (dbp);
4325 }
4326
4327 for (cp = dbp /*+1*/;
4328 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4329 cp++)
4330 continue;
4331 if (cp == dbp)
4332 return;
4333
4334 pfnote (savenstr (dbp, cp-dbp), TRUE,
4335 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4336 }
4337
4338 static void
4339 Lisp_functions (inf)
4340 FILE *inf;
4341 {
4342 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4343 {
4344 if (dbp[0] == '(')
4345 {
4346 if (L_isdef (dbp))
4347 {
4348 dbp = skip_non_spaces (dbp);
4349 dbp = skip_spaces (dbp);
4350 L_getit ();
4351 }
4352 else
4353 {
4354 /* Check for (foo::defmumble name-defined ... */
4355 do
4356 dbp++;
4357 while (*dbp != '\0' && !iswhite (*dbp)
4358 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4359 if (*dbp == ':')
4360 {
4361 do
4362 dbp++;
4363 while (*dbp == ':');
4364
4365 if (L_isdef (dbp - 1))
4366 {
4367 dbp = skip_non_spaces (dbp);
4368 dbp = skip_spaces (dbp);
4369 L_getit ();
4370 }
4371 }
4372 }
4373 }
4374 }
4375 }
4376
4377 \f
4378 /*
4379 * Postscript tag functions
4380 * Just look for lines where the first character is '/'
4381 * Also look at "defineps" for PSWrap
4382 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4383 * Ideas by Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4384 */
4385 static void
4386 Postscript_functions (inf)
4387 FILE *inf;
4388 {
4389 register char *bp, *ep;
4390
4391 LOOP_ON_INPUT_LINES (inf, lb, bp)
4392 {
4393 if (bp[0] == '/')
4394 {
4395 for (ep = bp+1;
4396 *ep != '\0' && *ep != ' ' && *ep != '{';
4397 ep++)
4398 continue;
4399 pfnote (savenstr (bp, ep-bp), TRUE,
4400 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4401 }
4402 else if (strneq (bp, "defineps", 8))
4403 {
4404 bp = skip_non_spaces (bp);
4405 bp = skip_spaces (bp);
4406 get_tag (bp);
4407 }
4408 }
4409 }
4410
4411 \f
4412 /*
4413 * Scheme tag functions
4414 * look for (def... xyzzy
4415 * look for (def... (xyzzy
4416 * look for (def ... ((...(xyzzy ....
4417 * look for (set! xyzzy
4418 */
4419
4420 static void
4421 Scheme_functions (inf)
4422 FILE *inf;
4423 {
4424 register char *bp;
4425
4426 LOOP_ON_INPUT_LINES (inf, lb, bp)
4427 {
4428 if (bp[0] == '('
4429 && (bp[1] == 'D' || bp[1] == 'd')
4430 && (bp[2] == 'E' || bp[2] == 'e')
4431 && (bp[3] == 'F' || bp[3] == 'f'))
4432 {
4433 bp = skip_non_spaces (bp);
4434 /* Skip over open parens and white space */
4435 while (iswhite (*bp) || *bp == '(')
4436 bp++;
4437 get_tag (bp);
4438 }
4439 if (bp[0] == '('
4440 && (bp[1] == 'S' || bp[1] == 's')
4441 && (bp[2] == 'E' || bp[2] == 'e')
4442 && (bp[3] == 'T' || bp[3] == 't')
4443 && (bp[4] == '!' || bp[4] == '!')
4444 && (iswhite (bp[5])))
4445 {
4446 bp = skip_non_spaces (bp);
4447 bp = skip_spaces (bp);
4448 get_tag (bp);
4449 }
4450 }
4451 }
4452
4453 \f
4454 /* Find tags in TeX and LaTeX input files. */
4455
4456 /* TEX_toktab is a table of TeX control sequences that define tags.
4457 Each TEX_tabent records one such control sequence.
4458 CONVERT THIS TO USE THE Stab TYPE!! */
4459 struct TEX_tabent
4460 {
4461 char *name;
4462 int len;
4463 };
4464
4465 struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4466
4467 /* Default set of control sequences to put into TEX_toktab.
4468 The value of environment var TEXTAGS is prepended to this. */
4469
4470 char *TEX_defenv = "\
4471 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4472 :part:appendix:entry:index";
4473
4474 static void TEX_mode P_((FILE *));
4475 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4476 static int TEX_Token P_((char *));
4477
4478 char TEX_esc = '\\';
4479 char TEX_opgrp = '{';
4480 char TEX_clgrp = '}';
4481
4482 /*
4483 * TeX/LaTeX scanning loop.
4484 */
4485 static void
4486 TeX_commands (inf)
4487 FILE *inf;
4488 {
4489 char *cp, *lasthit;
4490 register int i;
4491
4492 /* Select either \ or ! as escape character. */
4493 TEX_mode (inf);
4494
4495 /* Initialize token table once from environment. */
4496 if (!TEX_toktab)
4497 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4498
4499 LOOP_ON_INPUT_LINES (inf, lb, cp)
4500 {
4501 lasthit = cp;
4502 /* Look at each esc in line. */
4503 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4504 {
4505 if (*++cp == '\0')
4506 break;
4507 linecharno += cp - lasthit;
4508 lasthit = cp;
4509 i = TEX_Token (lasthit);
4510 if (i >= 0)
4511 {
4512 /* We seem to include the TeX command in the tag name.
4513 register char *p;
4514 for (p = lasthit + TEX_toktab[i].len;
4515 *p != '\0' && *p != TEX_clgrp;
4516 p++)
4517 continue; */
4518 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4519 lb.buffer, lb.len, lineno, linecharno);
4520 break; /* We only tag a line once */
4521 }
4522 }
4523 }
4524 }
4525
4526 #define TEX_LESC '\\'
4527 #define TEX_SESC '!'
4528 #define TEX_cmt '%'
4529
4530 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4531 chars accordingly. */
4532 static void
4533 TEX_mode (inf)
4534 FILE *inf;
4535 {
4536 int c;
4537
4538 while ((c = getc (inf)) != EOF)
4539 {
4540 /* Skip to next line if we hit the TeX comment char. */
4541 if (c == TEX_cmt)
4542 while (c != '\n')
4543 c = getc (inf);
4544 else if (c == TEX_LESC || c == TEX_SESC )
4545 break;
4546 }
4547
4548 if (c == TEX_LESC)
4549 {
4550 TEX_esc = TEX_LESC;
4551 TEX_opgrp = '{';
4552 TEX_clgrp = '}';
4553 }
4554 else
4555 {
4556 TEX_esc = TEX_SESC;
4557 TEX_opgrp = '<';
4558 TEX_clgrp = '>';
4559 }
4560 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4561 No attempt is made to correct the situation. */
4562 rewind (inf);
4563 }
4564
4565 /* Read environment and prepend it to the default string.
4566 Build token table. */
4567 static struct TEX_tabent *
4568 TEX_decode_env (evarname, defenv)
4569 char *evarname;
4570 char *defenv;
4571 {
4572 register char *env, *p;
4573
4574 struct TEX_tabent *tab;
4575 int size, i;
4576
4577 /* Append default string to environment. */
4578 env = getenv (evarname);
4579 if (!env)
4580 env = defenv;
4581 else
4582 {
4583 char *oldenv = env;
4584 env = concat (oldenv, defenv, "");
4585 }
4586
4587 /* Allocate a token table */
4588 for (size = 1, p = env; p;)
4589 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4590 size++;
4591 /* Add 1 to leave room for null terminator. */
4592 tab = xnew (size + 1, struct TEX_tabent);
4593
4594 /* Unpack environment string into token table. Be careful about */
4595 /* zero-length strings (leading ':', "::" and trailing ':') */
4596 for (i = 0; *env;)
4597 {
4598 p = etags_strchr (env, ':');
4599 if (!p) /* End of environment string. */
4600 p = env + strlen (env);
4601 if (p - env > 0)
4602 { /* Only non-zero strings. */
4603 tab[i].name = savenstr (env, p - env);
4604 tab[i].len = strlen (tab[i].name);
4605 i++;
4606 }
4607 if (*p)
4608 env = p + 1;
4609 else
4610 {
4611 tab[i].name = NULL; /* Mark end of table. */
4612 tab[i].len = 0;
4613 break;
4614 }
4615 }
4616 return tab;
4617 }
4618
4619 /* If the text at CP matches one of the tag-defining TeX command names,
4620 return the pointer to the first occurrence of that command in TEX_toktab.
4621 Otherwise return -1.
4622 Keep the capital `T' in `token' for dumb truncating compilers
4623 (this distinguishes it from `TEX_toktab' */
4624 static int
4625 TEX_Token (cp)
4626 char *cp;
4627 {
4628 int i;
4629
4630 for (i = 0; TEX_toktab[i].len > 0; i++)
4631 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4632 return i;
4633 return -1;
4634 }
4635
4636 \f
4637 /* Texinfo support. Dave Love, Mar. 2000. */
4638 static void
4639 Texinfo_nodes (inf)
4640 FILE * inf;
4641 {
4642 char *cp, *start;
4643 LOOP_ON_INPUT_LINES (inf, lb, cp)
4644 {
4645 if ((*cp++ == '@'
4646 && *cp++ == 'n'
4647 && *cp++ == 'o'
4648 && *cp++ == 'd'
4649 && *cp++ == 'e' && iswhite (*cp++)))
4650 {
4651 start = cp = skip_spaces(cp);
4652 while (*cp != '\0' && *cp != ',')
4653 cp++;
4654 pfnote (savenstr (start, cp - start), TRUE,
4655 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4656 }
4657 }
4658 }
4659
4660 \f
4661 /*
4662 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4663 *
4664 * Assumes that the predicate starts at column 0.
4665 * Only the first clause of a predicate is added.
4666 */
4667 static int prolog_pred P_((char *, char *));
4668 static void prolog_skip_comment P_((linebuffer *, FILE *));
4669 static int prolog_atom P_((char *, int));
4670
4671 static void
4672 Prolog_functions (inf)
4673 FILE *inf;
4674 {
4675 char *cp, *last;
4676 int len;
4677 int allocated;
4678
4679 allocated = 0;
4680 len = 0;
4681 last = NULL;
4682
4683 LOOP_ON_INPUT_LINES (inf, lb, cp)
4684 {
4685 if (cp[0] == '\0') /* Empty line */
4686 continue;
4687 else if (iswhite (cp[0])) /* Not a predicate */
4688 continue;
4689 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4690 prolog_skip_comment (&lb, inf);
4691 else if ((len = prolog_pred (cp, last)) > 0)
4692 {
4693 /* Predicate. Store the function name so that we only
4694 generate a tag for the first clause. */
4695 if (last == NULL)
4696 last = xnew(len + 1, char);
4697 else if (len + 1 > allocated)
4698 xrnew (last, len + 1, char);
4699 allocated = len + 1;
4700 strncpy (last, cp, len);
4701 last[len] = '\0';
4702 }
4703 }
4704 }
4705
4706
4707 static void
4708 prolog_skip_comment (plb, inf)
4709 linebuffer *plb;
4710 FILE *inf;
4711 {
4712 char *cp;
4713
4714 do
4715 {
4716 for (cp = plb->buffer; *cp != '\0'; cp++)
4717 if (cp[0] == '*' && cp[1] == '/')
4718 return;
4719 lineno++;
4720 linecharno += readline (plb, inf);
4721 }
4722 while (!feof(inf));
4723 }
4724
4725 /*
4726 * A predicate definition is added if it matches:
4727 * <beginning of line><Prolog Atom><whitespace>(
4728 *
4729 * It is added to the tags database if it doesn't match the
4730 * name of the previous clause header.
4731 *
4732 * Return the size of the name of the predicate, or 0 if no header
4733 * was found.
4734 */
4735 static int
4736 prolog_pred (s, last)
4737 char *s;
4738 char *last; /* Name of last clause. */
4739 {
4740 int pos;
4741 int len;
4742
4743 pos = prolog_atom (s, 0);
4744 if (pos < 1)
4745 return 0;
4746
4747 len = pos;
4748 pos = skip_spaces (s + pos) - s;
4749
4750 if ((s[pos] == '(') || (s[pos] == '.'))
4751 {
4752 if (s[pos] == '(')
4753 pos++;
4754
4755 /* Save only the first clause. */
4756 if (last == NULL
4757 || len != (int)strlen (last)
4758 || !strneq (s, last, len))
4759 {
4760 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4761 return len;
4762 }
4763 }
4764 return 0;
4765 }
4766
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int origpos = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
        continue;
      return pos - origpos;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\'':
        if (s[pos+1] != '\'')
          return pos + 1 - origpos; /* closing quote is consumed too */
        pos += 2;               /* A double quote */
        break;
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (s[pos+1] == '\0')
          return -1;
        pos += 2;
        break;
      default:
        pos++;
        break;
      }
}
4825
4826 \f
4827 /*
4828 * Support for Erlang -- Anders Lindgren, Feb 1996.
4829 *
4830 * Generates tags for functions, defines, and records.
4831 *
4832 * Assumes that Erlang functions start at column 0.
4833 */
4834 static int erlang_func P_((char *, char *));
4835 static void erlang_attribute P_((char *));
4836 static int erlang_atom P_((char *, int));
4837
4838 static void
4839 Erlang_functions (inf)
4840 FILE *inf;
4841 {
4842 char *cp, *last;
4843 int len;
4844 int allocated;
4845
4846 allocated = 0;
4847 len = 0;
4848 last = NULL;
4849
4850 LOOP_ON_INPUT_LINES (inf, lb, cp)
4851 {
4852 if (cp[0] == '\0') /* Empty line */
4853 continue;
4854 else if (iswhite (cp[0])) /* Not function nor attribute */
4855 continue;
4856 else if (cp[0] == '%') /* comment */
4857 continue;
4858 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4859 continue;
4860 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4861 {
4862 erlang_attribute (cp);
4863 last = NULL;
4864 }
4865 else if ((len = erlang_func (cp, last)) > 0)
4866 {
4867 /*
4868 * Function. Store the function name so that we only
4869 * generates a tag for the first clause.
4870 */
4871 if (last == NULL)
4872 last = xnew (len + 1, char);
4873 else if (len + 1 > allocated)
4874 xrnew (last, len + 1, char);
4875 allocated = len + 1;
4876 strncpy (last, cp, len);
4877 last[len] = '\0';
4878 }
4879 }
4880 }
4881
4882
4883 /*
4884 * A function definition is added if it matches:
4885 * <beginning of line><Erlang Atom><whitespace>(
4886 *
4887 * It is added to the tags database if it doesn't match the
4888 * name of the previous clause header.
4889 *
4890 * Return the size of the name of the function, or 0 if no function
4891 * was found.
4892 */
4893 static int
4894 erlang_func (s, last)
4895 char *s;
4896 char *last; /* Name of last clause. */
4897 {
4898 int pos;
4899 int len;
4900
4901 pos = erlang_atom (s, 0);
4902 if (pos < 1)
4903 return 0;
4904
4905 len = pos;
4906 pos = skip_spaces (s + pos) - s;
4907
4908 /* Save only the first clause. */
4909 if (s[pos++] == '('
4910 && (last == NULL
4911 || len != (int)strlen (last)
4912 || !strneq (s, last, len)))
4913 {
4914 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4915 return len;
4916 }
4917
4918 return 0;
4919 }
4920
4921
4922 /*
4923 * Handle attributes. Currently, tags are generated for defines
4924 * and records.
4925 *
4926 * They are on the form:
4927 * -define(foo, bar).
4928 * -define(Foo(M, N), M+N).
4929 * -record(graph, {vtab = notable, cyclic = true}).
4930 */
4931 static void
4932 erlang_attribute (s)
4933 char *s;
4934 {
4935 int pos;
4936 int len;
4937
4938 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4939 {
4940 pos = skip_spaces (s + 7) - s;
4941 if (s[pos++] == '(')
4942 {
4943 pos = skip_spaces (s + pos) - s;
4944 len = erlang_atom (s, pos);
4945 if (len != 0)
4946 pfnote (savenstr (& s[pos], len), TRUE,
4947 s, pos + len, lineno, linecharno);
4948 }
4949 }
4950 return;
4951 }
4952
4953
/*
 * Measure the Erlang atom (or variable) that starts at S[POS].
 * Return its length in bytes, or -1 if nothing suitable starts there
 * or if a quoted atom is not closed before the end of the line.
 *
 * An unquoted name is a letter or an underscore followed by any
 * number of alphanumeric characters or underscores.  A quoted atom
 * is enclosed in single quotes; inside it a backslash quotes the
 * next character.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int cur = pos;

  if (ISALPHA (s[cur]) || s[cur] == '_')
    {
      /* Unquoted: swallow the run of word characters. */
      for (cur++; ISALNUM (s[cur]) || s[cur] == '_'; cur++)
        continue;
      return cur - pos;
    }

  if (s[cur] != '\'')
    return -1;

  /* Quoted: scan up to the closing quote. */
  cur++;
  for (;;)
    switch (s[cur])
      {
      case '\'':
        return cur + 1 - pos;   /* include the closing quote */
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (s[cur + 1] == '\0')
          return -1;
        cur += 2;
        break;
      default:
        cur++;
        break;
      }
}
5003
5004 \f
5005 #ifdef ETAGS_REGEXPS
5006
5007 static char *scan_separators P_((char *));
5008 static void analyse_regex P_((char *, bool));
5009 static void add_regex P_((char *, bool, language *));
5010 static char *substitute P_((char *, char *, struct re_registers *));
5011
/* Take a string like "/blah/" and turn it into "blah".  NAME[0] is
   the separator.  The text between it and the next unquoted
   separator is copied, in place, to the beginning of NAME and null
   terminated.  While copying, "\t" becomes a Tab character, a quoted
   separator becomes the bare separator, and any other backslash
   sequence is kept as it is.  Return a pointer to the terminating
   separator, or to the end of the string if there is none.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next kept character goes */
  char *src;                    /* the character being examined */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character.  A backslash that ends the
             string is dropped.  */
          src++;
          if (*src == '\0')
            break;
          if (*src == 't')
            *dst++ = '\t';
          else if (*src == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = *src;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5054
5055 /* Look at the argument of --regex or --no-regex and do the right
5056 thing. Same for each line of a regexp file. */
5057 static void
5058 analyse_regex (regex_arg, ignore_case)
5059 char *regex_arg;
5060 bool ignore_case;
5061 {
5062 if (regex_arg == NULL)
5063 free_patterns (); /* --no-regex: remove existing regexps */
5064
5065 /* A real --regexp option or a line in a regexp file. */
5066 switch (regex_arg[0])
5067 {
5068 /* Comments in regexp file or null arg to --regex. */
5069 case '\0':
5070 case ' ':
5071 case '\t':
5072 break;
5073
5074 /* Read a regex file. This is recursive and may result in a
5075 loop, which will stop when the file descriptors are exhausted. */
5076 case '@':
5077 {
5078 FILE *regexfp;
5079 linebuffer regexbuf;
5080 char *regexfile = regex_arg + 1;
5081
5082 /* regexfile is a file containing regexps, one per line. */
5083 regexfp = fopen (regexfile, "r");
5084 if (regexfp == NULL)
5085 {
5086 pfatal (regexfile);
5087 return;
5088 }
5089 initbuffer (&regexbuf);
5090 while (readline_internal (&regexbuf, regexfp) > 0)
5091 analyse_regex (regexbuf.buffer, ignore_case);
5092 free (regexbuf.buffer);
5093 fclose (regexfp);
5094 }
5095 break;
5096
5097 /* Regexp to be used for a specific language only. */
5098 case '{':
5099 {
5100 language *lang;
5101 char *lang_name = regex_arg + 1;
5102 char *cp;
5103
5104 for (cp = lang_name; *cp != '}'; cp++)
5105 if (*cp == '\0')
5106 {
5107 error ("unterminated language name in regex: %s", regex_arg);
5108 return;
5109 }
5110 *cp = '\0';
5111 lang = get_language_from_langname (lang_name);
5112 if (lang == NULL)
5113 return;
5114 add_regex (cp + 1, ignore_case, lang);
5115 }
5116 break;
5117
5118 /* Regexp to be used for any language. */
5119 default:
5120 add_regex (regex_arg, ignore_case, NULL);
5121 break;
5122 }
5123 }
5124
5125 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5126 expression, into a real regular expression by compiling it. */
5127 static void
5128 add_regex (regexp_pattern, ignore_case, lang)
5129 char *regexp_pattern;
5130 bool ignore_case;
5131 language *lang;
5132 {
5133 char *name;
5134 const char *err;
5135 struct re_pattern_buffer *patbuf;
5136 pattern *pp;
5137
5138
5139 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5140 {
5141 error ("%s: unterminated regexp", regexp_pattern);
5142 return;
5143 }
5144 name = scan_separators (regexp_pattern);
5145 if (regexp_pattern[0] == '\0')
5146 {
5147 error ("null regexp", (char *)NULL);
5148 return;
5149 }
5150 (void) scan_separators (name);
5151
5152 patbuf = xnew (1, struct re_pattern_buffer);
5153 /* Translation table to fold case if appropriate. */
5154 patbuf->translate = (ignore_case) ? lc_trans : NULL;
5155 patbuf->fastmap = NULL;
5156 patbuf->buffer = NULL;
5157 patbuf->allocated = 0;
5158
5159 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5160 if (err != NULL)
5161 {
5162 error ("%s while compiling pattern", err);
5163 return;
5164 }
5165
5166 pp = p_head;
5167 p_head = xnew (1, pattern);
5168 p_head->regex = savestr (regexp_pattern);
5169 p_head->p_next = pp;
5170 p_head->language = lang;
5171 p_head->pattern = patbuf;
5172 p_head->name_pattern = savestr (name);
5173 p_head->error_signaled = FALSE;
5174 }
5175
5176 /*
5177 * Do the substitutions indicated by the regular expression and
5178 * arguments.
5179 */
5180 static char *
5181 substitute (in, out, regs)
5182 char *in, *out;
5183 struct re_registers *regs;
5184 {
5185 char *result, *t;
5186 int size, dig, diglen;
5187
5188 result = NULL;
5189 size = strlen (out);
5190
5191 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5192 if (out[size - 1] == '\\')
5193 fatal ("pattern error in \"%s\"", out);
5194 for (t = etags_strchr (out, '\\');
5195 t != NULL;
5196 t = etags_strchr (t + 2, '\\'))
5197 if (ISDIGIT (t[1]))
5198 {
5199 dig = t[1] - '0';
5200 diglen = regs->end[dig] - regs->start[dig];
5201 size += diglen - 2;
5202 }
5203 else
5204 size -= 1;
5205
5206 /* Allocate space and do the substitutions. */
5207 result = xnew (size + 1, char);
5208
5209 for (t = result; *out != '\0'; out++)
5210 if (*out == '\\' && ISDIGIT (*++out))
5211 {
5212 dig = *out - '0';
5213 diglen = regs->end[dig] - regs->start[dig];
5214 strncpy (t, in + regs->start[dig], diglen);
5215 t += diglen;
5216 }
5217 else
5218 *t++ = *out;
5219 *t = '\0';
5220
5221 assert (t <= result + size && t - result == (int)strlen (result));
5222
5223 return result;
5224 }
5225
5226 /* Deallocate all patterns. */
5227 static void
5228 free_patterns ()
5229 {
5230 pattern *pp;
5231 while (p_head != NULL)
5232 {
5233 pp = p_head->p_next;
5234 free (p_head->regex);
5235 free (p_head->name_pattern);
5236 free (p_head);
5237 p_head = pp;
5238 }
5239 return;
5240 }
5241 #endif /* ETAGS_REGEXPS */
5242
5243 \f
5244 static void
5245 get_tag (bp)
5246 register char *bp;
5247 {
5248 register char *cp;
5249
5250 if (*bp == '\0')
5251 return;
5252 /* Go till you get to white space or a syntactic break */
5253 for (cp = bp + 1;
5254 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5255 cp++)
5256 continue;
5257 pfnote (savenstr (bp, cp-bp), TRUE,
5258 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5259 }
5260
5261 /* Initialize a linebuffer for use */
5262 static void
5263 initbuffer (lbp)
5264 linebuffer *lbp;
5265 {
5266 lbp->size = (DEBUG) ? 3 : 200;
5267 lbp->buffer = xnew (lbp->size, char);
5268 lbp->buffer[0] = '\0';
5269 lbp->len = 0;
5270 }
5271
5272 /*
5273 * Read a line of text from `stream' into `lbp', excluding the
5274 * newline or CR-NL, if any. Return the number of characters read from
5275 * `stream', which is the length of the line including the newline.
5276 *
5277 * On DOS or Windows we do not count the CR character, if any, before the
5278 * NL, in the returned length; this mirrors the behavior of emacs on those
5279 * platforms (for text files, it translates CR-NL to NL as it reads in the
5280 * file).
5281 */
5282 static long
5283 readline_internal (lbp, stream)
5284 linebuffer *lbp;
5285 register FILE *stream;
5286 {
5287 char *buffer = lbp->buffer;
5288 register char *p = lbp->buffer;
5289 register char *pend;
5290 int chars_deleted;
5291
5292 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5293
5294 while (1)
5295 {
5296 register int c = getc (stream);
5297 if (p == pend)
5298 {
5299 /* We're at the end of linebuffer: expand it. */
5300 lbp->size *= 2;
5301 xrnew (buffer, lbp->size, char);
5302 p += buffer - lbp->buffer;
5303 pend = buffer + lbp->size;
5304 lbp->buffer = buffer;
5305 }
5306 if (c == EOF)
5307 {
5308 *p = '\0';
5309 chars_deleted = 0;
5310 break;
5311 }
5312 if (c == '\n')
5313 {
5314 if (p > buffer && p[-1] == '\r')
5315 {
5316 p -= 1;
5317 #ifdef DOS_NT
5318 /* Assume CRLF->LF translation will be performed by Emacs
5319 when loading this file, so CRs won't appear in the buffer.
5320 It would be cleaner to compensate within Emacs;
5321 however, Emacs does not know how many CRs were deleted
5322 before any given point in the file. */
5323 chars_deleted = 1;
5324 #else
5325 chars_deleted = 2;
5326 #endif
5327 }
5328 else
5329 {
5330 chars_deleted = 1;
5331 }
5332 *p = '\0';
5333 break;
5334 }
5335 *p++ = c;
5336 }
5337 lbp->len = p - buffer;
5338
5339 return lbp->len + chars_deleted;
5340 }
5341
5342 /*
5343 * Like readline_internal, above, but in addition try to match the
5344 * input line against relevant regular expressions.
5345 */
5346 static long
5347 readline (lbp, stream)
5348 linebuffer *lbp;
5349 FILE *stream;
5350 {
5351 /* Read new line. */
5352 long result = readline_internal (lbp, stream);
5353 #ifdef ETAGS_REGEXPS
5354 int match;
5355 pattern *pp;
5356
5357 /* Match against relevant patterns. */
5358 if (lbp->len > 0)
5359 for (pp = p_head; pp != NULL; pp = pp->p_next)
5360 {
5361 /* Only use generic regexps or those for the current language. */
5362 if (pp->language != NULL && pp->language != curlang)
5363 continue;
5364
5365 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5366 switch (match)
5367 {
5368 case -2:
5369 /* Some error. */
5370 if (!pp->error_signaled)
5371 {
5372 error ("error while matching \"%s\"", pp->regex);
5373 pp->error_signaled = TRUE;
5374 }
5375 break;
5376 case -1:
5377 /* No match. */
5378 break;
5379 default:
5380 /* Match occurred. Construct a tag. */
5381 if (pp->name_pattern[0] != '\0')
5382 {
5383 /* Make a named tag. */
5384 char *name = substitute (lbp->buffer,
5385 pp->name_pattern, &pp->regs);
5386 if (name != NULL)
5387 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5388 }
5389 else
5390 {
5391 /* Make an unnamed tag. */
5392 pfnote ((char *)NULL, TRUE,
5393 lbp->buffer, match, lineno, linecharno);
5394 }
5395 break;
5396 }
5397 }
5398 #endif /* ETAGS_REGEXPS */
5399
5400 return result;
5401 }
5402
5403 \f
/*
 * Return a newly allocated copy of the whole string CP, made with
 * savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5414
5415 /*
5416 * Return a pointer to a space of size LEN+1 allocated with xnew where
5417 * the string CP has been copied for at most the first LEN characters.
5418 */
5419 static char *
5420 savenstr (cp, len)
5421 char *cp;
5422 int len;
5423 {
5424 register char *dp;
5425
5426 dp = xnew (len + 1, char);
5427 strncpy (dp, cp, len);
5428 dp[len] = '\0';
5429 return dp;
5430 }
5431
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if there is none.  The terminating null is part
 * of the string, so looking for '\0' finds the end of SP.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        found = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)found;
}
5453
5454
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if there is none.  The terminating null is part
 * of the string, so looking for '\0' finds the end of SP.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
5473
/* Return a pointer to the first character of CP that is not white
   space according to iswhite.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5483
/* Return a pointer to the first white space character of CP
   according to iswhite, or to the end of CP if there is none.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
5493
5494 /* Print error message and exit. */
5495 void
5496 fatal (s1, s2)
5497 char *s1, *s2;
5498 {
5499 error (s1, s2);
5500 exit (BAD);
5501 }
5502
5503 static void
5504 pfatal (s1)
5505 char *s1;
5506 {
5507 perror (s1);
5508 exit (BAD);
5509 }
5510
5511 static void
5512 suggest_asking_for_help ()
5513 {
5514 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5515 progname,
5516 #ifdef LONG_OPTIONS
5517 "--help"
5518 #else
5519 "-h"
5520 #endif
5521 );
5522 exit (BAD);
5523 }
5524
5525 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5526 static void
5527 error (s1, s2)
5528 const char *s1, *s2;
5529 {
5530 fprintf (stderr, "%s: ", progname);
5531 fprintf (stderr, s1, s2);
5532 fprintf (stderr, "\n");
5533 }
5534
5535 /* Return a newly-allocated string whose contents
5536 concatenate those of s1, s2, s3. */
5537 static char *
5538 concat (s1, s2, s3)
5539 char *s1, *s2, *s3;
5540 {
5541 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5542 char *result = xnew (len1 + len2 + len3 + 1, char);
5543
5544 strcpy (result, s1);
5545 strcpy (result + len1, s2);
5546 strcpy (result + len1 + len2, s3);
5547 result[len1 + len2 + len3] = '\0';
5548
5549 return result;
5550 }
5551
5552 \f
5553 /* Does the same work as the system V getcwd, but does not need to
5554 guess the buffer size in advance. */
5555 static char *
5556 etags_getcwd ()
5557 {
5558 #ifdef HAVE_GETCWD
5559 int bufsize = 200;
5560 char *path = xnew (bufsize, char);
5561
5562 while (getcwd (path, bufsize) == NULL)
5563 {
5564 if (errno != ERANGE)
5565 pfatal ("getcwd");
5566 bufsize *= 2;
5567 free (path);
5568 path = xnew (bufsize, char);
5569 }
5570
5571 canonicalize_filename (path);
5572 return path;
5573
5574 #else /* not HAVE_GETCWD */
5575 #if MSDOS
5576
5577 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5578
5579 getwd (path);
5580
5581 for (p = path; *p != '\0'; p++)
5582 if (*p == '\\')
5583 *p = '/';
5584 else
5585 *p = lowcase (*p);
5586
5587 return strdup (path);
5588 #else /* not MSDOS */
5589 linebuffer path;
5590 FILE *pipe;
5591
5592 initbuffer (&path);
5593 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5594 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5595 pfatal ("pwd");
5596 pclose (pipe);
5597
5598 return path.buffer;
5599 #endif /* not MSDOS */
5600 #endif /* not HAVE_GETCWD */
5601 }
5602
5603 /* Return a newly allocated string containing the file name of FILE
5604 relative to the absolute directory DIR (which should end with a slash). */
5605 static char *
5606 relative_filename (file, dir)
5607 char *file, *dir;
5608 {
5609 char *fp, *dp, *afn, *res;
5610 int i;
5611
5612 /* Find the common root of file and dir (with a trailing slash). */
5613 afn = absolute_filename (file, cwd);
5614 fp = afn;
5615 dp = dir;
5616 while (*fp++ == *dp++)
5617 continue;
5618 fp--, dp--; /* back to the first differing char */
5619 #ifdef DOS_NT
5620 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5621 return afn;
5622 #endif
5623 do /* look at the equal chars until '/' */
5624 fp--, dp--;
5625 while (*fp != '/');
5626
5627 /* Build a sequence of "../" strings for the resulting relative file name. */
5628 i = 0;
5629 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5630 i += 1;
5631 res = xnew (3*i + strlen (fp + 1) + 1, char);
5632 res[0] = '\0';
5633 while (i-- > 0)
5634 strcat (res, "../");
5635
5636 /* Add the file name relative to the common root of file and dir. */
5637 strcat (res, fp + 1);
5638 free (afn);
5639
5640 return res;
5641 }
5642
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   absolute already, it is appended to DIR.  The "/dirname/.." and
   "/." substrings are then removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the slash that starts the previous
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so the tail must be
                 moved with memmove: strcpy is undefined here. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." by moving the overlapping tail down. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Nothing is left: the name is the root directory.  Release the
         empty string, which is not returned. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5703
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place; if it contains
   no slash, the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *lastslash, *dirname;
  char saved;

  canonicalize_filename (file);
  lastslash = etags_strrchr (file, '/');
  if (lastslash != NULL)
    {
      /* Cut FILE just after its last slash for the time needed to
         make the directory part absolute. */
      saved = lastslash[1];
      lastslash[1] = '\0';
      dirname = absolute_filename (file, dir);
      lastslash[1] = saved;
      return dirname;
    }

  return savestr (dir);
}
5725
/* Whether the argument string is an absolute file name, that is one
   starting with a slash or, under DOS_NT, with a drive letter
   followed by ":/".  The argument string must have been
   canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return fn[0] == '/';
}
5738
/* Put a file name in canonical form.  Works in place.  Under DOS_NT
   a lower case drive letter is made upper case and backslashes are
   translated into slashes; elsewhere the name is left alone.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes. */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
5757
5758 /* Set the minimum size of a string contained in a linebuffer. */
5759 static void
5760 linebuffer_setlen (lbp, toksize)
5761 linebuffer *lbp;
5762 int toksize;
5763 {
5764 while (lbp->size <= toksize)
5765 {
5766 lbp->size *= 2;
5767 xrnew (lbp->buffer, lbp->size, char);
5768 }
5769 lbp->len = toksize;
5770 }
5771
/* Like malloc, but never return NULL: if SIZE bytes cannot be
   allocated, report it with fatal ().  */
long *
xmalloc (size)
     unsigned int size;
{
  long *mem;

  mem = (long *) malloc (size);
  if (!mem)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
5782
/* Like realloc, but never return NULL: if the block PTR cannot be
   resized to SIZE bytes, report it with fatal ().  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *mem;

  mem = (long *) realloc (ptr, size);
  if (!mem)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}