Work around small preprocessor bugs in sunos4 pcc and MinGW.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000, 2001
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1993 Francesco Potortì reorganised C and C++ based on work by Joe Wells.
29 * 1994 Regexp tags by Tom Tromey.
30 * 2001 Nested classes by Francesco Potortì based on work by Mykola Dzyuba.
31 *
32 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
33 */
34
35 char pot_etags_version[] = "@(#) pot revision number is $Revision: 14.14 $";
36
37 #define TRUE 1
38 #define FALSE 0
39
40 #ifdef DEBUG
41 # undef DEBUG
42 # define DEBUG TRUE
43 #else
44 # define DEBUG FALSE
45 # define NDEBUG /* disable assert */
46 #endif
47
48 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
49 # define P_(proto) proto
50 #else
51 # define P_(proto) ()
52 #endif
53
54 #ifdef HAVE_CONFIG_H
55 # include <config.h>
56 /* On some systems, Emacs defines static as nothing for the sake
57 of unexec. We don't want that here since we don't use unexec. */
58 # undef static
59 # define ETAGS_REGEXPS /* use the regexp features */
60 # define LONG_OPTIONS /* accept long options */
61 #endif /* HAVE_CONFIG_H */
62
63 #ifndef _GNU_SOURCE
64 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
65 #endif
66
67 /* WIN32_NATIVE is for Xemacs.
68 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
69 #ifdef WIN32_NATIVE
70 # undef MSDOS
71 # undef WINDOWSNT
72 # define WINDOWSNT
73 #endif /* WIN32_NATIVE */
74
75 #ifdef MSDOS
76 # undef MSDOS
77 # define MSDOS TRUE
78 # include <fcntl.h>
79 # include <sys/param.h>
80 # include <io.h>
81 # ifndef HAVE_CONFIG_H
82 # define DOS_NT
83 # include <sys/config.h>
84 # endif
85 #else
86 # define MSDOS FALSE
87 #endif /* MSDOS */
88
89 #ifdef WINDOWSNT
90 # include <stdlib.h>
91 # include <fcntl.h>
92 # include <string.h>
93 # include <direct.h>
94 # include <io.h>
95 # define MAXPATHLEN _MAX_PATH
96 # undef HAVE_NTGUI
97 # undef DOS_NT
98 # define DOS_NT
99 # ifndef HAVE_GETCWD
100 # define HAVE_GETCWD
101 # endif /* undef HAVE_GETCWD */
102 #else /* !WINDOWSNT */
103 # ifdef STDC_HEADERS
104 # include <stdlib.h>
105 # include <string.h>
106 # else
107 extern char *getenv ();
108 # endif
109 #endif /* !WINDOWSNT */
110
111 #ifdef HAVE_UNISTD_H
112 # include <unistd.h>
113 #else
114 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
115 extern char *getcwd (char *buf, size_t size);
116 # endif
117 #endif /* HAVE_UNISTD_H */
118
119 #include <stdio.h>
120 #include <ctype.h>
121 #include <errno.h>
122 #ifndef errno
123 extern int errno;
124 #endif
125 #include <sys/types.h>
126 #include <sys/stat.h>
127
128 #include <assert.h>
129 #ifdef NDEBUG
130 # undef assert /* some systems have a buggy assert.h */
131 # define assert(x) ((void) 0)
132 #endif
133
134 #if !defined (S_ISREG) && defined (S_IFREG)
135 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
136 #endif
137
138 #ifdef LONG_OPTIONS
139 # include <getopt.h>
140 #else
141 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
142 extern char *optarg;
143 extern int optind, opterr;
144 #endif /* LONG_OPTIONS */
145
146 #ifdef ETAGS_REGEXPS
147 # include <regex.h>
148 #endif /* ETAGS_REGEXPS */
149
150 /* Define CTAGS to make the program "ctags" compatible with the usual one.
151 Leave it undefined to make the program "etags", which makes emacs-style
152 tag tables and tags typedefs, #defines and struct/union/enum by default. */
153 #ifdef CTAGS
154 # undef CTAGS
155 # define CTAGS TRUE
156 #else
157 # define CTAGS FALSE
158 #endif
159
160 /* Exit codes for success and failure. */
161 #ifdef VMS
162 # define GOOD 1
163 # define BAD 0
164 #else
165 # define GOOD 0
166 # define BAD 1
167 #endif
168
/* String comparison helpers.  Neither operand may be NULL: the assertion
   checks both before strcmp/strncmp gets to dereference them.  */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))

#define CHARS 256		/* number of distinct values of an 8-bit char */
/* Map any char value, even a negative one, into the range 0..CHARS-1 so
   that it can index the tables below or be handed to <ctype.h>.  */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
#define iswhite(c)	(_wht[CHAR(c)]) /* c is white */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name */
#define begtoken(c)	(_btk[CHAR(c)]) /* c can start token */
#define intoken(c)	(_itk[CHAR(c)]) /* c can be in token */
#define endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens */

/* <ctype.h> classification applied to the masked character value.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

/* Case conversion applied to the masked character value.  */
#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
187
188
189 /*
190 * xnew, xrnew -- allocate, reallocate storage
191 *
192 * SYNOPSIS: Type *xnew (int n, Type);
193 * void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a pointer, cast to Type *, to room for N objects of Type.
 * xrnew resizes the storage OldPointer refers to so that it has room
 * for N objects of Type and assigns the result back to OldPointer,
 * which must therefore be an lvalue.
 * When DEBUG is true both go through trace_malloc/trace_realloc, which
 * additionally receive the call site (__FILE__, __LINE__); otherwise
 * they go through xmalloc/xrealloc.
194 */
195 #if DEBUG
196 # include "chkmalloc.h"
197 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
198 (n) * sizeof (Type)))
199 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
200 (char *) (op), (n) * sizeof (Type)))
201 #else
202 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
203 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
204 (char *) (op), (n) * sizeof (Type)))
205 #endif
206
207 typedef int bool;
208
209 typedef void Lang_function P_((FILE *));
210
/* One row of the compressors table: a file-name suffix paired with the
   command line used for files that carry that suffix. */
211 typedef struct
212 {
213 char *suffix; /* suffix text; the table lists it without a dot, e.g. "gz" */
214 char *command; /* Takes one arg and decompresses to stdout */
215 } compressor;
216
/* One row of the lang_names table.  Any of the three list pointers may
   be NULL; print_language_names walks the non-NULL ones up to a NULL
   entry. */
217 typedef struct
218 {
219 char *name; /* language name; NULL marks the end of lang_names */
220 Lang_function *function; /* Lang_function listed for the language; NULL in the "auto" row */
221 char **filenames; /* default file names, e.g. "Makefile" */
222 char **suffixes; /* default suffixes, listed without the dot */
223 char **interpreters; /* interpreter names, e.g. "perl" */
224 } language;
225
/* One tag.  Nodes are linked through `left' and `right' into a binary
   tree; the global `head' is declared as the head of that tree. */
226 typedef struct node_st
227 { /* sorting structure */
228 char *name; /* function or type name */
229 char *file; /* file name */
230 bool is_func; /* use pattern or line no */
231 bool been_warned; /* set if noticed dup */
232 int lno; /* line number tag is on */
233 long cno; /* character number line starts on */
234 char *pat; /* search pattern */
235 struct node_st *left, *right; /* left and right sons */
236 } node;
237
238 /*
239 * A `linebuffer' is a structure which holds a line of text.
240 * `readline_internal' reads a line from a stream into a linebuffer
241 * and works regardless of the length of the line.
242 * SIZE is the size of BUFFER, LEN is the length of the string in
243 * BUFFER after readline reads it.
244 */
245 typedef struct
246 {
247 long size; /* size of BUFFER */
248 int len; /* length of the string currently held in BUFFER */
249 char *buffer; /* the text itself */
250 } linebuffer;
251
252 /* Many compilers barf on this:
253 Lang_function Ada_funcs;
254 so let's write it this way */
255 static void Ada_funcs P_((FILE *));
256 static void Asm_labels P_((FILE *));
257 static void C_entries P_((int c_ext, FILE *));
258 static void default_C_entries P_((FILE *));
259 static void plain_C_entries P_((FILE *));
260 static void Cjava_entries P_((FILE *));
261 static void Cobol_paragraphs P_((FILE *));
262 static void Cplusplus_entries P_((FILE *));
263 static void Cstar_entries P_((FILE *));
264 static void Erlang_functions P_((FILE *));
265 static void Fortran_functions P_((FILE *));
266 static void Yacc_entries P_((FILE *));
267 static void Lisp_functions P_((FILE *));
268 static void Makefile_targets P_((FILE *));
269 static void Pascal_functions P_((FILE *));
270 static void Perl_functions P_((FILE *));
271 static void Postscript_functions P_((FILE *));
272 static void Prolog_functions P_((FILE *));
273 static void Python_functions P_((FILE *));
274 static void Scheme_functions P_((FILE *));
275 static void TeX_commands P_((FILE *));
276 static void Texinfo_nodes P_((FILE *));
277 static void just_read_file P_((FILE *));
278
279 static void print_language_names P_((void));
280 static void print_version P_((void));
281 static void print_help P_((void));
282 int main P_((int, char **));
283 static int number_len P_((long));
284
285 static compressor *get_compressor_from_suffix P_((char *, char **));
286 static language *get_language_from_langname P_((char *));
287 static language *get_language_from_interpreter P_((char *));
288 static language *get_language_from_filename P_((char *));
289 static int total_size_of_entries P_((node *));
290 static long readline P_((linebuffer *, FILE *));
291 static long readline_internal P_((linebuffer *, FILE *));
292 static void get_tag P_((char *));
293
294 #ifdef ETAGS_REGEXPS
295 static void analyse_regex P_((char *, bool));
296 static void add_regex P_((char *, bool, language *));
297 static void free_patterns P_((void));
298 #endif /* ETAGS_REGEXPS */
299 static void error P_((const char *, const char *));
300 static void suggest_asking_for_help P_((void));
301 void fatal P_((char *, char *));
302 static void pfatal P_((char *));
303 static void add_node P_((node *, node **));
304
305 static void init P_((void));
306 static void initbuffer P_((linebuffer *));
307 static void find_entries P_((char *, FILE *));
308 static void free_tree P_((node *));
309 static void pfnote P_((char *, bool, char *, int, int, long));
310 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
311 static void process_file P_((char *));
312 static void put_entries P_((node *));
313 static void takeprec P_((void));
314
315 static char *concat P_((char *, char *, char *));
316 static char *skip_spaces P_((char *));
317 static char *skip_non_spaces P_((char *));
318 static char *savenstr P_((char *, int));
319 static char *savestr P_((char *));
320 static char *etags_strchr P_((const char *, int));
321 static char *etags_strrchr P_((const char *, int));
322 static char *etags_getcwd P_((void));
323 static char *relative_filename P_((char *, char *));
324 static char *absolute_filename P_((char *, char *));
325 static char *absolute_dirname P_((char *, char *));
326 static bool filename_is_absolute P_((char *f));
327 static void canonicalize_filename P_((char *));
328 static void linebuffer_setlen P_((linebuffer *, int));
329 long *xmalloc P_((unsigned int));
330 long *xrealloc P_((char *, unsigned int));
331
332 \f
333 char searchar = '/'; /* use /.../ searches */
334
335 char *tagfile; /* output file */
336 char *progname; /* name this program was invoked with */
337 char *cwd; /* current working directory */
338 char *tagfiledir; /* directory of tagfile */
339 FILE *tagf; /* ioptr for tags file */
340
341 char *curfile; /* current input file name */
342 language *curlang; /* current language */
343
344 int lineno; /* line number of current line */
345 long charno; /* current character number */
346 long linecharno; /* charno of start of current line */
347 char *dbp; /* pointer to start of current tag */
348
349 node *head; /* the head of the binary tree of tags */
350
351 linebuffer lb; /* the current line */
352
353 /* boolean "functions" (see init) */
354 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
355 char
356 /* white chars */
357 *white = " \f\t\n\r\v",
358 /* not in a name */
359 *nonam = " \f\t\n\r(=,[;",
360 /* token ending chars */
361 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
362 /* token starting chars */
363 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
364 /* valid in-token chars */
365 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
366
367 bool append_to_tagfile; /* -a: append to tags */
368 /* The following four default to TRUE for etags, but to FALSE for ctags. */
369 bool typedefs; /* -t: create tags for C and Ada typedefs */
370 bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
371 /* 0 struct/enum/union decls, and C++ */
372 /* member functions. */
373 bool constantypedefs; /* -d: create tags for C #define, enum */
374 /* constants and variables. */
375 /* -D: opposite of -d. Default under ctags. */
376 bool declarations; /* --declarations: tag them and extern in C&Co*/
377 bool globals; /* create tags for global variables */
378 bool members; /* create tags for C member variables */
379 bool update; /* -u: update tags */
380 bool vgrind_style; /* -v: create vgrind style index output */
381 bool no_warnings; /* -w: suppress warnings */
382 bool cxref_style; /* -x: create cxref style output */
383 bool cplusplus; /* .[hc] means C++, not C */
384 bool noindentypedefs; /* -I: ignore indentation in C */
385 bool packages_only; /* --packages-only: in Ada, only tag packages*/
386
#ifdef LONG_OPTIONS
/* Long-option table handed to getopt_long.  A row with a non-NULL flag
   pointer stores its value straight into that variable, and getopt_long
   then returns 0; every other row makes getopt_long return the listed
   short-option character, which main handles in its switch.

   Each name appears once: getopt_long stops at the first exact name
   match, so a second "help" row could never be selected.  -H is still
   accepted as a short option through the option string.  */
struct option longopts[] =
{
  { "packages-only",      no_argument,	     &packages_only, TRUE  },
  { "append",		  no_argument,	     NULL,	     'a'   },
  { "backward-search",	  no_argument,	     NULL,	     'B'   },
  { "c++",		  no_argument,	     NULL,	     'C'   },
  { "cxref",		  no_argument,	     NULL,	     'x'   },
  { "defines",		  no_argument,	     NULL,	     'd'   },
  { "declarations",	  no_argument,	     &declarations,  TRUE  },
  { "no-defines",	  no_argument,	     NULL,	     'D'   },
  { "globals",		  no_argument,	     &globals,	     TRUE  },
  { "no-globals",	  no_argument,	     &globals,	     FALSE },
  { "help",		  no_argument,	     NULL,	     'h'   },
  { "ignore-indentation", no_argument,	     NULL,	     'I'   },
  { "include",		  required_argument, NULL,	     'i'   },
  { "language",		  required_argument, NULL,	     'l'   },
  { "members",		  no_argument,	     &members,	     TRUE  },
  { "no-members",	  no_argument,	     &members,	     FALSE },
  { "no-warn",		  no_argument,	     NULL,	     'w'   },
  { "output",		  required_argument, NULL,	     'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",		  required_argument, NULL,	     'r'   },
  { "no-regex",		  no_argument,	     NULL,	     'R'   },
  { "ignore-case-regex",  required_argument, NULL,	     'c'   },
#endif /* ETAGS_REGEXPS */
  { "typedefs",		  no_argument,	     NULL,	     't'   },
  { "typedefs-and-c++",	  no_argument,	     NULL,	     'T'   },
  { "update",		  no_argument,	     NULL,	     'u'   },
  { "version",		  no_argument,	     NULL,	     'V'   },
  { "vgrind",		  no_argument,	     NULL,	     'v'   },
  { NULL }
};
#endif /* LONG_OPTIONS */
422
423 #ifdef ETAGS_REGEXPS
424 /* Structure defining a regular expression. Elements are
425 the compiled pattern, and the name string. */
426 typedef struct pattern
427 {
428 struct pattern *p_next; /* next entry of the list that starts at p_head */
429 language *language; /* NOTE(review): presumably the language this regexp is tied to -- confirm in add_regex */
430 char *regex; /* NOTE(review): presumably the regexp as text -- confirm */
431 struct re_pattern_buffer *pattern; /* the compiled pattern */
432 struct re_registers regs; /* register set kept alongside the compiled pattern */
433 char *name_pattern; /* the name string */
434 bool error_signaled; /* NOTE(review): looks like a report-once flag -- confirm where it is set */
435 } pattern;
436
437 /* List of all regexps. */
438 pattern *p_head = NULL;
439
440 /* How many characters in the character set. (From regex.c.) */
441 #define CHAR_SET_SIZE 256
442 /* Translation table for case-insensitive matching. */
/* main fills entry i with lowcase (i) for every i below CHAR_SET_SIZE. */
443 char lc_trans[CHAR_SET_SIZE];
444 #endif /* ETAGS_REGEXPS */
445
/* Known compressed-file suffixes and the command that decompresses each.
   The row whose suffix is NULL ends the table; its command member is
   left to implicit zero initialization. */
446 compressor compressors[] =
447 {
448 { "z", "gzip -d -c"},
449 { "Z", "gzip -d -c"},
450 { "gz", "gzip -d -c"},
451 { "GZ", "gzip -d -c"},
452 { "bz2", "bzip2 -d -c" },
453 { NULL }
454 };
455
456 /*
457 * Language stuff.
458 */
459
460 /* Non-NULL if language fixed. */
461 language *forced_lang = NULL;
462
463 /* Ada code */
464 char *Ada_suffixes [] =
465 { "ads", "adb", "ada", NULL };
466
467 /* Assembly code */
468 char *Asm_suffixes [] = { "a", /* Unix assembler */
469 "asm", /* Microcontroller assembly */
470 "def", /* BSO/Tasking definition includes */
471 "inc", /* Microcontroller include files */
472 "ins", /* Microcontroller include files */
473 "s", "sa", /* Unix assembler */
474 "S", /* cpp-processed Unix assembler */
475 "src", /* BSO/Tasking C compiler output */
476 NULL
477 };
478
479 /* Note that .c and .h can be considered C++, if the --c++ flag was
480 given, or if the `class' keyword is met inside the file.
481 That is why default_C_entries is called for these. */
482 char *default_C_suffixes [] =
483 { "c", "h", NULL };
484
485 char *Cplusplus_suffixes [] =
486 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
487 "M", /* Objective C++ */
488 "pdb", /* Postscript with C syntax */
489 NULL };
490
491 char *Cjava_suffixes [] =
492 { "java", NULL };
493
494 char *Cobol_suffixes [] =
495 { "COB", "cob", NULL };
496
497 char *Cstar_suffixes [] =
498 { "cs", "hs", NULL };
499
500 char *Erlang_suffixes [] =
501 { "erl", "hrl", NULL };
502
503 char *Fortran_suffixes [] =
504 { "F", "f", "f90", "for", NULL };
505
506 char *Lisp_suffixes [] =
507 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
508
509 char *Makefile_filenames [] =
510 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
511
512 char *Pascal_suffixes [] =
513 { "p", "pas", NULL };
514
515 char *Perl_suffixes [] =
516 { "pl", "pm", NULL };
517 char *Perl_interpreters [] =
518 { "perl", "@PERL@", NULL };
519
520 char *plain_C_suffixes [] =
521 { "lm", /* Objective lex file */
522 "m", /* Objective C file */
523 "pc", /* Pro*C file */
524 NULL };
525
526 char *Postscript_suffixes [] =
527 { "ps", "psw", NULL }; /* .psw is for PSWrap */
528
529 char *Prolog_suffixes [] =
530 { "prolog", NULL };
531
532 char *Python_suffixes [] =
533 { "py", NULL };
534
535 /* Can't do the `SCM' or `scm' prefix with a version number. */
536 char *Scheme_suffixes [] =
537 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
538
539 char *TeX_suffixes [] =
540 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
541
542 char *Texinfo_suffixes [] =
543 { "texi", "texinfo", "txi", NULL };
544
545 char *Yacc_suffixes [] =
546 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
547
548 /*
549 * Table of languages.
550 *
551 * It is ok for a given function to be listed under more than one
552 * name. I just didn't.
553 */
554
/* Columns follow the member order of `language': name, tagging function,
   default file names, default suffixes, interpreter names.  The "auto"
   and "none" rows and the terminator give only their leading members;
   the remaining pointers are implicitly initialized to NULL.
   print_language_names stops at the row whose name is NULL. */
555 language lang_names [] =
556 {
557 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
558 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
559 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
560 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
561 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
562 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
563 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
564 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
565 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
566 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
567 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
568 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
569 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
570 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
571 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
572 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
573 { "python", Python_functions, NULL, Python_suffixes, NULL },
574 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
575 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
576 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
577 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
578 { "auto", NULL }, /* default guessing scheme */
579 { "none", just_read_file }, /* regexp matching only */
580 { NULL, NULL } /* end of list */
581 };
582
583 \f
584 static void
585 print_language_names ()
586 {
587 language *lang;
588 char **name, **ext;
589
590 puts ("\nThese are the currently supported languages, along with the\n\
591 default file names and dot suffixes:");
592 for (lang = lang_names; lang->name != NULL; lang++)
593 {
594 printf (" %-*s", 10, lang->name);
595 if (lang->filenames != NULL)
596 for (name = lang->filenames; *name != NULL; name++)
597 printf (" %s", *name);
598 if (lang->suffixes != NULL)
599 for (ext = lang->suffixes; *ext != NULL; ext++)
600 printf (" .%s", *ext);
601 puts ("");
602 }
603 puts ("Where `auto' means use default language for files based on file\n\
604 name suffix, and `none' means only do regexp processing on files.\n\
605 If no language is specified and no matching suffix is found,\n\
606 the first line of the file is read for a sharp-bang (#!) sequence\n\
607 followed by the name of an interpreter. If no such sequence is found,\n\
608 Fortran is tried first; if no tags are found, C is tried next.\n\
609 When parsing any C file, a \"class\" keyword switches to C++.\n\
610 Compressed files are supported using gzip and bzip2.");
611 }
612
613 #ifndef EMACS_NAME
614 # define EMACS_NAME "GNU Emacs"
615 #endif
616 #ifndef VERSION
617 # define VERSION "21"
618 #endif
619 static void
620 print_version ()
621 {
622 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
623 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
624 puts ("This program is distributed under the same terms as Emacs");
625
626 exit (GOOD);
627 }
628
/* Print the usage summary and the description of every option on stdout,
   followed by the language table from print_language_names and the bug
   report address, then exit with GOOD.  Does not return.
   Several options are described only for one of the two programs: the
   CTAGS tests below select the text that applies to the program being
   built. */
629 static void
630 print_help ()
631 {
632 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
633 \n\
634 These are the options accepted by %s.\n", progname, progname);
635 #ifdef LONG_OPTIONS
636 puts ("You may use unambiguous abbreviations for the long option names.");
637 #else
638 puts ("Long option names do not work with this executable, as it is not\n\
639 linked with GNU getopt.");
640 #endif /* LONG_OPTIONS */
641 puts ("A - as file name means read names from stdin (one per line).");
/* The printf below ends without a newline; the puts ("\n") after it
   supplies the line end plus one blank line in either case. */
642 if (!CTAGS)
643 printf (" Absolute names are stored in the output file as they are.\n\
644 Relative ones are stored relative to the output file's directory.");
645 puts ("\n");
646
647 puts ("-a, --append\n\
648 Append tag entries to existing tags file.");
649
650 puts ("--packages-only\n\
651 For Ada files, only generate tags for packages .");
652
653 if (CTAGS)
654 puts ("-B, --backward-search\n\
655 Write the search commands for the tag entries using '?', the\n\
656 backward-search command instead of '/', the forward-search command.");
657
658 /* This option is mostly obsolete, because etags can now automatically
659 detect C++. Retained for backward compatibility and for debugging and
660 experimentation. In principle, we could want to tag as C++ even
661 before any "class" keyword.
662 puts ("-C, --c++\n\
663 Treat files whose name suffix defaults to C language as C++ files.");
664 */
665
666 puts ("--declarations\n\
667 In C and derived languages, create tags for function declarations,");
668 if (CTAGS)
669 puts ("\tand create tags for extern variables if --globals is used.");
670 else
671 puts
672 ("\tand create tags for extern variables unless --no-globals is used.");
673
/* Only the option that departs from the program's default is shown. */
674 if (CTAGS)
675 puts ("-d, --defines\n\
676 Create tag entries for C #define constants and enum constants, too.");
677 else
678 puts ("-D, --no-defines\n\
679 Don't create tag entries for C #define constants and enum constants.\n\
680 This makes the tags file smaller.");
681
682 if (!CTAGS)
683 {
684 puts ("-i FILE, --include=FILE\n\
685 Include a note in tag file indicating that, when searching for\n\
686 a tag, one should also consult the tags file FILE after\n\
687 checking the current file.");
688 puts ("-l LANG, --language=LANG\n\
689 Force the following files to be considered as written in the\n\
690 named language up to the next --language=LANG option.");
691 }
692
693 if (CTAGS)
694 puts ("--globals\n\
695 Create tag entries for global variables in some languages.");
696 else
697 puts ("--no-globals\n\
698 Do not create tag entries for global variables in some\n\
699 languages. This makes the tags file smaller.");
700 puts ("--members\n\
701 Create tag entries for member variables in C and derived languages.");
702
703 #ifdef ETAGS_REGEXPS
704 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
705 Make a tag for each line matching pattern REGEXP in the following\n\
706 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
707 regexfile is a file containing one REGEXP per line.\n\
708 REGEXP is anchored (as if preceded by ^).\n\
709 The form /REGEXP/NAME/ creates a named tag.\n\
710 For example Tcl named tags can be created with:\n\
711 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
712 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
713 Like -r, --regex but ignore case when matching expressions.");
714 puts ("-R, --no-regex\n\
715 Don't create tags from regexps for the following files.");
716 #endif /* ETAGS_REGEXPS */
717 puts ("-o FILE, --output=FILE\n\
718 Write the tags to FILE.");
719 puts ("-I, --ignore-indentation\n\
720 Don't rely on indentation quite as much as normal. Currently,\n\
721 this means not to assume that a closing brace in the first\n\
722 column is the final brace of a function or structure\n\
723 definition in C and C++.");
724
/* The options in this group are described for ctags only. */
725 if (CTAGS)
726 {
727 puts ("-t, --typedefs\n\
728 Generate tag entries for C and Ada typedefs.");
729 puts ("-T, --typedefs-and-c++\n\
730 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
731 and C++ member functions.");
732 puts ("-u, --update\n\
733 Update the tag entries for the given files, leaving tag\n\
734 entries for other files in place. Currently, this is\n\
735 implemented by deleting the existing entries for the given\n\
736 files and then rewriting the new entries at the end of the\n\
737 tags file. It is often faster to simply rebuild the entire\n\
738 tag file than to use this.");
739 puts ("-v, --vgrind\n\
740 Generates an index of items intended for human consumption,\n\
741 similar to the output of vgrind. The index is sorted, and\n\
742 gives the page number of each item.");
743 puts ("-w, --no-warn\n\
744 Suppress warning messages about entries defined in multiple\n\
745 files.");
746 puts ("-x, --cxref\n\
747 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
748 The output uses line numbers instead of page numbers, but\n\
749 beyond that the differences are cosmetic; try both to see\n\
750 which you like.");
751 }
752
753 puts ("-V, --version\n\
754 Print the version of the program.\n\
755 -h, --help\n\
756 Print this help message.");
757
758 print_language_names ();
759
760 puts ("");
761 puts ("Report bugs to bug-gnu-emacs@gnu.org");
762
763 exit (GOOD);
764 }
765
766 \f
/* Kind of one recorded command-line argument; main stores one of these
   in each argbuffer entry while scanning the options. */
767 enum argument_type
768 {
769 at_language, /* recorded for -l when the language name is recognized */
770 at_regexp, /* recorded for -r (with the regexp) and for -R (with a NULL regexp) */
771 at_filename, /* recorded for each file name seen */
772 at_icregexp /* recorded for -c, the ignore-case form of -r */
773 };
774
775 /* This structure helps us allow mixing of --lang and file names. */
/* main keeps an array of these, one per recorded argument, in the order
   in which the arguments were given. */
776 typedef struct
777 {
778 enum argument_type arg_type; /* which kind of argument this entry records */
779 char *what; /* the option argument: a file name or a regexp; NULL for -R */
780 language *lang; /* language of the regexp */
/* NOTE(review): in the option loop `lang' is assigned only for
   at_language entries -- confirm how regexp entries get theirs. */
781 } argument;
782
783 #ifdef VMS /* VMS specific functions */
784
785 #define EOS '\0'
786
787 /* This is a BUG! ANY arbitrary limit is a BUG!
788 Won't someone please fix this? */
789 #define MAX_FILE_SPEC_LEN 255
/* A file specification held as a length word followed by the text.
   fn_exp passes the address of one of these as the buffer of its output
   descriptor and afterwards writes EOS at body[curlen].
   NOTE(review): this assumes the system routine stores the result
   length in curlen -- confirm against the VMS descriptor documentation. */
790 typedef struct {
791 short curlen; /* length of the text currently in body */
792 char body[MAX_FILE_SPEC_LEN + 1]; /* the text, with room for a final EOS */
793 } vspec;
794
795 /*
796 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
797 returning in each successive call the next file name matching the input
798 spec. The function expects that each in_spec passed
799 to it will be processed to completion; in particular, up to and
800 including the call following that in which the last matching name
801 is returned, the function ignores the value of in_spec, and will
802 only start processing a new spec with the following call.
803 If an error occurs, on return out_spec contains the value
804 of in_spec when the error occurred.
805
806 With each successive file name returned in out_spec, the
807 function's return value is one. When there are no more matching
808 names the function returns zero. If on the first call no file
809 matches in_spec, or there is any other error, -1 is returned.
810 */
811
812 #include <rmsdef.h>
813 #include <descrip.h>
814 #define OUTSIZE MAX_FILE_SPEC_LEN
/* Return 1 with the next matching file name in OUT, 0 when there are no
   more matches, or -1 on any other status, in which case IN is copied
   into OUT->body.  State is kept in static variables between calls, so
   one specification must be processed to completion before another is
   started. */
815 static short
816 fn_exp (out, in)
817 vspec *out;
818 char *in;
819 {
820 static long context = 0;
821 static struct dsc$descriptor_s o;
822 static struct dsc$descriptor_s i;
823 static bool pass1 = TRUE;
824 long status;
825 short retval;
826
/* First call for a new specification: set up the input and output
   descriptors.  They are static, so the calls that continue the same
   search reuse them and ignore the current value of IN. */
827 if (pass1)
828 {
829 pass1 = FALSE;
830 o.dsc$a_pointer = (char *) out;
831 o.dsc$w_length = (short)OUTSIZE;
832 i.dsc$a_pointer = in;
833 i.dsc$w_length = (short)strlen(in);
834 i.dsc$b_dtype = DSC$K_DTYPE_T;
835 i.dsc$b_class = DSC$K_CLASS_S;
836 o.dsc$b_dtype = DSC$K_DTYPE_VT;
837 o.dsc$b_class = DSC$K_CLASS_VS;
838 }
/* CONTEXT is handed to lib$find_file by address on every call and is
   released with lib$find_file_end once the search is over. */
839 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
840 {
/* Terminate the returned name at the length recorded in curlen. */
841 out->body[out->curlen] = EOS;
842 return 1;
843 }
844 else if (status == RMS$_NMF)
845 retval = 0;
846 else
847 {
848 strcpy(out->body, in);
849 retval = -1;
850 }
/* The search is over, either exhausted or failed: release the context
   and arm pass1 so that the next call starts on a new specification. */
851 lib$find_file_end(&context);
852 pass1 = TRUE;
853 return retval;
854 }
855
856 /*
857 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
858 name of each file specified by the provided arg expanding wildcards.
859 */
860 static char *
861 gfnames (arg, p_error)
862 char *arg;
863 bool *p_error;
864 {
865 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
866
867 switch (fn_exp (&filename, arg))
868 {
869 case 1:
870 *p_error = FALSE;
871 return filename.body;
872 case 0:
873 *p_error = FALSE;
874 return NULL;
875 default:
876 *p_error = TRUE;
877 return filename.body;
878 }
879 }
880
#ifndef OLD			/* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library's system(): it runs nothing, reports through
   error() that the facility is missing, and returns -1 so that a caller
   checking the result sees a failure instead of an indeterminate value.
   NOTE(review): this stub is compiled when OLD is *not* defined, which
   reads as the opposite of the remark on the #ifndef line -- confirm
   which VMS versions are meant to get it.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
888
889 #define VERSION_DELIM ';'
890 char *massage_name (s)
891 char *s;
892 {
893 char *start = s;
894
895 for ( ; *s; s++)
896 if (*s == VERSION_DELIM)
897 {
898 *s = EOS;
899 break;
900 }
901 else
902 *s = lowcase (*s);
903 return start;
904 }
905 #endif /* VMS */
906
907 \f
908 int
909 main (argc, argv)
910 int argc;
911 char *argv[];
912 {
913 int i;
914 unsigned int nincluded_files;
915 char **included_files;
916 char *this_file;
917 argument *argbuffer;
918 int current_arg, file_count;
919 linebuffer filename_lb;
920 #ifdef VMS
921 bool got_err;
922 #endif
923
924 #ifdef DOS_NT
925 _fmode = O_BINARY; /* all of files are treated as binary files */
926 #endif /* DOS_NT */
927
928 progname = argv[0];
929 nincluded_files = 0;
930 included_files = xnew (argc, char *);
931 current_arg = 0;
932 file_count = 0;
933
934 /* Allocate enough no matter what happens. Overkill, but each one
935 is small. */
936 argbuffer = xnew (argc, argument);
937
938 #ifdef ETAGS_REGEXPS
939 /* Set syntax for regular expression routines. */
940 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
941 /* Translation table for case-insensitive search. */
942 for (i = 0; i < CHAR_SET_SIZE; i++)
943 lc_trans[i] = lowcase (i);
944 #endif /* ETAGS_REGEXPS */
945
946 /*
947 * If etags, always find typedefs and structure tags. Why not?
948 * Also default to find macro constants, enum constants and
949 * global variables.
950 */
951 if (!CTAGS)
952 {
953 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
954 globals = TRUE;
955 declarations = FALSE;
956 members = FALSE;
957 }
958
959 while (1)
960 {
961 int opt;
962 char *optstring;
963
964 #ifdef ETAGS_REGEXPS
965 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
966 #else
967 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
968 #endif /* ETAGS_REGEXPS */
969
970 #ifndef LONG_OPTIONS
971 optstring = optstring + 1;
972 #endif /* LONG_OPTIONS */
973
974 opt = getopt_long (argc, argv, optstring, longopts, 0);
975 if (opt == EOF)
976 break;
977
978 switch (opt)
979 {
980 case 0:
981 /* If getopt returns 0, then it has already processed a
982 long-named option. We should do nothing. */
983 break;
984
985 case 1:
986 /* This means that a file name has been seen. Record it. */
987 argbuffer[current_arg].arg_type = at_filename;
988 argbuffer[current_arg].what = optarg;
989 ++current_arg;
990 ++file_count;
991 break;
992
993 /* Common options. */
994 case 'a': append_to_tagfile = TRUE; break;
995 case 'C': cplusplus = TRUE; break;
996 case 'd': constantypedefs = TRUE; break;
997 case 'D': constantypedefs = FALSE; break;
998 case 'f': /* for compatibility with old makefiles */
999 case 'o':
1000 if (tagfile)
1001 {
1002 error ("-o option may only be given once.", (char *)NULL);
1003 suggest_asking_for_help ();
1004 }
1005 tagfile = optarg;
1006 break;
1007 case 'I':
1008 case 'S': /* for backward compatibility */
1009 noindentypedefs = TRUE;
1010 break;
1011 case 'l':
1012 {
1013 language *lang = get_language_from_langname (optarg);
1014 if (lang != NULL)
1015 {
1016 argbuffer[current_arg].lang = lang;
1017 argbuffer[current_arg].arg_type = at_language;
1018 ++current_arg;
1019 }
1020 }
1021 break;
1022 #ifdef ETAGS_REGEXPS
1023 case 'r':
1024 argbuffer[current_arg].arg_type = at_regexp;
1025 argbuffer[current_arg].what = optarg;
1026 ++current_arg;
1027 break;
1028 case 'R':
1029 argbuffer[current_arg].arg_type = at_regexp;
1030 argbuffer[current_arg].what = NULL;
1031 ++current_arg;
1032 break;
1033 case 'c':
1034 argbuffer[current_arg].arg_type = at_icregexp;
1035 argbuffer[current_arg].what = optarg;
1036 ++current_arg;
1037 break;
1038 #endif /* ETAGS_REGEXPS */
1039 case 'V':
1040 print_version ();
1041 break;
1042 case 'h':
1043 case 'H':
1044 print_help ();
1045 break;
1046 case 't':
1047 typedefs = TRUE;
1048 break;
1049 case 'T':
1050 typedefs = typedefs_or_cplusplus = TRUE;
1051 break;
1052 #if (!CTAGS)
1053 /* Etags options */
1054 case 'i':
1055 included_files[nincluded_files++] = optarg;
1056 break;
1057 #else /* CTAGS */
1058 /* Ctags options. */
1059 case 'B': searchar = '?'; break;
1060 case 'u': update = TRUE; break;
1061 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1062 case 'x': cxref_style = TRUE; break;
1063 case 'w': no_warnings = TRUE; break;
1064 #endif /* CTAGS */
1065 default:
1066 suggest_asking_for_help ();
1067 }
1068 }
1069
1070 for (; optind < argc; ++optind)
1071 {
1072 argbuffer[current_arg].arg_type = at_filename;
1073 argbuffer[current_arg].what = argv[optind];
1074 ++current_arg;
1075 ++file_count;
1076 }
1077
1078 if (nincluded_files == 0 && file_count == 0)
1079 {
1080 error ("no input files specified.", (char *)NULL);
1081 suggest_asking_for_help ();
1082 }
1083
1084 if (tagfile == NULL)
1085 tagfile = CTAGS ? "tags" : "TAGS";
1086 cwd = etags_getcwd (); /* the current working directory */
1087 if (cwd[strlen (cwd) - 1] != '/')
1088 {
1089 char *oldcwd = cwd;
1090 cwd = concat (oldcwd, "/", "");
1091 free (oldcwd);
1092 }
1093 if (streq (tagfile, "-"))
1094 tagfiledir = cwd;
1095 else
1096 tagfiledir = absolute_dirname (tagfile, cwd);
1097
1098 init (); /* set up boolean "functions" */
1099
1100 initbuffer (&lb);
1101 initbuffer (&filename_lb);
1102
1103 if (!CTAGS)
1104 {
1105 if (streq (tagfile, "-"))
1106 {
1107 tagf = stdout;
1108 #ifdef DOS_NT
1109 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1110 doesn't take effect until after `stdout' is already open). */
1111 if (!isatty (fileno (stdout)))
1112 setmode (fileno (stdout), O_BINARY);
1113 #endif /* DOS_NT */
1114 }
1115 else
1116 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1117 if (tagf == NULL)
1118 pfatal (tagfile);
1119 }
1120
1121 /*
1122 * Loop through files finding functions.
1123 */
1124 for (i = 0; i < current_arg; ++i)
1125 {
1126 switch (argbuffer[i].arg_type)
1127 {
1128 case at_language:
1129 forced_lang = argbuffer[i].lang;
1130 break;
1131 #ifdef ETAGS_REGEXPS
1132 case at_regexp:
1133 analyse_regex (argbuffer[i].what, FALSE);
1134 break;
1135 case at_icregexp:
1136 analyse_regex (argbuffer[i].what, TRUE);
1137 break;
1138 #endif
1139 case at_filename:
1140 #ifdef VMS
1141 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1142 {
1143 if (got_err)
1144 {
1145 error ("can't find file %s\n", this_file);
1146 argc--, argv++;
1147 }
1148 else
1149 {
1150 this_file = massage_name (this_file);
1151 }
1152 #else
1153 this_file = argbuffer[i].what;
1154 #endif
1155 /* Input file named "-" means read file names from stdin
1156 (one per line) and use them. */
1157 if (streq (this_file, "-"))
1158 while (readline_internal (&filename_lb, stdin) > 0)
1159 process_file (filename_lb.buffer);
1160 else
1161 process_file (this_file);
1162 #ifdef VMS
1163 }
1164 #endif
1165 break;
1166 }
1167 }
1168
1169 #ifdef ETAGS_REGEXPS
1170 free_patterns ();
1171 #endif /* ETAGS_REGEXPS */
1172
1173 if (!CTAGS)
1174 {
1175 while (nincluded_files-- > 0)
1176 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1177
1178 fclose (tagf);
1179 exit (GOOD);
1180 }
1181
1182 /* If CTAGS, we are here. process_file did not write the tags yet,
1183 because we want them ordered. Let's do it now. */
1184 if (cxref_style)
1185 {
1186 put_entries (head);
1187 free_tree (head);
1188 head = NULL;
1189 exit (GOOD);
1190 }
1191
1192 if (update)
1193 {
1194 char cmd[BUFSIZ];
1195 for (i = 0; i < current_arg; ++i)
1196 {
1197 if (argbuffer[i].arg_type != at_filename)
1198 continue;
1199 sprintf (cmd,
1200 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1201 tagfile, argbuffer[i].what, tagfile);
1202 if (system (cmd) != GOOD)
1203 fatal ("failed to execute shell command", (char *)NULL);
1204 }
1205 append_to_tagfile = TRUE;
1206 }
1207
1208 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1209 if (tagf == NULL)
1210 pfatal (tagfile);
1211 put_entries (head);
1212 free_tree (head);
1213 head = NULL;
1214 fclose (tagf);
1215
1216 if (update)
1217 {
1218 char cmd[BUFSIZ];
1219 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1220 exit (system (cmd));
1221 }
1222 return GOOD;
1223 }
1224
1225
1226
1227 /*
1228 * Return a compressor given the file name. If EXTPTR is non-zero,
1229 * return a pointer into FILE where the compressor-specific
1230 * extension begins. If no compressor is found, NULL is returned
1231 * and EXTPTR is not significant.
1232 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1233 */
1234 static compressor *
1235 get_compressor_from_suffix (file, extptr)
1236 char *file;
1237 char **extptr;
1238 {
1239 compressor *compr;
1240 char *slash, *suffix;
1241
1242 /* This relies on FN to be after canonicalize_filename,
1243 so we don't need to consider backslashes on DOS_NT. */
1244 slash = etags_strrchr (file, '/');
1245 suffix = etags_strrchr (file, '.');
1246 if (suffix == NULL || suffix < slash)
1247 return NULL;
1248 if (extptr != NULL)
1249 *extptr = suffix;
1250 suffix += 1;
1251 /* Let those poor souls who live with DOS 8+3 file name limits get
1252 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1253 Only the first do loop is run if not MSDOS */
1254 do
1255 {
1256 for (compr = compressors; compr->suffix != NULL; compr++)
1257 if (streq (compr->suffix, suffix))
1258 return compr;
1259 if (!MSDOS)
1260 break; /* do it only once: not really a loop */
1261 if (extptr != NULL)
1262 *extptr = ++suffix;
1263 } while (*suffix != '\0');
1264 return NULL;
1265 }
1266
1267
1268
1269 /*
1270 * Return a language given the name.
1271 */
1272 static language *
1273 get_language_from_langname (name)
1274 char *name;
1275 {
1276 language *lang;
1277
1278 if (name == NULL)
1279 error ("empty language name", (char *)NULL);
1280 else
1281 {
1282 for (lang = lang_names; lang->name != NULL; lang++)
1283 if (streq (name, lang->name))
1284 return lang;
1285 error ("unknown language \"%s\"", name);
1286 }
1287
1288 return NULL;
1289 }
1290
1291
1292 /*
1293 * Return a language given the interpreter name.
1294 */
1295 static language *
1296 get_language_from_interpreter (interpreter)
1297 char *interpreter;
1298 {
1299 language *lang;
1300 char **iname;
1301
1302 if (interpreter == NULL)
1303 return NULL;
1304 for (lang = lang_names; lang->name != NULL; lang++)
1305 if (lang->interpreters != NULL)
1306 for (iname = lang->interpreters; *iname != NULL; iname++)
1307 if (streq (*iname, interpreter))
1308 return lang;
1309
1310 return NULL;
1311 }
1312
1313
1314
1315 /*
1316 * Return a language given the file name.
1317 */
1318 static language *
1319 get_language_from_filename (file)
1320 char *file;
1321 {
1322 language *lang;
1323 char **name, **ext, *suffix;
1324
1325 /* Try whole file name first. */
1326 for (lang = lang_names; lang->name != NULL; lang++)
1327 if (lang->filenames != NULL)
1328 for (name = lang->filenames; *name != NULL; name++)
1329 if (streq (*name, file))
1330 return lang;
1331
1332 /* If not found, try suffix after last dot. */
1333 suffix = etags_strrchr (file, '.');
1334 if (suffix == NULL)
1335 return NULL;
1336 suffix += 1;
1337 for (lang = lang_names; lang->name != NULL; lang++)
1338 if (lang->suffixes != NULL)
1339 for (ext = lang->suffixes; *ext != NULL; ext++)
1340 if (streq (*ext, suffix))
1341 return lang;
1342 return NULL;
1343 }
1344
1345
1346
1347 /*
1348 * This routine is called on each file argument.
1349 */
1350 static void
1351 process_file (file)
1352 char *file;
1353 {
1354 struct stat stat_buf;
1355 FILE *inf;
1356 compressor *compr;
1357 char *compressed_name, *uncompressed_name;
1358 char *ext, *real_name;
1359
1360
1361 canonicalize_filename (file);
1362 if (streq (file, tagfile) && !streq (tagfile, "-"))
1363 {
1364 error ("skipping inclusion of %s in self.", file);
1365 return;
1366 }
1367 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1368 {
1369 compressed_name = NULL;
1370 real_name = uncompressed_name = savestr (file);
1371 }
1372 else
1373 {
1374 real_name = compressed_name = savestr (file);
1375 uncompressed_name = savenstr (file, ext - file);
1376 }
1377
1378 /* If the canonicalised uncompressed name has already be dealt with,
1379 skip it silently, else add it to the list. */
1380 {
1381 typedef struct processed_file
1382 {
1383 char *filename;
1384 struct processed_file *next;
1385 } processed_file;
1386 static processed_file *pf_head = NULL;
1387 register processed_file *fnp;
1388
1389 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1390 if (streq (uncompressed_name, fnp->filename))
1391 goto exit;
1392 fnp = pf_head;
1393 pf_head = xnew (1, struct processed_file);
1394 pf_head->filename = savestr (uncompressed_name);
1395 pf_head->next = fnp;
1396 }
1397
1398 if (stat (real_name, &stat_buf) != 0)
1399 {
1400 /* Reset real_name and try with a different name. */
1401 real_name = NULL;
1402 if (compressed_name != NULL) /* try with the given suffix */
1403 {
1404 if (stat (uncompressed_name, &stat_buf) == 0)
1405 real_name = uncompressed_name;
1406 }
1407 else /* try all possible suffixes */
1408 {
1409 for (compr = compressors; compr->suffix != NULL; compr++)
1410 {
1411 compressed_name = concat (file, ".", compr->suffix);
1412 if (stat (compressed_name, &stat_buf) != 0)
1413 {
1414 if (MSDOS)
1415 {
1416 char *suf = compressed_name + strlen (file);
1417 size_t suflen = strlen (compr->suffix) + 1;
1418 for ( ; suf[1]; suf++, suflen--)
1419 {
1420 memmove (suf, suf + 1, suflen);
1421 if (stat (compressed_name, &stat_buf) == 0)
1422 {
1423 real_name = compressed_name;
1424 break;
1425 }
1426 }
1427 if (real_name != NULL)
1428 break;
1429 } /* MSDOS */
1430 free (compressed_name);
1431 compressed_name = NULL;
1432 }
1433 else
1434 {
1435 real_name = compressed_name;
1436 break;
1437 }
1438 }
1439 }
1440 if (real_name == NULL)
1441 {
1442 perror (file);
1443 goto exit;
1444 }
1445 } /* try with a different name */
1446
1447 if (!S_ISREG (stat_buf.st_mode))
1448 {
1449 error ("skipping %s: it is not a regular file.", real_name);
1450 goto exit;
1451 }
1452 if (real_name == compressed_name)
1453 {
1454 char *cmd = concat (compr->command, " ", real_name);
1455 inf = (FILE *) popen (cmd, "r");
1456 free (cmd);
1457 }
1458 else
1459 inf = fopen (real_name, "r");
1460 if (inf == NULL)
1461 {
1462 perror (real_name);
1463 goto exit;
1464 }
1465
1466 find_entries (uncompressed_name, inf);
1467
1468 if (real_name == compressed_name)
1469 pclose (inf);
1470 else
1471 fclose (inf);
1472
1473 if (!CTAGS)
1474 {
1475 char *filename;
1476
1477 if (filename_is_absolute (uncompressed_name))
1478 {
1479 /* file is an absolute file name. Canonicalise it. */
1480 filename = absolute_filename (uncompressed_name, cwd);
1481 }
1482 else
1483 {
1484 /* file is a file name relative to cwd. Make it relative
1485 to the directory of the tags file. */
1486 filename = relative_filename (uncompressed_name, tagfiledir);
1487 }
1488 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1489 free (filename);
1490 put_entries (head);
1491 free_tree (head);
1492 head = NULL;
1493 }
1494
1495 exit:
1496 if (compressed_name) free(compressed_name);
1497 if (uncompressed_name) free(uncompressed_name);
1498 return;
1499 }
1500
1501 /*
1502 * This routine sets up the boolean pseudo-functions which work
1503 * by setting boolean flags dependent upon the corresponding character.
1504 * Every char which is NOT in that string is not a white char. Therefore,
1505 * all of the array "_wht" is set to FALSE, and then the elements
1506 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1507 * of a char is TRUE if it is the string "white", else FALSE.
1508 */
1509 static void
1510 init ()
1511 {
1512 register char *sp;
1513 register int i;
1514
1515 for (i = 0; i < CHARS; i++)
1516 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1517 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1518 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1519 notinname('\0') = notinname('\n');
1520 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1521 begtoken('\0') = begtoken('\n');
1522 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1523 intoken('\0') = intoken('\n');
1524 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1525 endtoken('\0') = endtoken('\n');
1526 }
1527
1528 /*
1529 * This routine opens the specified file and calls the function
1530 * which finds the function and type definitions.
1531 */
1532 node *last_node = NULL;
1533
1534 static void
1535 find_entries (file, inf)
1536 char *file;
1537 FILE *inf;
1538 {
1539 char *cp;
1540 language *lang;
1541 node *old_last_node;
1542
1543 /* Memory leakage here: the string pointed by curfile is
1544 never released, because curfile is copied into np->file
1545 for each node, to be used in CTAGS mode. The amount of
1546 memory leaked here is the sum of the lengths of the
1547 file names. */
1548 curfile = savestr (file);
1549
1550 /* If user specified a language, use it. */
1551 lang = forced_lang;
1552 if (lang != NULL && lang->function != NULL)
1553 {
1554 curlang = lang;
1555 lang->function (inf);
1556 return;
1557 }
1558
1559 /* Try to guess the language given the file name. */
1560 lang = get_language_from_filename (file);
1561 if (lang != NULL && lang->function != NULL)
1562 {
1563 curlang = lang;
1564 lang->function (inf);
1565 return;
1566 }
1567
1568 /* Look for sharp-bang as the first two characters. */
1569 if (readline_internal (&lb, inf) > 0
1570 && lb.len >= 2
1571 && lb.buffer[0] == '#'
1572 && lb.buffer[1] == '!')
1573 {
1574 char *lp;
1575
1576 /* Set lp to point at the first char after the last slash in the
1577 line or, if no slashes, at the first nonblank. Then set cp to
1578 the first successive blank and terminate the string. */
1579 lp = etags_strrchr (lb.buffer+2, '/');
1580 if (lp != NULL)
1581 lp += 1;
1582 else
1583 lp = skip_spaces (lb.buffer + 2);
1584 cp = skip_non_spaces (lp);
1585 *cp = '\0';
1586
1587 if (strlen (lp) > 0)
1588 {
1589 lang = get_language_from_interpreter (lp);
1590 if (lang != NULL && lang->function != NULL)
1591 {
1592 curlang = lang;
1593 lang->function (inf);
1594 return;
1595 }
1596 }
1597 }
1598 /* We rewind here, even if inf may be a pipe. We fail if the
1599 length of the first line is longer than the pipe block size,
1600 which is unlikely. */
1601 rewind (inf);
1602
1603 /* Try Fortran. */
1604 old_last_node = last_node;
1605 curlang = get_language_from_langname ("fortran");
1606 Fortran_functions (inf);
1607
1608 /* No Fortran entries found. Try C. */
1609 if (old_last_node == last_node)
1610 {
1611 /* We do not tag if rewind fails.
1612 Only the file name will be recorded in the tags file. */
1613 rewind (inf);
1614 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1615 default_C_entries (inf);
1616 }
1617 return;
1618 }
1619
1620 \f
1621 /* Record a tag. */
1622 static void
1623 pfnote (name, is_func, linestart, linelen, lno, cno)
1624 char *name; /* tag name, or NULL if unnamed */
1625 bool is_func; /* tag is a function */
1626 char *linestart; /* start of the line where tag is */
1627 int linelen; /* length of the line where tag is */
1628 int lno; /* line number */
1629 long cno; /* character number */
1630 {
1631 register node *np;
1632
1633 if (CTAGS && name == NULL)
1634 return;
1635
1636 np = xnew (1, node);
1637
1638 /* If ctags mode, change name "main" to M<thisfilename>. */
1639 if (CTAGS && !cxref_style && streq (name, "main"))
1640 {
1641 register char *fp = etags_strrchr (curfile, '/');
1642 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1643 fp = etags_strrchr (np->name, '.');
1644 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1645 fp[0] = '\0';
1646 }
1647 else
1648 np->name = name;
1649 np->been_warned = FALSE;
1650 np->file = curfile;
1651 np->is_func = is_func;
1652 np->lno = lno;
1653 /* Our char numbers are 0-base, because of C language tradition?
1654 ctags compatibility? old versions compatibility? I don't know.
1655 Anyway, since emacs's are 1-base we expect etags.el to take care
1656 of the difference. If we wanted to have 1-based numbers, we would
1657 uncomment the +1 below. */
1658 np->cno = cno /* + 1 */ ;
1659 np->left = np->right = NULL;
1660 if (CTAGS && !cxref_style)
1661 {
1662 if (strlen (linestart) < 50)
1663 np->pat = concat (linestart, "$", "");
1664 else
1665 np->pat = savenstr (linestart, 50);
1666 }
1667 else
1668 np->pat = savenstr (linestart, linelen);
1669
1670 add_node (np, &head);
1671 }
1672
1673 /*
1674 * TAGS format specification
1675 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1676 *
1677 * pfnote should emit the optimized form [unnamed tag] only if:
1678 * 1. name does not contain any of the characters " \t\r\n(),;";
1679 * 2. linestart contains name as either a rightmost, or rightmost but
1680 * one character, substring;
1681 * 3. the character, if any, immediately before name in linestart must
1682 * be one of the characters " \t(),;";
1683 * 4. the character, if any, immediately after name in linestart must
1684 * also be one of the characters " \t(),;".
1685 *
1686 * The real implementation uses the notinname() macro, which recognises
1687 * characters slightly different form " \t\r\n(),;". See the variable
1688 * `nonam'.
1689 */
1690 #define traditional_tag_style TRUE
1691 static void
1692 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1693 char *name; /* tag name, or NULL if unnamed */
1694 int namelen; /* tag length */
1695 bool is_func; /* tag is a function */
1696 char *linestart; /* start of the line where tag is */
1697 int linelen; /* length of the line where tag is */
1698 int lno; /* line number */
1699 long cno; /* character number */
1700 {
1701 register char *cp;
1702 bool named;
1703
1704 named = TRUE;
1705 if (!CTAGS)
1706 {
1707 for (cp = name; !notinname (*cp); cp++)
1708 continue;
1709 if (*cp == '\0') /* rule #1 */
1710 {
1711 cp = linestart + linelen - namelen;
1712 if (notinname (linestart[linelen-1]))
1713 cp -= 1; /* rule #4 */
1714 if (cp >= linestart /* rule #2 */
1715 && (cp == linestart
1716 || notinname (cp[-1])) /* rule #3 */
1717 && strneq (name, cp, namelen)) /* rule #2 */
1718 named = FALSE; /* use unnamed tag */
1719 }
1720 }
1721
1722 if (named)
1723 name = savenstr (name, namelen);
1724 else
1725 name = NULL;
1726 pfnote (name, is_func, linestart, linelen, lno, cno);
1727 }
1728
1729 /*
1730 * free_tree ()
1731 * recurse on left children, iterate on right children.
1732 */
1733 static void
1734 free_tree (np)
1735 register node *np;
1736 {
1737 while (np)
1738 {
1739 register node *node_right = np->right;
1740 free_tree (np->left);
1741 if (np->name != NULL)
1742 free (np->name);
1743 free (np->pat);
1744 free (np);
1745 np = node_right;
1746 }
1747 }
1748
1749 /*
1750 * add_node ()
1751 * Adds a node to the tree of nodes. In etags mode, we don't keep
1752 * it sorted; we just keep a linear list. In ctags mode, maintain
1753 * an ordered tree, with no attempt at balancing.
1754 *
1755 * add_node is the only function allowed to add nodes, so it can
1756 * maintain state.
1757 */
1758 static void
1759 add_node (np, cur_node_p)
1760 node *np, **cur_node_p;
1761 {
1762 register int dif;
1763 register node *cur_node = *cur_node_p;
1764
1765 if (cur_node == NULL)
1766 {
1767 *cur_node_p = np;
1768 last_node = np;
1769 return;
1770 }
1771
1772 if (!CTAGS)
1773 {
1774 /* Etags Mode */
1775 if (last_node == NULL)
1776 fatal ("internal error in add_node", (char *)NULL);
1777 last_node->right = np;
1778 last_node = np;
1779 }
1780 else
1781 {
1782 /* Ctags Mode */
1783 dif = strcmp (np->name, cur_node->name);
1784
1785 /*
1786 * If this tag name matches an existing one, then
1787 * do not add the node, but maybe print a warning.
1788 */
1789 if (!dif)
1790 {
1791 if (streq (np->file, cur_node->file))
1792 {
1793 if (!no_warnings)
1794 {
1795 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1796 np->file, lineno, np->name);
1797 fprintf (stderr, "Second entry ignored\n");
1798 }
1799 }
1800 else if (!cur_node->been_warned && !no_warnings)
1801 {
1802 fprintf
1803 (stderr,
1804 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1805 np->file, cur_node->file, np->name);
1806 cur_node->been_warned = TRUE;
1807 }
1808 return;
1809 }
1810
1811 /* Actually add the node */
1812 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1813 }
1814 }
1815
1816 \f
1817 static void
1818 put_entries (np)
1819 register node *np;
1820 {
1821 register char *sp;
1822
1823 if (np == NULL)
1824 return;
1825
1826 /* Output subentries that precede this one */
1827 put_entries (np->left);
1828
1829 /* Output this entry */
1830
1831 if (!CTAGS)
1832 {
1833 if (np->name != NULL)
1834 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1835 np->pat, np->name, np->lno, np->cno);
1836 else
1837 fprintf (tagf, "%s\177%d,%ld\n",
1838 np->pat, np->lno, np->cno);
1839 }
1840 else
1841 {
1842 if (np->name == NULL)
1843 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1844
1845 if (cxref_style)
1846 {
1847 if (vgrind_style)
1848 fprintf (stdout, "%s %s %d\n",
1849 np->name, np->file, (np->lno + 63) / 64);
1850 else
1851 fprintf (stdout, "%-16s %3d %-16s %s\n",
1852 np->name, np->lno, np->file, np->pat);
1853 }
1854 else
1855 {
1856 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1857
1858 if (np->is_func)
1859 { /* a function */
1860 putc (searchar, tagf);
1861 putc ('^', tagf);
1862
1863 for (sp = np->pat; *sp; sp++)
1864 {
1865 if (*sp == '\\' || *sp == searchar)
1866 putc ('\\', tagf);
1867 putc (*sp, tagf);
1868 }
1869 putc (searchar, tagf);
1870 }
1871 else
1872 { /* a typedef; text pattern inadequate */
1873 fprintf (tagf, "%d", np->lno);
1874 }
1875 putc ('\n', tagf);
1876 }
1877 }
1878
1879 /* Output subentries that follow this one */
1880 put_entries (np->right);
1881 }
1882
/*
 * Length of a number's decimal representation, as printed by "%ld":
 * one character per digit, plus one for the minus sign when NUM is
 * negative.
 */
static int
number_len (num)
     long num;
{
  /* Start from the sign; the loop then counts at least one digit. */
  int len = (num < 0) ? 1 : 0;

  /* Dividing towards zero works for both signs and avoids negating
     the most negative value. */
  do
    {
      len += 1;
      num /= 10;
    }
  while (num != 0);
  return len;
}
1893
1894 /*
1895 * Return total number of characters that put_entries will output for
1896 * the nodes in the subtree of the specified node. Works only if
1897 * we are not ctags, but called only in that case. This count
1898 * is irrelevant with the new tags.el, but is still supplied for
1899 * backward compatibility.
1900 */
1901 static int
1902 total_size_of_entries (np)
1903 register node *np;
1904 {
1905 register int total;
1906
1907 if (np == NULL)
1908 return 0;
1909
1910 for (total = 0; np != NULL; np = np->right)
1911 {
1912 /* Count left subentries. */
1913 total += total_size_of_entries (np->left);
1914
1915 /* Count this entry */
1916 total += strlen (np->pat) + 1;
1917 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1918 if (np->name != NULL)
1919 total += 1 + strlen (np->name); /* \001name */
1920 }
1921
1922 return total;
1923 }
1924
1925 \f
/* C dialect flags. */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_EXT   0x00fff         /* C extensions */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
1934
/*
 * The C symbol tables: the kinds of keyword found in the
 * C_stab_entry table.
 */
enum sym_type
{
  st_none,                      /* not a keyword */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,
  st_C_javastruct,              /* extends, implements */
  st_C_operator,
  st_C_class,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef,
  st_C_typespec
};
1949
1950 static unsigned int hash P_((const char *, unsigned int));
1951 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1952 static enum sym_type C_symtype P_((char *, int, int));
1953
1954 /* Feed stuff between (but not including) %[ and %] lines to:
1955 gperf -c -k 1,3 -o -p -r -t
1956 %[
1957 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1958 %%
1959 if, 0, st_C_ignore
1960 for, 0, st_C_ignore
1961 while, 0, st_C_ignore
1962 switch, 0, st_C_ignore
1963 return, 0, st_C_ignore
1964 @interface, 0, st_C_objprot
1965 @protocol, 0, st_C_objprot
1966 @implementation,0, st_C_objimpl
1967 @end, 0, st_C_objend
1968 import, C_JAVA, st_C_ignore
1969 package, C_JAVA, st_C_ignore
1970 friend, C_PLPL, st_C_ignore
1971 extends, C_JAVA, st_C_javastruct
1972 implements, C_JAVA, st_C_javastruct
1973 interface, C_JAVA, st_C_struct
1974 class, 0, st_C_class
1975 namespace, C_PLPL, st_C_struct
1976 domain, C_STAR, st_C_struct
1977 union, 0, st_C_struct
1978 struct, 0, st_C_struct
1979 extern, 0, st_C_extern
1980 enum, 0, st_C_enum
1981 typedef, 0, st_C_typedef
1982 define, 0, st_C_define
1983 operator, C_PLPL, st_C_operator
1984 bool, C_PLPL, st_C_typespec
1985 long, 0, st_C_typespec
1986 short, 0, st_C_typespec
1987 int, 0, st_C_typespec
1988 char, 0, st_C_typespec
1989 float, 0, st_C_typespec
1990 double, 0, st_C_typespec
1991 signed, 0, st_C_typespec
1992 unsigned, 0, st_C_typespec
1993 auto, 0, st_C_typespec
1994 void, 0, st_C_typespec
1995 static, 0, st_C_typespec
1996 const, 0, st_C_typespec
1997 volatile, 0, st_C_typespec
1998 explicit, C_PLPL, st_C_typespec
1999 mutable, C_PLPL, st_C_typespec
2000 typename, C_PLPL, st_C_typespec
2001 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2002 DEFUN, 0, st_C_gnumacro
2003 SYSCALL, 0, st_C_gnumacro
2004 ENTRY, 0, st_C_gnumacro
2005 PSEUDO, 0, st_C_gnumacro
2006 # These are defined inside C functions, so currently they are not met.
2007 # EXFUN used in glibc, DEFVAR_* in emacs.
2008 #EXFUN, 0, st_C_gnumacro
2009 #DEFVAR_, 0, st_C_gnumacro
2010 %]
2011 and replace lines between %< and %> with its output,
2012 then make in_word_set static. */
2013 /*%<*/
2014 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2015 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2016 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2017
2018 #define TOTAL_KEYWORDS 46
2019 #define MIN_WORD_LENGTH 2
2020 #define MAX_WORD_LENGTH 15
2021 #define MIN_HASH_VALUE 13
2022 #define MAX_HASH_VALUE 121
2023 /* maximum key range = 109, duplicates = 0 */
2024
/*
 * gperf-generated hash: LEN plus the table values of the first
 * character of STR and, unless LEN is 1 or 2, of its third character.
 */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Indexed by character code; 122 (one more than the largest valid
     hash value) marks characters that start no keyword. */
  static unsigned char asso_values[] =
    {
      /*   0 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /*  10 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /*  20 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /*  30 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /*  40 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /*  50 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /*  60 */ 122, 122, 122, 122,  57, 122, 122, 122,  55,   6,
      /*  70 */  60, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /*  80 */  51, 122, 122,  10,   2, 122, 122, 122, 122, 122,
      /*  90 */ 122, 122, 122, 122, 122, 122, 122,   2,  52,  59,
      /* 100 */  49,  38,  56,  41, 122,  22, 122, 122,   9,  32,
      /* 110 */  33,  60,  26, 122,   1,  28,  46,  59,  44,  51,
      /* 120 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 130 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 140 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 150 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 160 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 170 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 180 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 190 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 200 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 210 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 220 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 230 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 240 */ 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
      /* 250 */ 122, 122, 122, 122, 122, 122
    };
  register int hval = len;

  /* Only lengths 1 and 2 leave the third character out. */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];
  return hval;
}
2076
2077 #ifdef __GNUC__
2078 __inline
2079 #endif
2080 struct C_stab_entry *
2081 in_word_set (str, len)
2082 register const char *str;
2083 register unsigned int len;
2084 {
2085 static struct C_stab_entry wordlist[] =
2086 {
2087 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2088 {""}, {""}, {""}, {""},
2089 {"ENTRY", 0, st_C_gnumacro},
2090 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2091 {""},
2092 {"if", 0, st_C_ignore},
2093 {""}, {""},
2094 {"SYSCALL", 0, st_C_gnumacro},
2095 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2096 {"struct", 0, st_C_struct},
2097 {"static", 0, st_C_typespec},
2098 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2099 {"long", 0, st_C_typespec},
2100 {""}, {""}, {""}, {""}, {""},
2101 {"auto", 0, st_C_typespec},
2102 {"return", 0, st_C_ignore},
2103 {"import", C_JAVA, st_C_ignore},
2104 {""},
2105 {"switch", 0, st_C_ignore},
2106 {""},
2107 {"implements", C_JAVA, st_C_javastruct},
2108 {""},
2109 {"for", 0, st_C_ignore},
2110 {"volatile", 0, st_C_typespec},
2111 {""},
2112 {"PSEUDO", 0, st_C_gnumacro},
2113 {""},
2114 {"char", 0, st_C_typespec},
2115 {"class", 0, st_C_class},
2116 {"@protocol", 0, st_C_objprot},
2117 {""}, {""},
2118 {"void", 0, st_C_typespec},
2119 {"int", 0, st_C_typespec},
2120 {"explicit", C_PLPL, st_C_typespec},
2121 {""},
2122 {"namespace", C_PLPL, st_C_struct},
2123 {"signed", 0, st_C_typespec},
2124 {""},
2125 {"interface", C_JAVA, st_C_struct},
2126 {"while", 0, st_C_ignore},
2127 {"typedef", 0, st_C_typedef},
2128 {"typename", C_PLPL, st_C_typespec},
2129 {""}, {""}, {""},
2130 {"friend", C_PLPL, st_C_ignore},
2131 {"mutable", C_PLPL, st_C_typespec},
2132 {"union", 0, st_C_struct},
2133 {"domain", C_STAR, st_C_struct},
2134 {""}, {""},
2135 {"extern", 0, st_C_extern},
2136 {"extends", C_JAVA, st_C_javastruct},
2137 {"package", C_JAVA, st_C_ignore},
2138 {"short", 0, st_C_typespec},
2139 {"@end", 0, st_C_objend},
2140 {"unsigned", 0, st_C_typespec},
2141 {""},
2142 {"const", 0, st_C_typespec},
2143 {""}, {""},
2144 {"@interface", 0, st_C_objprot},
2145 {"enum", 0, st_C_enum},
2146 {""}, {""},
2147 {"@implementation",0, st_C_objimpl},
2148 {""},
2149 {"operator", C_PLPL, st_C_operator},
2150 {""}, {""}, {""}, {""},
2151 {"define", 0, st_C_define},
2152 {""}, {""},
2153 {"double", 0, st_C_typespec},
2154 {""},
2155 {"bool", C_PLPL, st_C_typespec},
2156 {""}, {""}, {""},
2157 {"DEFUN", 0, st_C_gnumacro},
2158 {"float", 0, st_C_typespec}
2159 };
2160
2161 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2162 {
2163 register int key = hash (str, len);
2164
2165 if (key <= MAX_HASH_VALUE && key >= 0)
2166 {
2167 register const char *s = wordlist[key].name;
2168
2169 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2170 return &wordlist[key];
2171 }
2172 }
2173 return 0;
2174 }
2175 /*%>*/
2176
2177 static enum sym_type
2178 C_symtype (str, len, c_ext)
2179 char *str;
2180 int len;
2181 int c_ext;
2182 {
2183 register struct C_stab_entry *se = in_word_set (str, len);
2184
2185 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2186 return st_none;
2187 return se->type;
2188 }
2189
2190 \f
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 *
 * The state is advanced both by consider_token (when an identifier
 * or keyword has just been read) and by the punctuation handling in
 * C_entries.
 */
enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

/* TRUE after the `extern' keyword has been seen for the function or
   variable being parsed; cleared whenever fvdef is reset. */
bool fvextern;			/* func or var: extern keyword seen; */
2210
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 *
 * consider_token moves it from tnone to tkeyseen and ttypeseen;
 * C_entries enters tinbody at an opening brace, tend at the matching
 * closing brace, and tignore once the typedef tag has been made.
 */
enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
2224
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 *
 * The state goes back to snone when the opening brace of the body is
 * read, so snone is also the state while inside a body.
 */
enum
{
  snone,			/* nothing seen yet,
				   or in struct body if cblev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  sintemplate,			/* inside template (ignore) */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
2239
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a freshly allocated copy of the class name here
 * (via savenstr); the initial value is only a placeholder.
 */
char *objtag = "<uninited>";
2244
/*
 * Yet another little state machine to deal with preprocessor lines.
 *
 * The CNL macro resets the state to dnone at every new input line;
 * CNL_SAVE_DEFINEDEF reads a new line while leaving the state alone,
 * which is how a definition continued across lines keeps its state.
 */
enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
2255
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 *
 * The keyword and identifier transitions are made in consider_token;
 * the ones triggered by punctuation (':', ',', '(', ')', '{', '+',
 * '-' and end of line) are made in C_entries.
 */
enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2275
2276
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
struct tok
{
  bool valid;			/* the fields below describe a usable token;
				   cleared again once a tag has been made */
  bool named;			/* token_name holds an explicit tag name */
  int offset;			/* offset of the token within `line' */
  int length;			/* length of the token in characters */
  int lineno;			/* line number where the token was read */
  long linepos;			/* position recorded for the start of that
				   line (the value of charno when it was read) */
  char *line;			/* buffer holding the line with the token */
} token;			/* latest token read */
linebuffer token_name;		/* its name */
2292
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above P_((int, char *, int));
static void popclass_above P_((int));
static void write_classname P_((linebuffer *, char *qualifier));

/* cname and cblev are parallel arrays: entry I gives the name (NULL
   for an unnamed struct) and the curly brace level of the I-th
   enclosing declaration.  */
struct {
  char **cname;			/* nested class names */
  int *cblev;			/* nested class curly brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   The expansion refers to a variable named `cblev', which must be in
   scope wherever this macro is used.  */
#define instruct	(structdef == snone && nestlev > 0			\
			 && cblev == cstack.cblev[nestlev-1] + 1)
2312
2313 static void
2314 pushclass_above (cblev, str, len)
2315 int cblev;
2316 char *str;
2317 int len;
2318 {
2319 int nl;
2320
2321 popclass_above (cblev);
2322 nl = cstack.nl;
2323 if (nl >= cstack.size)
2324 {
2325 int size = cstack.size *= 2;
2326 xrnew (cstack.cname, size, char *);
2327 xrnew (cstack.cblev, size, int);
2328 }
2329 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2330 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2331 cstack.cblev[nl] = cblev;
2332 cstack.nl = nl + 1;
2333 }
2334
2335 static void
2336 popclass_above (cblev)
2337 int cblev;
2338 {
2339 int nl;
2340
2341 for (nl = cstack.nl - 1;
2342 nl >= 0 && cstack.cblev[nl] >= cblev;
2343 nl--)
2344 {
2345 if (cstack.cname[nl] != NULL)
2346 free (cstack.cname[nl]);
2347 cstack.nl = nl;
2348 }
2349 }
2350
2351 static void
2352 write_classname (cn, qualifier)
2353 linebuffer *cn;
2354 char *qualifier;
2355 {
2356 int i, len;
2357 int qlen = strlen (qualifier);
2358
2359 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2360 {
2361 len = 0;
2362 cn->len = 0;
2363 cn->buffer[0] = '\0';
2364 }
2365 else
2366 {
2367 len = strlen (cstack.cname[0]);
2368 linebuffer_setlen (cn, len);
2369 strcpy (cn->buffer, cstack.cname[0]);
2370 }
2371 for (i = 1; i < cstack.nl; i++)
2372 {
2373 char *s;
2374 int slen;
2375
2376 s = cstack.cname[i];
2377 if (s == NULL)
2378 continue;
2379 slen = strlen (s);
2380 len += slen + qlen;
2381 linebuffer_setlen (cn, len);
2382 strncat (cn->buffer, qualifier, qlen);
2383 strncat (cn->buffer, s, slen);
2384 }
2385 }
2386
2387 \f
2388 static bool consider_token P_((char *, int, int, int *, int, int, bool *));
2389 static void make_C_tag P_((bool));
2390
2391 /*
2392 * consider_token ()
2393 * checks to see if the current token is at the start of a
2394 * function or variable, or corresponds to a typedef, or
2395 * is a struct/union/enum tag, or #define, or an enum constant.
2396 *
2397 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2398 * with args. C_EXTP points to which language we are looking at.
2399 *
2400 * Globals
2401 * fvdef IN OUT
2402 * structdef IN OUT
2403 * definedef IN OUT
2404 * typdef IN OUT
2405 * objdef IN OUT
2406 */
2407
2408 static bool
2409 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2410 register char *str; /* IN: token pointer */
2411 register int len; /* IN: token length */
2412 register int c; /* IN: first char after the token */
2413 int *c_extp; /* IN, OUT: C extensions mask */
2414 int cblev; /* IN: curly brace level */
2415 int parlev; /* IN: parenthesis level */
2416 bool *is_func_or_var; /* OUT: function or variable found */
2417 {
2418 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2419 structtype is the type of the preceding struct-like keyword, and
2420 structcblev is the curly brace level where it has been seen. */
2421 static enum sym_type structtype;
2422 static int structcblev;
2423 static enum sym_type toktype;
2424
2425
2426 toktype = C_symtype (str, len, *c_extp);
2427
2428 /*
2429 * Advance the definedef state machine.
2430 */
2431 switch (definedef)
2432 {
2433 case dnone:
2434 /* We're not on a preprocessor line. */
2435 if (toktype == st_C_gnumacro)
2436 {
2437 fvdef = fdefunkey;
2438 return FALSE;
2439 }
2440 break;
2441 case dsharpseen:
2442 if (toktype == st_C_define)
2443 {
2444 definedef = ddefineseen;
2445 }
2446 else
2447 {
2448 definedef = dignorerest;
2449 }
2450 return FALSE;
2451 case ddefineseen:
2452 /*
2453 * Make a tag for any macro, unless it is a constant
2454 * and constantypedefs is FALSE.
2455 */
2456 definedef = dignorerest;
2457 *is_func_or_var = (c == '(');
2458 if (!*is_func_or_var && !constantypedefs)
2459 return FALSE;
2460 else
2461 return TRUE;
2462 case dignorerest:
2463 return FALSE;
2464 default:
2465 error ("internal error: definedef value.", (char *)NULL);
2466 }
2467
2468 /*
2469 * Now typedefs
2470 */
2471 switch (typdef)
2472 {
2473 case tnone:
2474 if (toktype == st_C_typedef)
2475 {
2476 if (typedefs)
2477 typdef = tkeyseen;
2478 fvextern = FALSE;
2479 fvdef = fvnone;
2480 return FALSE;
2481 }
2482 break;
2483 case tkeyseen:
2484 switch (toktype)
2485 {
2486 case st_none:
2487 case st_C_typespec:
2488 case st_C_class:
2489 case st_C_struct:
2490 case st_C_enum:
2491 typdef = ttypeseen;
2492 break;
2493 }
2494 break;
2495 case ttypeseen:
2496 if (structdef == snone && fvdef == fvnone)
2497 {
2498 fvdef = fvnameseen;
2499 return TRUE;
2500 }
2501 break;
2502 case tend:
2503 switch (toktype)
2504 {
2505 case st_C_typespec:
2506 case st_C_class:
2507 case st_C_struct:
2508 case st_C_enum:
2509 return FALSE;
2510 }
2511 return TRUE;
2512 }
2513
2514 /*
2515 * This structdef business is NOT invoked when we are ctags and the
2516 * file is plain C. This is because a struct tag may have the same
2517 * name as another tag, and this loses with ctags.
2518 */
2519 switch (toktype)
2520 {
2521 case st_C_javastruct:
2522 if (structdef == stagseen)
2523 structdef = scolonseen;
2524 return FALSE;
2525 case st_C_class:
2526 if (cblev == 0
2527 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2528 && definedef == dnone && structdef == snone
2529 && typdef == tnone && fvdef == fvnone)
2530 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2531 /* FALLTHRU */
2532 case st_C_struct:
2533 case st_C_enum:
2534 if (parlev == 0
2535 && fvdef != vignore
2536 && (typdef == tkeyseen
2537 || (typedefs_or_cplusplus && structdef == snone)))
2538 {
2539 structdef = skeyseen;
2540 structtype = toktype;
2541 structcblev = cblev;
2542 }
2543 return FALSE;
2544 }
2545
2546 if (structdef == skeyseen)
2547 {
2548 structdef = stagseen;
2549 return TRUE;
2550 }
2551
2552 if (typdef != tnone)
2553 definedef = dnone;
2554
2555 /* Detect Objective C constructs. */
2556 switch (objdef)
2557 {
2558 case onone:
2559 switch (toktype)
2560 {
2561 case st_C_objprot:
2562 objdef = oprotocol;
2563 return FALSE;
2564 case st_C_objimpl:
2565 objdef = oimplementation;
2566 return FALSE;
2567 }
2568 break;
2569 case oimplementation:
2570 /* Save the class tag for functions or variables defined inside. */
2571 objtag = savenstr (str, len);
2572 objdef = oinbody;
2573 return FALSE;
2574 case oprotocol:
2575 /* Save the class tag for categories. */
2576 objtag = savenstr (str, len);
2577 objdef = otagseen;
2578 *is_func_or_var = TRUE;
2579 return TRUE;
2580 case oparenseen:
2581 objdef = ocatseen;
2582 *is_func_or_var = TRUE;
2583 return TRUE;
2584 case oinbody:
2585 break;
2586 case omethodsign:
2587 if (parlev == 0)
2588 {
2589 objdef = omethodtag;
2590 linebuffer_setlen (&token_name, len);
2591 strncpy (token_name.buffer, str, len);
2592 token_name.buffer[len] = '\0';
2593 return TRUE;
2594 }
2595 return FALSE;
2596 case omethodcolon:
2597 if (parlev == 0)
2598 objdef = omethodparm;
2599 return FALSE;
2600 case omethodparm:
2601 if (parlev == 0)
2602 {
2603 objdef = omethodtag;
2604 linebuffer_setlen (&token_name, token_name.len + len);
2605 strncat (token_name.buffer, str, len);
2606 return TRUE;
2607 }
2608 return FALSE;
2609 case oignore:
2610 if (toktype == st_C_objend)
2611 {
2612 /* Memory leakage here: the string pointed by objtag is
2613 never released, because many tests would be needed to
2614 avoid breaking on incorrect input code. The amount of
2615 memory leaked here is the sum of the lengths of the
2616 class tags.
2617 free (objtag); */
2618 objdef = onone;
2619 }
2620 return FALSE;
2621 }
2622
2623 /* A function, variable or enum constant? */
2624 switch (toktype)
2625 {
2626 case st_C_extern:
2627 fvextern = TRUE;
2628 /* FALLTHRU */
2629 case st_C_typespec:
2630 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2631 fvdef = fvnone; /* should be useless */
2632 return FALSE;
2633 case st_C_ignore:
2634 fvextern = FALSE;
2635 fvdef = vignore;
2636 return FALSE;
2637 case st_C_operator:
2638 fvdef = foperator;
2639 *is_func_or_var = TRUE;
2640 return TRUE;
2641 case st_none:
2642 if (constantypedefs
2643 && structdef == snone
2644 && structtype == st_C_enum && cblev > structcblev)
2645 return TRUE; /* enum constant */
2646 switch (fvdef)
2647 {
2648 case fdefunkey:
2649 if (cblev > 0)
2650 break;
2651 fvdef = fdefunname; /* GNU macro */
2652 *is_func_or_var = TRUE;
2653 return TRUE;
2654 case fvnone:
2655 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2656 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2657 {
2658 fvdef = vignore;
2659 return FALSE;
2660 }
2661 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2662 {
2663 fvdef = foperator;
2664 *is_func_or_var = TRUE;
2665 return TRUE;
2666 }
2667 if (cblev > 0 && !instruct)
2668 break;
2669 fvdef = fvnameseen; /* function or variable */
2670 *is_func_or_var = TRUE;
2671 return TRUE;
2672 }
2673 break;
2674 }
2675
2676 return FALSE;
2677 }
2678
2679 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are written for use inside C_entries: they refer by
 * name to variables that are not defined here (curndx, newndx, lp,
 * inf, quotednl and savetoken, plus the counters lineno, charno and
 * linecharno).
 */
struct
{
  long linepos;
  linebuffer lb;
} lbs[2];

/* True when the buffer about to be read into is the one holding the
   most recently read line. */
#define current_lb_is_new (newndx == curndx)
/* Make the other buffer the one the next line will be read into. */
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Read the next input line into curlb and update the line and character
   counters, without touching the definedef state. */
#define CNL_SAVE_DEFINEDEF()						\
do {									\
  curlinepos = charno;							\
  lineno++;								\
  linecharno = charno;							\
  charno += readline (&curlb, inf);					\
  lp = curlb.buffer;							\
  quotednl = FALSE;							\
  newndx = curndx;							\
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also restore the token saved before a
   preprocessor line (if any) and reset definedef to dnone. */
#define CNL()								\
do {									\
  CNL_SAVE_DEFINEDEF();							\
  if (savetoken.valid)							\
    {									\
      token = savetoken;						\
      savetoken.valid = FALSE;						\
    }									\
  definedef = dnone;							\
} while (0)
2720
2721
2722 static void
2723 make_C_tag (isfun)
2724 bool isfun;
2725 {
2726 /* This function should never be called when token.valid is FALSE, but
2727 we must protect against invalid input or internal errors. */
2728 if (DEBUG || token.valid)
2729 {
2730 if (traditional_tag_style)
2731 {
2732 /* This was the original code. Now we call new_pfnote instead,
2733 which uses the new method for naming tags (see new_pfnote). */
2734 char *name = NULL;
2735
2736 if (CTAGS || token.named)
2737 name = savestr (token_name.buffer);
2738 if (DEBUG && !token.valid)
2739 {
2740 if (token.named)
2741 name = concat (name, "##invalid##", "");
2742 else
2743 name = savestr ("##invalid##");
2744 }
2745 pfnote (name, isfun, token.line,
2746 token.offset+token.length+1, token.lineno, token.linepos);
2747 }
2748 else
2749 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2750 token.offset+token.length+1, token.lineno, token.linepos);
2751 token.valid = FALSE;
2752 }
2753 }
2754
2755
2756 /*
2757 * C_entries ()
2758 * This routine finds functions, variables, typedefs,
2759 * #define's, enum constants and struct/union/enum definitions in
2760 * C syntax and adds them to the list.
2761 */
2762 static void
2763 C_entries (c_ext, inf)
2764 int c_ext; /* extension of C */
2765 FILE *inf; /* input file */
2766 {
2767 register char c; /* latest char read; '\0' for end of line */
2768 register char *lp; /* pointer one beyond the character `c' */
2769 int curndx, newndx; /* indices for current and new lb */
2770 register int tokoff; /* offset in line of start of current token */
2771 register int toklen; /* length of current token */
2772 char *qualifier; /* string used to qualify names */
2773 int qlen; /* length of qualifier */
2774 int cblev; /* current curly brace level */
2775 int parlev; /* current parenthesis level */
2776 int typdefcblev; /* cblev where a typedef struct body begun */
2777 bool incomm, inquote, inchar, quotednl, midtoken;
2778 bool cplpl, cjava;
2779 bool yacc_rules; /* in the rules part of a yacc file */
2780 struct tok savetoken; /* token saved during preprocessor handling */
2781
2782
2783 initbuffer (&token_name);
2784 initbuffer (&lbs[0].lb);
2785 initbuffer (&lbs[1].lb);
2786 if (cstack.size == 0)
2787 {
2788 cstack.size = (DEBUG) ? 1 : 4;
2789 cstack.nl = 0;
2790 cstack.cname = xnew (cstack.size, char *);
2791 cstack.cblev = xnew (cstack.size, int);
2792 }
2793
2794 tokoff = toklen = 0; /* keep compiler quiet */
2795 curndx = newndx = 0;
2796 lineno = 0;
2797 charno = 0;
2798 lp = curlb.buffer;
2799 *lp = 0;
2800
2801 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2802 structdef = snone; definedef = dnone; objdef = onone;
2803 yacc_rules = FALSE;
2804 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2805 token.valid = savetoken.valid = FALSE;
2806 cblev = 0;
2807 parlev = 0;
2808 cplpl = (c_ext & C_PLPL) == C_PLPL;
2809 cjava = (c_ext & C_JAVA) == C_JAVA;
2810 if (cjava)
2811 { qualifier = "."; qlen = 1; }
2812 else
2813 { qualifier = "::"; qlen = 2; }
2814
2815
2816 while (!feof (inf))
2817 {
2818 c = *lp++;
2819 if (c == '\\')
2820 {
2821 /* If we're at the end of the line, the next character is a
2822 '\0'; don't skip it, because it's the thing that tells us
2823 to read the next line. */
2824 if (*lp == '\0')
2825 {
2826 quotednl = TRUE;
2827 continue;
2828 }
2829 lp++;
2830 c = ' ';
2831 }
2832 else if (incomm)
2833 {
2834 switch (c)
2835 {
2836 case '*':
2837 if (*lp == '/')
2838 {
2839 c = *lp++;
2840 incomm = FALSE;
2841 }
2842 break;
2843 case '\0':
2844 /* Newlines inside comments do not end macro definitions in
2845 traditional cpp. */
2846 CNL_SAVE_DEFINEDEF ();
2847 break;
2848 }
2849 continue;
2850 }
2851 else if (inquote)
2852 {
2853 switch (c)
2854 {
2855 case '"':
2856 inquote = FALSE;
2857 break;
2858 case '\0':
2859 /* Newlines inside strings do not end macro definitions
2860 in traditional cpp, even though compilers don't
2861 usually accept them. */
2862 CNL_SAVE_DEFINEDEF ();
2863 break;
2864 }
2865 continue;
2866 }
2867 else if (inchar)
2868 {
2869 switch (c)
2870 {
2871 case '\0':
2872 /* Hmmm, something went wrong. */
2873 CNL ();
2874 /* FALLTHRU */
2875 case '\'':
2876 inchar = FALSE;
2877 break;
2878 }
2879 continue;
2880 }
2881 else
2882 switch (c)
2883 {
2884 case '"':
2885 inquote = TRUE;
2886 switch (fvdef)
2887 {
2888 case fdefunkey:
2889 case fstartlist:
2890 case finlist:
2891 case fignore:
2892 case vignore:
2893 break;
2894 default:
2895 fvextern = FALSE;
2896 fvdef = fvnone;
2897 }
2898 continue;
2899 case '\'':
2900 inchar = TRUE;
2901 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2902 {
2903 fvextern = FALSE;
2904 fvdef = fvnone;
2905 }
2906 continue;
2907 case '/':
2908 if (*lp == '*')
2909 {
2910 lp++;
2911 incomm = TRUE;
2912 continue;
2913 }
2914 else if (/* cplpl && */ *lp == '/')
2915 {
2916 c = '\0';
2917 break;
2918 }
2919 else
2920 break;
2921 case '%':
2922 if ((c_ext & YACC) && *lp == '%')
2923 {
2924 /* Entering or exiting rules section in yacc file. */
2925 lp++;
2926 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2927 typdef = tnone; structdef = snone;
2928 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2929 cblev = 0;
2930 yacc_rules = !yacc_rules;
2931 continue;
2932 }
2933 else
2934 break;
2935 case '#':
2936 if (definedef == dnone)
2937 {
2938 char *cp;
2939 bool cpptoken = TRUE;
2940
2941 /* Look back on this line. If all blanks, or nonblanks
2942 followed by an end of comment, this is a preprocessor
2943 token. */
2944 for (cp = newlb.buffer; cp < lp-1; cp++)
2945 if (!iswhite (*cp))
2946 {
2947 if (*cp == '*' && *(cp+1) == '/')
2948 {
2949 cp++;
2950 cpptoken = TRUE;
2951 }
2952 else
2953 cpptoken = FALSE;
2954 }
2955 if (cpptoken)
2956 definedef = dsharpseen;
2957 } /* if (definedef == dnone) */
2958
2959 continue;
2960 } /* switch (c) */
2961
2962
2963 /* Consider token only if some involved conditions are satisfied. */
2964 if (typdef != tignore
2965 && definedef != dignorerest
2966 && fvdef != finlist
2967 && structdef != sintemplate
2968 && (definedef != dnone
2969 || structdef != scolonseen))
2970 {
2971 if (midtoken)
2972 {
2973 if (endtoken (c))
2974 {
2975 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2976 {
2977 /*
2978 * This handles :: in the middle, but not at the
2979 * beginning of an identifier. Also, space-separated
2980 * :: is not recognised.
2981 */
2982 lp += 2;
2983 toklen += 2;
2984 c = lp[-1];
2985 goto still_in_token;
2986 }
2987 else
2988 {
2989 bool funorvar = FALSE;
2990
2991 if (yacc_rules
2992 || consider_token (newlb.buffer + tokoff, toklen, c,
2993 &c_ext, cblev, parlev, &funorvar))
2994 {
2995 if (fvdef == foperator)
2996 {
2997 char *oldlp = lp;
2998 lp = skip_spaces (lp-1);
2999 if (*lp != '\0')
3000 lp += 1;
3001 while (*lp != '\0'
3002 && !iswhite (*lp) && *lp != '(')
3003 lp += 1;
3004 c = *lp++;
3005 toklen += lp - oldlp;
3006 }
3007 token.named = FALSE;
3008 if ((c_ext & C_EXT) /* not pure C */
3009 && nestlev > 0 && definedef == dnone)
3010 /* in struct body */
3011 {
3012 write_classname (&token_name, qualifier);
3013 linebuffer_setlen (&token_name,
3014 token_name.len+qlen+toklen);
3015 strcat (token_name.buffer, qualifier);
3016 strncat (token_name.buffer,
3017 newlb.buffer + tokoff, toklen);
3018 token.named = TRUE;
3019 }
3020 else if (objdef == ocatseen)
3021 /* Objective C category */
3022 {
3023 int len = strlen (objtag) + 2 + toklen;
3024 linebuffer_setlen (&token_name, len);
3025 strcpy (token_name.buffer, objtag);
3026 strcat (token_name.buffer, "(");
3027 strncat (token_name.buffer,
3028 newlb.buffer + tokoff, toklen);
3029 strcat (token_name.buffer, ")");
3030 token.named = TRUE;
3031 }
3032 else if (objdef == omethodtag
3033 || objdef == omethodparm)
3034 /* Objective C method */
3035 {
3036 token.named = TRUE;
3037 }
3038 else if (fvdef == fdefunname)
3039 /* GNU DEFUN and similar macros */
3040 {
3041 bool defun = (newlb.buffer[tokoff] == 'F');
3042 int off = tokoff;
3043 int len = toklen;
3044
3045 /* Rewrite the tag so that emacs lisp DEFUNs
3046 can be found by their elisp name */
3047 if (defun)
3048 {
3049 off += 1;
3050 len -= 1;
3051 }
3052 len = toklen;
3053 linebuffer_setlen (&token_name, len);
3054 strncpy (token_name.buffer,
3055 newlb.buffer + off, len);
3056 token_name.buffer[len] = '\0';
3057 if (defun)
3058 while (--len >= 0)
3059 if (token_name.buffer[len] == '_')
3060 token_name.buffer[len] = '-';
3061 token.named = defun;
3062 }
3063 else
3064 {
3065 linebuffer_setlen (&token_name, toklen);
3066 strncpy (token_name.buffer,
3067 newlb.buffer + tokoff, toklen);
3068 token_name.buffer[toklen] = '\0';
3069 /* Name macros and members. */
3070 token.named = (structdef == stagseen
3071 || typdef == ttypeseen
3072 || typdef == tend
3073 || (funorvar
3074 && definedef == dignorerest)
3075 || (funorvar
3076 && definedef == dnone
3077 && structdef == snone
3078 && cblev > 0));
3079 }
3080 token.lineno = lineno;
3081 token.offset = tokoff;
3082 token.length = toklen;
3083 token.line = newlb.buffer;
3084 token.linepos = newlinepos;
3085 token.valid = TRUE;
3086
3087 if (definedef == dnone
3088 && (fvdef == fvnameseen
3089 || fvdef == foperator
3090 || structdef == stagseen
3091 || typdef == tend
3092 || typdef == ttypeseen
3093 || objdef != onone))
3094 {
3095 if (current_lb_is_new)
3096 switch_line_buffers ();
3097 }
3098 else if (definedef != dnone
3099 || fvdef == fdefunname
3100 || instruct)
3101 make_C_tag (funorvar);
3102 }
3103 midtoken = FALSE;
3104 }
3105 } /* if (endtoken (c)) */
3106 else if (intoken (c))
3107 still_in_token:
3108 {
3109 toklen++;
3110 continue;
3111 }
3112 } /* if (midtoken) */
3113 else if (begtoken (c))
3114 {
3115 switch (definedef)
3116 {
3117 case dnone:
3118 switch (fvdef)
3119 {
3120 case fstartlist:
3121 fvdef = finlist;
3122 continue;
3123 case flistseen:
3124 make_C_tag (TRUE); /* a function */
3125 fvdef = fignore;
3126 break;
3127 case fvnameseen:
3128 fvdef = fvnone;
3129 break;
3130 }
3131 if (structdef == stagseen && !cjava)
3132 {
3133 popclass_above (cblev);
3134 structdef = snone;
3135 }
3136 break;
3137 case dsharpseen:
3138 savetoken = token;
3139 }
3140 if (!yacc_rules || lp == newlb.buffer + 1)
3141 {
3142 tokoff = lp - 1 - newlb.buffer;
3143 toklen = 1;
3144 midtoken = TRUE;
3145 }
3146 continue;
3147 } /* if (begtoken) */
3148 } /* if must look at token */
3149
3150
3151 /* Detect end of line, colon, comma, semicolon and various braces
3152 after having handled a token.*/
3153 switch (c)
3154 {
3155 case ':':
3156 if (yacc_rules && token.offset == 0 && token.valid)
3157 {
3158 make_C_tag (FALSE); /* a yacc function */
3159 break;
3160 }
3161 if (definedef != dnone)
3162 break;
3163 switch (objdef)
3164 {
3165 case otagseen:
3166 objdef = oignore;
3167 make_C_tag (TRUE); /* an Objective C class */
3168 break;
3169 case omethodtag:
3170 case omethodparm:
3171 objdef = omethodcolon;
3172 linebuffer_setlen (&token_name, token_name.len + 1);
3173 strcat (token_name.buffer, ":");
3174 break;
3175 }
3176 if (structdef == stagseen)
3177 structdef = scolonseen;
3178 break;
3179 case ';':
3180 if (definedef != dnone)
3181 break;
3182 switch (typdef)
3183 {
3184 case tend:
3185 case ttypeseen:
3186 make_C_tag (FALSE); /* a typedef */
3187 typdef = tnone;
3188 fvdef = fvnone;
3189 break;
3190 case tnone:
3191 case tinbody:
3192 case tignore:
3193 switch (fvdef)
3194 {
3195 case fignore:
3196 if (typdef == tignore)
3197 fvdef = fvnone;
3198 break;
3199 case fvnameseen:
3200 if ((globals && cblev == 0 && (!fvextern || declarations))
3201 || (members && instruct))
3202 make_C_tag (FALSE); /* a variable */
3203 fvextern = FALSE;
3204 fvdef = fvnone;
3205 token.valid = FALSE;
3206 break;
3207 case flistseen:
3208 if ((declarations && typdef == tnone && !instruct)
3209 || (members && typdef != tignore && instruct))
3210 make_C_tag (TRUE); /* a function declaration */
3211 /* FALLTHRU */
3212 default:
3213 fvextern = FALSE;
3214 fvdef = fvnone;
3215 if (declarations
3216 && structdef == stagseen && (c_ext & C_PLPL))
3217 make_C_tag (FALSE); /* forward declaration */
3218 else
3219 /* The following instruction invalidates the token.
3220 Probably the token should be invalidated in all other
3221 cases where some state machine is reset prematurely. */
3222 token.valid = FALSE;
3223 } /* switch (fvdef) */
3224 /* FALLTHRU */
3225 default:
3226 if (!instruct)
3227 typdef = tnone;
3228 }
3229 if (structdef == stagseen)
3230 structdef = snone;
3231 break;
3232 case ',':
3233 if (definedef != dnone)
3234 break;
3235 switch (objdef)
3236 {
3237 case omethodtag:
3238 case omethodparm:
3239 make_C_tag (TRUE); /* an Objective C method */
3240 objdef = oinbody;
3241 break;
3242 }
3243 switch (fvdef)
3244 {
3245 case fdefunkey:
3246 case foperator:
3247 case fstartlist:
3248 case finlist:
3249 case fignore:
3250 case vignore:
3251 break;
3252 case fdefunname:
3253 fvdef = fignore;
3254 break;
3255 case fvnameseen: /* a variable */
3256 if ((globals && cblev == 0 && (!fvextern || declarations))
3257 || (members && instruct))
3258 make_C_tag (FALSE);
3259 break;
3260 case flistseen: /* a function */
3261 if ((declarations && typdef == tnone && !instruct)
3262 || (members && typdef != tignore && instruct))
3263 {
3264 make_C_tag (TRUE); /* a function declaration */
3265 fvdef = fvnameseen;
3266 }
3267 else if (!declarations)
3268 fvdef = fvnone;
3269 token.valid = FALSE;
3270 break;
3271 default:
3272 fvdef = fvnone;
3273 }
3274 if (structdef == stagseen)
3275 structdef = snone;
3276 break;
3277 case '[':
3278 if (definedef != dnone)
3279 break;
3280 if (structdef == stagseen)
3281 structdef = snone;
3282 switch (typdef)
3283 {
3284 case ttypeseen:
3285 case tend:
3286 typdef = tignore;
3287 make_C_tag (FALSE); /* a typedef */
3288 break;
3289 case tnone:
3290 case tinbody:
3291 switch (fvdef)
3292 {
3293 case foperator:
3294 case finlist:
3295 case fignore:
3296 case vignore:
3297 break;
3298 case fvnameseen:
3299 if ((members && cblev == 1)
3300 || (globals && cblev == 0
3301 && (!fvextern || declarations)))
3302 make_C_tag (FALSE); /* a variable */
3303 /* FALLTHRU */
3304 default:
3305 fvdef = fvnone;
3306 }
3307 break;
3308 }
3309 break;
3310 case '(':
3311 if (definedef != dnone)
3312 break;
3313 if (objdef == otagseen && parlev == 0)
3314 objdef = oparenseen;
3315 switch (fvdef)
3316 {
3317 case fvnameseen:
3318 if (typdef == ttypeseen
3319 && *lp != '*'
3320 && !instruct)
3321 {
3322 /* This handles constructs like:
3323 typedef void OperatorFun (int fun); */
3324 make_C_tag (FALSE);
3325 typdef = tignore;
3326 fvdef = fignore;
3327 break;
3328 }
3329 /* FALLTHRU */
3330 case foperator:
3331 fvdef = fstartlist;
3332 break;
3333 case flistseen:
3334 fvdef = finlist;
3335 break;
3336 }
3337 parlev++;
3338 break;
3339 case ')':
3340 if (definedef != dnone)
3341 break;
3342 if (objdef == ocatseen && parlev == 1)
3343 {
3344 make_C_tag (TRUE); /* an Objective C category */
3345 objdef = oignore;
3346 }
3347 if (--parlev == 0)
3348 {
3349 switch (fvdef)
3350 {
3351 case fstartlist:
3352 case finlist:
3353 fvdef = flistseen;
3354 break;
3355 }
3356 if (!instruct
3357 && (typdef == tend
3358 || typdef == ttypeseen))
3359 {
3360 typdef = tignore;
3361 make_C_tag (FALSE); /* a typedef */
3362 }
3363 }
3364 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3365 parlev = 0;
3366 break;
3367 case '{':
3368 if (definedef != dnone)
3369 break;
3370 if (typdef == ttypeseen)
3371 {
3372 typdefcblev = cblev;
3373 typdef = tinbody;
3374 }
3375 switch (fvdef)
3376 {
3377 case flistseen:
3378 make_C_tag (TRUE); /* a function */
3379 /* FALLTHRU */
3380 case fignore:
3381 fvdef = fvnone;
3382 break;
3383 case fvnone:
3384 switch (objdef)
3385 {
3386 case otagseen:
3387 make_C_tag (TRUE); /* an Objective C class */
3388 objdef = oignore;
3389 break;
3390 case omethodtag:
3391 case omethodparm:
3392 make_C_tag (TRUE); /* an Objective C method */
3393 objdef = oinbody;
3394 break;
3395 default:
3396 /* Neutralize `extern "C" {' grot. */
3397 if (cblev == 0 && structdef == snone && nestlev == 0
3398 && typdef == tnone)
3399 cblev = -1;
3400 }
3401 }
3402 switch (structdef)
3403 {
3404 case skeyseen: /* unnamed struct */
3405 pushclass_above (cblev, NULL, 0);
3406 structdef = snone;
3407 break;
3408 case stagseen: /* named struct or enum */
3409 case scolonseen: /* a class */
3410 pushclass_above (cblev, token.line+token.offset, token.length);
3411 structdef = snone;
3412 make_C_tag (FALSE); /* a struct or enum */
3413 break;
3414 }
3415 cblev++;
3416 break;
3417 case '*':
3418 if (definedef != dnone)
3419 break;
3420 if (fvdef == fstartlist)
3421 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3422 break;
3423 case '}':
3424 if (definedef != dnone)
3425 break;
3426 if (!noindentypedefs && lp == newlb.buffer + 1)
3427 {
3428 cblev = 0; /* reset curly brace level if first column */
3429 parlev = 0; /* also reset paren level, just in case... */
3430 }
3431 else if (cblev > 0)
3432 cblev--;
3433 popclass_above (cblev);
3434 structdef = snone;
3435 if (typdef == tinbody && cblev <= typdefcblev)
3436 {
3437 assert (cblev == typdefcblev);
3438 typdef = tend;
3439 }
3440 break;
3441 case '=':
3442 if (definedef != dnone)
3443 break;
3444 switch (fvdef)
3445 {
3446 case foperator:
3447 case finlist:
3448 case fignore:
3449 case vignore:
3450 break;
3451 case fvnameseen:
3452 if ((members && cblev == 1)
3453 || (globals && cblev == 0 && (!fvextern || declarations)))
3454 make_C_tag (FALSE); /* a variable */
3455 /* FALLTHRU */
3456 default:
3457 fvdef = vignore;
3458 }
3459 break;
3460 case '<':
3461 if (cplpl && structdef == stagseen)
3462 {
3463 structdef = sintemplate;
3464 break;
3465 }
3466 goto resetfvdef;
3467 case '>':
3468 if (structdef == sintemplate)
3469 {
3470 structdef = stagseen;
3471 break;
3472 }
3473 goto resetfvdef;
3474 case '+':
3475 case '-':
3476 if (objdef == oinbody && cblev == 0)
3477 {
3478 objdef = omethodsign;
3479 break;
3480 }
3481 /* FALLTHRU */
3482 resetfvdef:
3483 case '#': case '~': case '&': case '%': case '/': case '|':
3484 case '^': case '!': case '.': case '?': case ']':
3485 if (definedef != dnone)
3486 break;
3487 /* These surely cannot follow a function tag in C. */
3488 switch (fvdef)
3489 {
3490 case foperator:
3491 case finlist:
3492 case fignore:
3493 case vignore:
3494 break;
3495 default:
3496 fvdef = fvnone;
3497 }
3498 break;
3499 case '\0':
3500 if (objdef == otagseen)
3501 {
3502 make_C_tag (TRUE); /* an Objective C class */
3503 objdef = oignore;
3504 }
3505 /* If a macro spans multiple lines don't reset its state. */
3506 if (quotednl)
3507 CNL_SAVE_DEFINEDEF ();
3508 else
3509 CNL ();
3510 break;
3511 } /* switch (c) */
3512
3513 } /* while not eof */
3514
3515 free (token_name.buffer);
3516 free (lbs[0].lb.buffer);
3517 free (lbs[1].lb.buffer);
3518 }
3519
3520 /*
3521 * Process either a C++ file or a C file depending on the setting
3522 * of a global flag.
3523 */
3524 static void
3525 default_C_entries (inf)
3526 FILE *inf;
3527 {
3528 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3529 }
3530
/* Tag INF as plain C: no language-extension flag is passed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no extension flags */

  C_entries (c_ext, inf);
}
3538
3539 /* Always do C++. */
3540 static void
3541 Cplusplus_entries (inf)
3542 FILE *inf;
3543 {
3544 C_entries (C_PLPL, inf);
3545 }
3546
3547 /* Always do Java. */
3548 static void
3549 Cjava_entries (inf)
3550 FILE *inf;
3551 {
3552 C_entries (C_JAVA, inf);
3553 }
3554
3555 /* Always do C*. */
3556 static void
3557 Cstar_entries (inf)
3558 FILE *inf;
3559 {
3560 C_entries (C_STAR, inf);
3561 }
3562
3563 /* Always do Yacc. */
3564 static void
3565 Yacc_entries (inf)
3566 FILE *inf;
3567 {
3568 C_entries (YACC, inf);
3569 }
3570
3571 \f
/* Loop header that iterates over the lines of FILE_POINTER.
   It resets the global counters `lineno' and `charno' to zero, then,
   before every iteration: bumps `lineno', records the offset of the
   line start in `linecharno', reads the next line into LINE_BUFFER
   with readline (adding its return value to `charno'), and points
   CHAR_POINTER at the start of LINE_BUFFER's text.  The statement
   following the macro invocation is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), (file_pointer)),         \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
3582
3583
3584 /*
3585 * Read a file, but do no processing. This is used to do regexp
3586 * matching on files that have no language defined.
3587 */
3588 static void
3589 just_read_file (inf)
3590 FILE *inf;
3591 {
3592 register char *dummy;
3593
3594 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3595 continue;
3596 }
3597
3598 \f
3599 /* Fortran parsing */
3600
3601 static bool tail P_((char *));
3602 static void takeprec P_((void));
3603 static void getit P_((FILE *));
3604
3605 static bool
3606 tail (cp)
3607 char *cp;
3608 {
3609 register int len = 0;
3610
3611 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3612 cp++, len++;
3613 if (*cp == '\0' && !intoken (dbp[len]))
3614 {
3615 dbp += len;
3616 return TRUE;
3617 }
3618 return FALSE;
3619 }
3620
3621 static void
3622 takeprec ()
3623 {
3624 dbp = skip_spaces (dbp);
3625 if (*dbp != '*')
3626 return;
3627 dbp++;
3628 dbp = skip_spaces (dbp);
3629 if (strneq (dbp, "(*)", 3))
3630 {
3631 dbp += 3;
3632 return;
3633 }
3634 if (!ISDIGIT (*dbp))
3635 {
3636 --dbp; /* force failure */
3637 return;
3638 }
3639 do
3640 dbp++;
3641 while (ISDIGIT (*dbp));
3642 }
3643
3644 static void
3645 getit (inf)
3646 FILE *inf;
3647 {
3648 register char *cp;
3649
3650 dbp = skip_spaces (dbp);
3651 if (*dbp == '\0')
3652 {
3653 lineno++;
3654 linecharno = charno;
3655 charno += readline (&lb, inf);
3656 dbp = lb.buffer;
3657 if (dbp[5] != '&')
3658 return;
3659 dbp += 6;
3660 dbp = skip_spaces (dbp);
3661 }
3662 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3663 return;
3664 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3665 continue;
3666 pfnote (savenstr (dbp, cp-dbp), TRUE,
3667 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3668 }
3669
3670
3671 static void
3672 Fortran_functions (inf)
3673 FILE *inf;
3674 {
3675 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3676 {
3677 if (*dbp == '%')
3678 dbp++; /* Ratfor escape to fortran */
3679 dbp = skip_spaces (dbp);
3680 if (*dbp == '\0')
3681 continue;
3682 switch (lowcase (*dbp))
3683 {
3684 case 'i':
3685 if (tail ("integer"))
3686 takeprec ();
3687 break;
3688 case 'r':
3689 if (tail ("real"))
3690 takeprec ();
3691 break;
3692 case 'l':
3693 if (tail ("logical"))
3694 takeprec ();
3695 break;
3696 case 'c':
3697 if (tail ("complex") || tail ("character"))
3698 takeprec ();
3699 break;
3700 case 'd':
3701 if (tail ("double"))
3702 {
3703 dbp = skip_spaces (dbp);
3704 if (*dbp == '\0')
3705 continue;
3706 if (tail ("precision"))
3707 break;
3708 continue;
3709 }
3710 break;
3711 }
3712 dbp = skip_spaces (dbp);
3713 if (*dbp == '\0')
3714 continue;
3715 switch (lowcase (*dbp))
3716 {
3717 case 'f':
3718 if (tail ("function"))
3719 getit (inf);
3720 continue;
3721 case 's':
3722 if (tail ("subroutine"))
3723 getit (inf);
3724 continue;
3725 case 'e':
3726 if (tail ("entry"))
3727 getit (inf);
3728 continue;
3729 case 'b':
3730 if (tail ("blockdata") || tail ("block data"))
3731 {
3732 dbp = skip_spaces (dbp);
3733 if (*dbp == '\0') /* assume un-named */
3734 pfnote (savestr ("blockdata"), TRUE,
3735 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3736 else
3737 getit (inf); /* look for name */
3738 }
3739 continue;
3740 }
3741 }
3742 }
3743
3744 \f
3745 /*
3746 * Ada parsing
3747 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3748 */
3749
3750 static void adagetit P_((FILE *, char *));
3751
3752 /* Once we are positioned after an "interesting" keyword, let's get
3753 the real tag value necessary. */
3754 static void
3755 adagetit (inf, name_qualifier)
3756 FILE *inf;
3757 char *name_qualifier;
3758 {
3759 register char *cp;
3760 char *name;
3761 char c;
3762
3763 while (!feof (inf))
3764 {
3765 dbp = skip_spaces (dbp);
3766 if (*dbp == '\0'
3767 || (dbp[0] == '-' && dbp[1] == '-'))
3768 {
3769 lineno++;
3770 linecharno = charno;
3771 charno += readline (&lb, inf);
3772 dbp = lb.buffer;
3773 }
3774 switch (*dbp)
3775 {
3776 case 'b':
3777 case 'B':
3778 if (tail ("body"))
3779 {
3780 /* Skipping body of procedure body or package body or ....
3781 resetting qualifier to body instead of spec. */
3782 name_qualifier = "/b";
3783 continue;
3784 }
3785 break;
3786 case 't':
3787 case 'T':
3788 /* Skipping type of task type or protected type ... */
3789 if (tail ("type"))
3790 continue;
3791 break;
3792 }
3793 if (*dbp == '"')
3794 {
3795 dbp += 1;
3796 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3797 continue;
3798 }
3799 else
3800 {
3801 dbp = skip_spaces (dbp);
3802 for (cp = dbp;
3803 (*cp != '\0'
3804 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3805 cp++)
3806 continue;
3807 if (cp == dbp)
3808 return;
3809 }
3810 c = *cp;
3811 *cp = '\0';
3812 name = concat (dbp, name_qualifier, "");
3813 *cp = c;
3814 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3815 if (c == '"')
3816 dbp = cp + 1;
3817 return;
3818 }
3819 }
3820
3821 static void
3822 Ada_funcs (inf)
3823 FILE *inf;
3824 {
3825 bool inquote = FALSE;
3826
3827 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3828 {
3829 while (*dbp != '\0')
3830 {
3831 /* Skip a string i.e. "abcd". */
3832 if (inquote || (*dbp == '"'))
3833 {
3834 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3835 if (dbp != NULL)
3836 {
3837 inquote = FALSE;
3838 dbp += 1;
3839 continue; /* advance char */
3840 }
3841 else
3842 {
3843 inquote = TRUE;
3844 break; /* advance line */
3845 }
3846 }
3847
3848 /* Skip comments. */
3849 if (dbp[0] == '-' && dbp[1] == '-')
3850 break; /* advance line */
3851
3852 /* Skip character enclosed in single quote i.e. 'a'
3853 and skip single quote starting an attribute i.e. 'Image. */
3854 if (*dbp == '\'')
3855 {
3856 dbp++ ;
3857 if (*dbp != '\0')
3858 dbp++;
3859 continue;
3860 }
3861
3862 /* Search for beginning of a token. */
3863 if (!begtoken (*dbp))
3864 {
3865 dbp++;
3866 continue; /* advance char */
3867 }
3868
3869 /* We are at the beginning of a token. */
3870 switch (*dbp)
3871 {
3872 case 'f':
3873 case 'F':
3874 if (!packages_only && tail ("function"))
3875 adagetit (inf, "/f");
3876 else
3877 break; /* from switch */
3878 continue; /* advance char */
3879 case 'p':
3880 case 'P':
3881 if (!packages_only && tail ("procedure"))
3882 adagetit (inf, "/p");
3883 else if (tail ("package"))
3884 adagetit (inf, "/s");
3885 else if (tail ("protected")) /* protected type */
3886 adagetit (inf, "/t");
3887 else
3888 break; /* from switch */
3889 continue; /* advance char */
3890 case 't':
3891 case 'T':
3892 if (!packages_only && tail ("task"))
3893 adagetit (inf, "/k");
3894 else if (typedefs && !packages_only && tail ("type"))
3895 {
3896 adagetit (inf, "/t");
3897 while (*dbp != '\0')
3898 dbp += 1;
3899 }
3900 else
3901 break; /* from switch */
3902 continue; /* advance char */
3903 }
3904
3905 /* Look for the end of the token. */
3906 while (!endtoken (*dbp))
3907 dbp++;
3908
3909 } /* advance char */
3910 } /* advance line */
3911 }
3912
3913 \f
3914 /*
3915 * Bob Weiner, Motorola Inc., 4/3/94
3916 * Unix and microcontroller assembly tag handling
3917 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3918 */
3919 static void
3920 Asm_labels (inf)
3921 FILE *inf;
3922 {
3923 register char *cp;
3924
3925 LOOP_ON_INPUT_LINES (inf, lb, cp)
3926 {
3927 /* If first char is alphabetic or one of [_.$], test for colon
3928 following identifier. */
3929 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3930 {
3931 /* Read past label. */
3932 cp++;
3933 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3934 cp++;
3935 if (*cp == ':' || iswhite (*cp))
3936 {
3937 /* Found end of label, so copy it and add it to the table. */
3938 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3939 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3940 }
3941 }
3942 }
3943 }
3944
3945 \f
3946 /*
3947 * Perl support
3948 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3949 * Perl variable names: /^(my|local).../
3950 * Bart Robinson <lomew@cs.utah.edu> (1995)
3951 * Michael Ernst <mernst@alum.mit.edu> (1997)
3952 */
3953 static void
3954 Perl_functions (inf)
3955 FILE *inf;
3956 {
3957 register char *cp;
3958
3959 LOOP_ON_INPUT_LINES (inf, lb, cp)
3960 {
3961 if (*cp++ == 's'
3962 && *cp++ == 'u'
3963 && *cp++ == 'b' && iswhite (*cp++))
3964 {
3965 cp = skip_spaces (cp);
3966 if (*cp != '\0')
3967 {
3968 char *sp = cp;
3969 while (*cp != '\0'
3970 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3971 cp++;
3972 pfnote (savenstr (sp, cp-sp), TRUE,
3973 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3974 }
3975 }
3976 else if (globals /* only if tagging global vars is enabled */
3977 && ((cp = lb.buffer,
3978 *cp++ == 'm'
3979 && *cp++ == 'y')
3980 || (cp = lb.buffer,
3981 *cp++ == 'l'
3982 && *cp++ == 'o'
3983 && *cp++ == 'c'
3984 && *cp++ == 'a'
3985 && *cp++ == 'l'))
3986 && (*cp == '(' || iswhite (*cp)))
3987 {
3988 /* After "my" or "local", but before any following paren or space. */
3989 char *varname = NULL;
3990
3991 cp = skip_spaces (cp);
3992 if (*cp == '$' || *cp == '@' || *cp == '%')
3993 {
3994 char* varstart = ++cp;
3995 while (ISALNUM (*cp) || *cp == '_')
3996 cp++;
3997 varname = savenstr (varstart, cp-varstart);
3998 }
3999 else
4000 {
4001 /* Should be examining a variable list at this point;
4002 could insist on seeing an open parenthesis. */
4003 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4004 cp++;
4005 }
4006
4007 /* Perhaps I should back cp up one character, so the TAGS table
4008 doesn't mention (and so depend upon) the following char. */
4009 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
4010 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4011 }
4012 }
4013 }
4014
4015 \f
4016 /*
4017 * Python support
4018 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4019 * Eric S. Raymond <esr@thyrsus.com> (1997)
4020 */
4021 static void
4022 Python_functions (inf)
4023 FILE *inf;
4024 {
4025 register char *cp;
4026
4027 LOOP_ON_INPUT_LINES (inf, lb, cp)
4028 {
4029 if (*cp++ == 'd'
4030 && *cp++ == 'e'
4031 && *cp++ == 'f' && iswhite (*cp++))
4032 {
4033 cp = skip_spaces (cp);
4034 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4035 cp++;
4036 pfnote (NULL, TRUE,
4037 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4038 }
4039
4040 cp = lb.buffer;
4041 if (*cp++ == 'c'
4042 && *cp++ == 'l'
4043 && *cp++ == 'a'
4044 && *cp++ == 's'
4045 && *cp++ == 's' && iswhite (*cp++))
4046 {
4047 cp = skip_spaces (cp);
4048 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4049 cp++;
4050 pfnote (NULL, TRUE,
4051 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4052 }
4053 }
4054 }
4055
4056 \f
4057 /* Idea by Corny de Souza
4058 * Cobol tag functions
4059 * We could look for anything that could be a paragraph name.
4060 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4061 */
4062 static void
4063 Cobol_paragraphs (inf)
4064 FILE *inf;
4065 {
4066 register char *bp, *ep;
4067
4068 LOOP_ON_INPUT_LINES (inf, lb, bp)
4069 {
4070 if (lb.len < 9)
4071 continue;
4072 bp += 8;
4073
4074 /* If eoln, compiler option or comment ignore whole line. */
4075 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4076 continue;
4077
4078 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4079 continue;
4080 if (*ep++ == '.')
4081 pfnote (savenstr (bp, ep-bp), TRUE,
4082 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4083 }
4084 }
4085
4086 \f
4087 /*
4088 * Makefile support
4089 * Idea by Assar Westerlund <assar@sics.se> (2001)
4090 */
4091 static void
4092 Makefile_targets (inf)
4093 FILE *inf;
4094 {
4095 register char *bp;
4096
4097 LOOP_ON_INPUT_LINES (inf, lb, bp)
4098 {
4099 if (*bp == '\t' || *bp == '#')
4100 continue;
4101 while (*bp != '\0' && *bp != '=' && *bp != ':')
4102 bp++;
4103 if (*bp == ':')
4104 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4105 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4106 }
4107 }
4108
4109 \f
4110 /* Added by Mosur Mohan, 4/22/88 */
4111 /* Pascal parsing */
4112
4113 /*
4114 * Locates tags for procedures & functions. Doesn't do any type- or
4115 * var-definitions. It does look for the keyword "extern" or
4116 * "forward" immediately following the procedure statement; if found,
4117 * the tag is skipped.
4118 */
4119 static void
4120 Pascal_functions (inf)
4121 FILE *inf;
4122 {
4123 linebuffer tline; /* mostly copied from C_entries */
4124 long save_lcno;
4125 int save_lineno, save_len;
4126 char c, *cp, *namebuf;
4127
4128 bool /* each of these flags is TRUE iff: */
4129 incomment, /* point is inside a comment */
4130 inquote, /* point is inside '..' string */
4131 get_tagname, /* point is after PROCEDURE/FUNCTION
4132 keyword, so next item = potential tag */
4133 found_tag, /* point is after a potential tag */
4134 inparms, /* point is within parameter-list */
4135 verify_tag; /* point has passed the parm-list, so the
4136 next token will determine whether this
4137 is a FORWARD/EXTERN to be ignored, or
4138 whether it is a real tag */
4139
4140 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4141 namebuf = NULL; /* keep compiler quiet */
4142 lineno = 0;
4143 charno = 0;
4144 dbp = lb.buffer;
4145 *dbp = '\0';
4146 initbuffer (&tline);
4147
4148 incomment = inquote = FALSE;
4149 found_tag = FALSE; /* have a proc name; check if extern */
4150 get_tagname = FALSE; /* have found "procedure" keyword */
4151 inparms = FALSE; /* found '(' after "proc" */
4152 verify_tag = FALSE; /* check if "extern" is ahead */
4153
4154
4155 while (!feof (inf)) /* long main loop to get next char */
4156 {
4157 c = *dbp++;
4158 if (c == '\0') /* if end of line */
4159 {
4160 lineno++;
4161 linecharno = charno;
4162 charno += readline (&lb, inf);
4163 dbp = lb.buffer;
4164 if (*dbp == '\0')
4165 continue;
4166 if (!((found_tag && verify_tag)
4167 || get_tagname))
4168 c = *dbp++; /* only if don't need *dbp pointing
4169 to the beginning of the name of
4170 the procedure or function */
4171 }
4172 if (incomment)
4173 {
4174 if (c == '}') /* within { } comments */
4175 incomment = FALSE;
4176 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4177 {
4178 dbp++;
4179 incomment = FALSE;
4180 }
4181 continue;
4182 }
4183 else if (inquote)
4184 {
4185 if (c == '\'')
4186 inquote = FALSE;
4187 continue;
4188 }
4189 else
4190 switch (c)
4191 {
4192 case '\'':
4193 inquote = TRUE; /* found first quote */
4194 continue;
4195 case '{': /* found open { comment */
4196 incomment = TRUE;
4197 continue;
4198 case '(':
4199 if (*dbp == '*') /* found open (* comment */
4200 {
4201 incomment = TRUE;
4202 dbp++;
4203 }
4204 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4205 inparms = TRUE;
4206 continue;
4207 case ')': /* end of parms list */
4208 if (inparms)
4209 inparms = FALSE;
4210 continue;
4211 case ';':
4212 if (found_tag && !inparms) /* end of proc or fn stmt */
4213 {
4214 verify_tag = TRUE;
4215 break;
4216 }
4217 continue;
4218 }
4219 if (found_tag && verify_tag && (*dbp != ' '))
4220 {
4221 /* check if this is an "extern" declaration */
4222 if (*dbp == '\0')
4223 continue;
4224 if (lowcase (*dbp == 'e'))
4225 {
4226 if (tail ("extern")) /* superfluous, really! */
4227 {
4228 found_tag = FALSE;
4229 verify_tag = FALSE;
4230 }
4231 }
4232 else if (lowcase (*dbp) == 'f')
4233 {
4234 if (tail ("forward")) /* check for forward reference */
4235 {
4236 found_tag = FALSE;
4237 verify_tag = FALSE;
4238 }
4239 }
4240 if (found_tag && verify_tag) /* not external proc, so make tag */
4241 {
4242 found_tag = FALSE;
4243 verify_tag = FALSE;
4244 pfnote (namebuf, TRUE,
4245 tline.buffer, save_len, save_lineno, save_lcno);
4246 continue;
4247 }
4248 }
4249 if (get_tagname) /* grab name of proc or fn */
4250 {
4251 if (*dbp == '\0')
4252 continue;
4253
4254 /* save all values for later tagging */
4255 linebuffer_setlen (&tline, lb.len);
4256 strcpy (tline.buffer, lb.buffer);
4257 save_lineno = lineno;
4258 save_lcno = linecharno;
4259
4260 /* grab block name */
4261 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4262 continue;
4263 namebuf = savenstr (dbp, cp-dbp);
4264 dbp = cp; /* set dbp to e-o-token */
4265 save_len = dbp - lb.buffer + 1;
4266 get_tagname = FALSE;
4267 found_tag = TRUE;
4268 continue;
4269
4270 /* and proceed to check for "extern" */
4271 }
4272 else if (!incomment && !inquote && !found_tag)
4273 {
4274 /* check for proc/fn keywords */
4275 switch (lowcase (c))
4276 {
4277 case 'p':
4278 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4279 get_tagname = TRUE;
4280 continue;
4281 case 'f':
4282 if (tail ("unction"))
4283 get_tagname = TRUE;
4284 continue;
4285 }
4286 }
4287 } /* while not eof */
4288
4289 free (tline.buffer);
4290 }
4291
4292 \f
4293 /*
4294 * Lisp tag functions
4295 * look for (def or (DEF, quote or QUOTE
4296 */
4297
4298 static int L_isdef P_((char *));
4299 static int L_isquote P_((char *));
4300 static void L_getit P_((void));
4301
/* Return 1 if the three characters following STRP[0] spell "def" in
   any mix of upper and lower case, 0 otherwise.  STRP[0] itself
   (normally the open parenthesis) is not examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
4310
/* Return nonzero if the characters following STRP[0] spell "quote" in
   any mix of upper and lower case and are followed by a white space
   character.  STRP[0] itself is not examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  return ((strp[1] == 'q' || strp[1] == 'Q')
          && (strp[2] == 'u' || strp[2] == 'U')
          && (strp[3] == 'o' || strp[3] == 'O')
          && (strp[4] == 't' || strp[4] == 'T')
          && (strp[5] == 'e' || strp[5] == 'E')
          && iswhite (strp[6]));
}
4322
4323 static void
4324 L_getit ()
4325 {
4326 register char *cp;
4327
4328 if (*dbp == '\'') /* Skip prefix quote */
4329 dbp++;
4330 else if (*dbp == '(')
4331 {
4332 if (L_isquote (dbp))
4333 dbp += 7; /* Skip "(quote " */
4334 else
4335 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4336 dbp = skip_spaces (dbp);
4337 }
4338
4339 for (cp = dbp /*+1*/;
4340 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4341 cp++)
4342 continue;
4343 if (cp == dbp)
4344 return;
4345
4346 pfnote (savenstr (dbp, cp-dbp), TRUE,
4347 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4348 }
4349
4350 static void
4351 Lisp_functions (inf)
4352 FILE *inf;
4353 {
4354 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4355 {
4356 if (dbp[0] == '(')
4357 {
4358 if (L_isdef (dbp))
4359 {
4360 dbp = skip_non_spaces (dbp);
4361 dbp = skip_spaces (dbp);
4362 L_getit ();
4363 }
4364 else
4365 {
4366 /* Check for (foo::defmumble name-defined ... */
4367 do
4368 dbp++;
4369 while (*dbp != '\0' && !iswhite (*dbp)
4370 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4371 if (*dbp == ':')
4372 {
4373 do
4374 dbp++;
4375 while (*dbp == ':');
4376
4377 if (L_isdef (dbp - 1))
4378 {
4379 dbp = skip_non_spaces (dbp);
4380 dbp = skip_spaces (dbp);
4381 L_getit ();
4382 }
4383 }
4384 }
4385 }
4386 }
4387 }
4388
4389 \f
4390 /*
4391 * Postscript tag functions
4392 * Just look for lines where the first character is '/'
4393 * Also look at "defineps" for PSWrap
4394 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4395 * Ideas by Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4396 */
4397 static void
4398 Postscript_functions (inf)
4399 FILE *inf;
4400 {
4401 register char *bp, *ep;
4402
4403 LOOP_ON_INPUT_LINES (inf, lb, bp)
4404 {
4405 if (bp[0] == '/')
4406 {
4407 for (ep = bp+1;
4408 *ep != '\0' && *ep != ' ' && *ep != '{';
4409 ep++)
4410 continue;
4411 pfnote (savenstr (bp, ep-bp), TRUE,
4412 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4413 }
4414 else if (strneq (bp, "defineps", 8))
4415 {
4416 bp = skip_non_spaces (bp);
4417 bp = skip_spaces (bp);
4418 get_tag (bp);
4419 }
4420 }
4421 }
4422
4423 \f
4424 /*
4425 * Scheme tag functions
4426 * look for (def... xyzzy
4427 * look for (def... (xyzzy
4428 * look for (def ... ((...(xyzzy ....
4429 * look for (set! xyzzy
4430 */
4431
4432 static void
4433 Scheme_functions (inf)
4434 FILE *inf;
4435 {
4436 register char *bp;
4437
4438 LOOP_ON_INPUT_LINES (inf, lb, bp)
4439 {
4440 if (bp[0] == '('
4441 && (bp[1] == 'D' || bp[1] == 'd')
4442 && (bp[2] == 'E' || bp[2] == 'e')
4443 && (bp[3] == 'F' || bp[3] == 'f'))
4444 {
4445 bp = skip_non_spaces (bp);
4446 /* Skip over open parens and white space */
4447 while (iswhite (*bp) || *bp == '(')
4448 bp++;
4449 get_tag (bp);
4450 }
4451 if (bp[0] == '('
4452 && (bp[1] == 'S' || bp[1] == 's')
4453 && (bp[2] == 'E' || bp[2] == 'e')
4454 && (bp[3] == 'T' || bp[3] == 't')
4455 && (bp[4] == '!' || bp[4] == '!')
4456 && (iswhite (bp[5])))
4457 {
4458 bp = skip_non_spaces (bp);
4459 bp = skip_spaces (bp);
4460 get_tag (bp);
4461 }
4462 }
4463 }
4464
4465 \f
4466 /* Find tags in TeX and LaTeX input files. */
4467
/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* control sequence name, as compared by
                                   TEX_Token against the text that follows
                                   the escape character */
  int len;                      /* length of NAME; TEX_Token stops at the
                                   first entry whose LEN is not positive */
};

/* Built on first use by TeX_commands, through TEX_decode_env. */
struct TEX_tabent *TEX_toktab = NULL;   /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by colons. */

char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index";

static void TEX_mode P_((FILE *));
static struct TEX_tabent *TEX_decode_env P_((char *, char *));
static int TEX_Token P_((char *));

/* Escape and grouping characters; TEX_mode resets all three for each
   file it examines. */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
4493
4494 /*
4495 * TeX/LaTeX scanning loop.
4496 */
4497 static void
4498 TeX_commands (inf)
4499 FILE *inf;
4500 {
4501 char *cp, *lasthit;
4502 register int i;
4503
4504 /* Select either \ or ! as escape character. */
4505 TEX_mode (inf);
4506
4507 /* Initialize token table once from environment. */
4508 if (!TEX_toktab)
4509 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4510
4511 LOOP_ON_INPUT_LINES (inf, lb, cp)
4512 {
4513 lasthit = cp;
4514 /* Look at each esc in line. */
4515 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4516 {
4517 if (*++cp == '\0')
4518 break;
4519 linecharno += cp - lasthit;
4520 lasthit = cp;
4521 i = TEX_Token (lasthit);
4522 if (i >= 0)
4523 {
4524 /* We seem to include the TeX command in the tag name.
4525 register char *p;
4526 for (p = lasthit + TEX_toktab[i].len;
4527 *p != '\0' && *p != TEX_clgrp;
4528 p++)
4529 continue; */
4530 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4531 lb.buffer, lb.len, lineno, linecharno);
4532 break; /* We only tag a line once */
4533 }
4534 }
4535 }
4536 }
4537
4538 #define TEX_LESC '\\'
4539 #define TEX_SESC '!'
4540 #define TEX_cmt '%'
4541
4542 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4543 chars accordingly. */
4544 static void
4545 TEX_mode (inf)
4546 FILE *inf;
4547 {
4548 int c;
4549
4550 while ((c = getc (inf)) != EOF)
4551 {
4552 /* Skip to next line if we hit the TeX comment char. */
4553 if (c == TEX_cmt)
4554 while (c != '\n')
4555 c = getc (inf);
4556 else if (c == TEX_LESC || c == TEX_SESC )
4557 break;
4558 }
4559
4560 if (c == TEX_LESC)
4561 {
4562 TEX_esc = TEX_LESC;
4563 TEX_opgrp = '{';
4564 TEX_clgrp = '}';
4565 }
4566 else
4567 {
4568 TEX_esc = TEX_SESC;
4569 TEX_opgrp = '<';
4570 TEX_clgrp = '>';
4571 }
4572 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4573 No attempt is made to correct the situation. */
4574 rewind (inf);
4575 }
4576
4577 /* Read environment and prepend it to the default string.
4578 Build token table. */
4579 static struct TEX_tabent *
4580 TEX_decode_env (evarname, defenv)
4581 char *evarname;
4582 char *defenv;
4583 {
4584 register char *env, *p;
4585
4586 struct TEX_tabent *tab;
4587 int size, i;
4588
4589 /* Append default string to environment. */
4590 env = getenv (evarname);
4591 if (!env)
4592 env = defenv;
4593 else
4594 {
4595 char *oldenv = env;
4596 env = concat (oldenv, defenv, "");
4597 }
4598
4599 /* Allocate a token table */
4600 for (size = 1, p = env; p;)
4601 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4602 size++;
4603 /* Add 1 to leave room for null terminator. */
4604 tab = xnew (size + 1, struct TEX_tabent);
4605
4606 /* Unpack environment string into token table. Be careful about */
4607 /* zero-length strings (leading ':', "::" and trailing ':') */
4608 for (i = 0; *env;)
4609 {
4610 p = etags_strchr (env, ':');
4611 if (!p) /* End of environment string. */
4612 p = env + strlen (env);
4613 if (p - env > 0)
4614 { /* Only non-zero strings. */
4615 tab[i].name = savenstr (env, p - env);
4616 tab[i].len = strlen (tab[i].name);
4617 i++;
4618 }
4619 if (*p)
4620 env = p + 1;
4621 else
4622 {
4623 tab[i].name = NULL; /* Mark end of table. */
4624 tab[i].len = 0;
4625 break;
4626 }
4627 }
4628 return tab;
4629 }
4630
4631 /* If the text at CP matches one of the tag-defining TeX command names,
4632 return the pointer to the first occurrence of that command in TEX_toktab.
4633 Otherwise return -1.
4634 Keep the capital `T' in `token' for dumb truncating compilers
4635 (this distinguishes it from `TEX_toktab' */
4636 static int
4637 TEX_Token (cp)
4638 char *cp;
4639 {
4640 int i;
4641
4642 for (i = 0; TEX_toktab[i].len > 0; i++)
4643 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4644 return i;
4645 return -1;
4646 }
4647
4648 \f
4649 /* Texinfo support. Dave Love, Mar. 2000. */
4650 static void
4651 Texinfo_nodes (inf)
4652 FILE * inf;
4653 {
4654 char *cp, *start;
4655 LOOP_ON_INPUT_LINES (inf, lb, cp)
4656 {
4657 if ((*cp++ == '@'
4658 && *cp++ == 'n'
4659 && *cp++ == 'o'
4660 && *cp++ == 'd'
4661 && *cp++ == 'e' && iswhite (*cp++)))
4662 {
4663 start = cp = skip_spaces(cp);
4664 while (*cp != '\0' && *cp != ',')
4665 cp++;
4666 pfnote (savenstr (start, cp - start), TRUE,
4667 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4668 }
4669 }
4670 }
4671
4672 \f
4673 /*
4674 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4675 *
4676 * Assumes that the predicate starts at column 0.
4677 * Only the first clause of a predicate is added.
4678 */
4679 static int prolog_pred P_((char *, char *));
4680 static void prolog_skip_comment P_((linebuffer *, FILE *));
4681 static int prolog_atom P_((char *, int));
4682
4683 static void
4684 Prolog_functions (inf)
4685 FILE *inf;
4686 {
4687 char *cp, *last;
4688 int len;
4689 int allocated;
4690
4691 allocated = 0;
4692 len = 0;
4693 last = NULL;
4694
4695 LOOP_ON_INPUT_LINES (inf, lb, cp)
4696 {
4697 if (cp[0] == '\0') /* Empty line */
4698 continue;
4699 else if (iswhite (cp[0])) /* Not a predicate */
4700 continue;
4701 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4702 prolog_skip_comment (&lb, inf);
4703 else if ((len = prolog_pred (cp, last)) > 0)
4704 {
4705 /* Predicate. Store the function name so that we only
4706 generate a tag for the first clause. */
4707 if (last == NULL)
4708 last = xnew(len + 1, char);
4709 else if (len + 1 > allocated)
4710 xrnew (last, len + 1, char);
4711 allocated = len + 1;
4712 strncpy (last, cp, len);
4713 last[len] = '\0';
4714 }
4715 }
4716 }
4717
4718
4719 static void
4720 prolog_skip_comment (plb, inf)
4721 linebuffer *plb;
4722 FILE *inf;
4723 {
4724 char *cp;
4725
4726 do
4727 {
4728 for (cp = plb->buffer; *cp != '\0'; cp++)
4729 if (cp[0] == '*' && cp[1] == '/')
4730 return;
4731 lineno++;
4732 linecharno += readline (plb, inf);
4733 }
4734 while (!feof(inf));
4735 }
4736
4737 /*
4738 * A predicate definition is added if it matches:
4739 * <beginning of line><Prolog Atom><whitespace>(
4740 *
4741 * It is added to the tags database if it doesn't match the
4742 * name of the previous clause header.
4743 *
4744 * Return the size of the name of the predicate, or 0 if no header
4745 * was found.
4746 */
4747 static int
4748 prolog_pred (s, last)
4749 char *s;
4750 char *last; /* Name of last clause. */
4751 {
4752 int pos;
4753 int len;
4754
4755 pos = prolog_atom (s, 0);
4756 if (pos < 1)
4757 return 0;
4758
4759 len = pos;
4760 pos = skip_spaces (s + pos) - s;
4761
4762 if ((s[pos] == '(') || (s[pos] == '.'))
4763 {
4764 if (s[pos] == '(')
4765 pos++;
4766
4767 /* Save only the first clause. */
4768 if (last == NULL
4769 || len != (int)strlen (last)
4770 || !strneq (s, last, len))
4771 {
4772 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4773 return len;
4774 }
4775 }
4776 return 0;
4777 }
4778
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore; underscores may also appear inside it.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM(*p) || *p == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;
      case '\'':
        if (p[1] != '\'')
          return (int) (p + 1 - start); /* closing quote included */
        p += 2;                 /* A double quote */
        break;
      default:
        p++;
        break;
      }
}
4837
4838 \f
4839 /*
4840 * Support for Erlang -- Anders Lindgren, Feb 1996.
4841 *
4842 * Generates tags for functions, defines, and records.
4843 *
4844 * Assumes that Erlang functions start at column 0.
4845 */
4846 static int erlang_func P_((char *, char *));
4847 static void erlang_attribute P_((char *));
4848 static int erlang_atom P_((char *, int));
4849
4850 static void
4851 Erlang_functions (inf)
4852 FILE *inf;
4853 {
4854 char *cp, *last;
4855 int len;
4856 int allocated;
4857
4858 allocated = 0;
4859 len = 0;
4860 last = NULL;
4861
4862 LOOP_ON_INPUT_LINES (inf, lb, cp)
4863 {
4864 if (cp[0] == '\0') /* Empty line */
4865 continue;
4866 else if (iswhite (cp[0])) /* Not function nor attribute */
4867 continue;
4868 else if (cp[0] == '%') /* comment */
4869 continue;
4870 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4871 continue;
4872 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4873 {
4874 erlang_attribute (cp);
4875 last = NULL;
4876 }
4877 else if ((len = erlang_func (cp, last)) > 0)
4878 {
4879 /*
4880 * Function. Store the function name so that we only
4881 * generates a tag for the first clause.
4882 */
4883 if (last == NULL)
4884 last = xnew (len + 1, char);
4885 else if (len + 1 > allocated)
4886 xrnew (last, len + 1, char);
4887 allocated = len + 1;
4888 strncpy (last, cp, len);
4889 last[len] = '\0';
4890 }
4891 }
4892 }
4893
4894
4895 /*
4896 * A function definition is added if it matches:
4897 * <beginning of line><Erlang Atom><whitespace>(
4898 *
4899 * It is added to the tags database if it doesn't match the
4900 * name of the previous clause header.
4901 *
4902 * Return the size of the name of the function, or 0 if no function
4903 * was found.
4904 */
4905 static int
4906 erlang_func (s, last)
4907 char *s;
4908 char *last; /* Name of last clause. */
4909 {
4910 int pos;
4911 int len;
4912
4913 pos = erlang_atom (s, 0);
4914 if (pos < 1)
4915 return 0;
4916
4917 len = pos;
4918 pos = skip_spaces (s + pos) - s;
4919
4920 /* Save only the first clause. */
4921 if (s[pos++] == '('
4922 && (last == NULL
4923 || len != (int)strlen (last)
4924 || !strneq (s, last, len)))
4925 {
4926 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4927 return len;
4928 }
4929
4930 return 0;
4931 }
4932
4933
4934 /*
4935 * Handle attributes. Currently, tags are generated for defines
4936 * and records.
4937 *
4938 * They are on the form:
4939 * -define(foo, bar).
4940 * -define(Foo(M, N), M+N).
4941 * -record(graph, {vtab = notable, cyclic = true}).
4942 */
4943 static void
4944 erlang_attribute (s)
4945 char *s;
4946 {
4947 int pos;
4948 int len;
4949
4950 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4951 {
4952 pos = skip_spaces (s + 7) - s;
4953 if (s[pos++] == '(')
4954 {
4955 pos = skip_spaces (s + pos) - s;
4956 len = erlang_atom (s, pos);
4957 if (len != 0)
4958 pfnote (savenstr (& s[pos], len), TRUE,
4959 s, pos + len, lineno, linecharno);
4960 }
4961 }
4962 return;
4963 }
4964
4965
/*
 * Consume an Erlang atom (or variable) starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * An unquoted name is a letter or underscore followed by any number of
 * alphanumeric characters or underscores.  A quoted name is delimited
 * by single quotes; inside it a backslash quotes the next character.
 * A quoted name that is not closed on this line yields -1.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: skip the opening quote and look for the end. */
  p++;
  for (;;)
    {
      if (*p == '\'')
        {
          p++;
          return (int) (p - start);
        }
      else if (*p == '\0')
        return -1;              /* multiline quoted atoms are ignored */
      else if (*p == '\\')
        {
          if (p[1] == '\0')
            return -1;
          p += 2;
        }
      else
        p++;
    }
}
5015
5016 \f
5017 #ifdef ETAGS_REGEXPS
5018
5019 static char *scan_separators P_((char *));
5020 static void analyse_regex P_((char *, bool));
5021 static void add_regex P_((char *, bool, language *));
5022 static char *substitute P_((char *, char *, struct re_registers *));
5023
/* Take a string like "/blah/" and turn it into "blah", handling
   quoted separator characters.  The first character of NAME is the
   separator; scanning stops at the first unquoted occurrence of it.
   A backslash quotes the next character: "\t" becomes a Tab, a quoted
   separator becomes the separator itself, and any other quoted
   character is kept together with its backslash.  A backslash at the
   very end of the string is dropped.

   Works in place and null terminates the resulting string, which
   starts at NAME.  Returns a pointer to the terminating separator, or
   to the end of the string if no unquoted separator was found.  For an
   empty string, NAME itself is returned and nothing is modified.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* An empty string has no separator; stepping past its terminator
     would read outside the string. */
  if (sep == '\0')
    return name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the character after it. */
          ++name;
          if (*name == '\0')
            break;
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5066
5067 /* Look at the argument of --regex or --no-regex and do the right
5068 thing. Same for each line of a regexp file. */
5069 static void
5070 analyse_regex (regex_arg, ignore_case)
5071 char *regex_arg;
5072 bool ignore_case;
5073 {
5074 if (regex_arg == NULL)
5075 free_patterns (); /* --no-regex: remove existing regexps */
5076
5077 /* A real --regexp option or a line in a regexp file. */
5078 switch (regex_arg[0])
5079 {
5080 /* Comments in regexp file or null arg to --regex. */
5081 case '\0':
5082 case ' ':
5083 case '\t':
5084 break;
5085
5086 /* Read a regex file. This is recursive and may result in a
5087 loop, which will stop when the file descriptors are exhausted. */
5088 case '@':
5089 {
5090 FILE *regexfp;
5091 linebuffer regexbuf;
5092 char *regexfile = regex_arg + 1;
5093
5094 /* regexfile is a file containing regexps, one per line. */
5095 regexfp = fopen (regexfile, "r");
5096 if (regexfp == NULL)
5097 {
5098 pfatal (regexfile);
5099 return;
5100 }
5101 initbuffer (&regexbuf);
5102 while (readline_internal (&regexbuf, regexfp) > 0)
5103 analyse_regex (regexbuf.buffer, ignore_case);
5104 free (regexbuf.buffer);
5105 fclose (regexfp);
5106 }
5107 break;
5108
5109 /* Regexp to be used for a specific language only. */
5110 case '{':
5111 {
5112 language *lang;
5113 char *lang_name = regex_arg + 1;
5114 char *cp;
5115
5116 for (cp = lang_name; *cp != '}'; cp++)
5117 if (*cp == '\0')
5118 {
5119 error ("unterminated language name in regex: %s", regex_arg);
5120 return;
5121 }
5122 *cp = '\0';
5123 lang = get_language_from_langname (lang_name);
5124 if (lang == NULL)
5125 return;
5126 add_regex (cp + 1, ignore_case, lang);
5127 }
5128 break;
5129
5130 /* Regexp to be used for any language. */
5131 default:
5132 add_regex (regex_arg, ignore_case, NULL);
5133 break;
5134 }
5135 }
5136
5137 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5138 expression, into a real regular expression by compiling it. */
5139 static void
5140 add_regex (regexp_pattern, ignore_case, lang)
5141 char *regexp_pattern;
5142 bool ignore_case;
5143 language *lang;
5144 {
5145 char *name;
5146 const char *err;
5147 struct re_pattern_buffer *patbuf;
5148 pattern *pp;
5149
5150
5151 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5152 {
5153 error ("%s: unterminated regexp", regexp_pattern);
5154 return;
5155 }
5156 name = scan_separators (regexp_pattern);
5157 if (regexp_pattern[0] == '\0')
5158 {
5159 error ("null regexp", (char *)NULL);
5160 return;
5161 }
5162 (void) scan_separators (name);
5163
5164 patbuf = xnew (1, struct re_pattern_buffer);
5165 /* Translation table to fold case if appropriate. */
5166 patbuf->translate = (ignore_case) ? lc_trans : NULL;
5167 patbuf->fastmap = NULL;
5168 patbuf->buffer = NULL;
5169 patbuf->allocated = 0;
5170
5171 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5172 if (err != NULL)
5173 {
5174 error ("%s while compiling pattern", err);
5175 return;
5176 }
5177
5178 pp = p_head;
5179 p_head = xnew (1, pattern);
5180 p_head->regex = savestr (regexp_pattern);
5181 p_head->p_next = pp;
5182 p_head->language = lang;
5183 p_head->pattern = patbuf;
5184 p_head->name_pattern = savestr (name);
5185 p_head->error_signaled = FALSE;
5186 }
5187
5188 /*
5189 * Do the substitutions indicated by the regular expression and
5190 * arguments.
5191 */
5192 static char *
5193 substitute (in, out, regs)
5194 char *in, *out;
5195 struct re_registers *regs;
5196 {
5197 char *result, *t;
5198 int size, dig, diglen;
5199
5200 result = NULL;
5201 size = strlen (out);
5202
5203 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5204 if (out[size - 1] == '\\')
5205 fatal ("pattern error in \"%s\"", out);
5206 for (t = etags_strchr (out, '\\');
5207 t != NULL;
5208 t = etags_strchr (t + 2, '\\'))
5209 if (ISDIGIT (t[1]))
5210 {
5211 dig = t[1] - '0';
5212 diglen = regs->end[dig] - regs->start[dig];
5213 size += diglen - 2;
5214 }
5215 else
5216 size -= 1;
5217
5218 /* Allocate space and do the substitutions. */
5219 result = xnew (size + 1, char);
5220
5221 for (t = result; *out != '\0'; out++)
5222 if (*out == '\\' && ISDIGIT (*++out))
5223 {
5224 dig = *out - '0';
5225 diglen = regs->end[dig] - regs->start[dig];
5226 strncpy (t, in + regs->start[dig], diglen);
5227 t += diglen;
5228 }
5229 else
5230 *t++ = *out;
5231 *t = '\0';
5232
5233 assert (t <= result + size && t - result == (int)strlen (result));
5234
5235 return result;
5236 }
5237
5238 /* Deallocate all patterns. */
5239 static void
5240 free_patterns ()
5241 {
5242 pattern *pp;
5243 while (p_head != NULL)
5244 {
5245 pp = p_head->p_next;
5246 free (p_head->regex);
5247 free (p_head->name_pattern);
5248 free (p_head);
5249 p_head = pp;
5250 }
5251 return;
5252 }
5253 #endif /* ETAGS_REGEXPS */
5254
5255 \f
5256 static void
5257 get_tag (bp)
5258 register char *bp;
5259 {
5260 register char *cp;
5261
5262 if (*bp == '\0')
5263 return;
5264 /* Go till you get to white space or a syntactic break */
5265 for (cp = bp + 1;
5266 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5267 cp++)
5268 continue;
5269 pfnote (savenstr (bp, cp-bp), TRUE,
5270 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5271 }
5272
5273 /* Initialize a linebuffer for use */
5274 static void
5275 initbuffer (lbp)
5276 linebuffer *lbp;
5277 {
5278 lbp->size = (DEBUG) ? 3 : 200;
5279 lbp->buffer = xnew (lbp->size, char);
5280 lbp->buffer[0] = '\0';
5281 lbp->len = 0;
5282 }
5283
5284 /*
5285 * Read a line of text from `stream' into `lbp', excluding the
5286 * newline or CR-NL, if any. Return the number of characters read from
5287 * `stream', which is the length of the line including the newline.
5288 *
5289 * On DOS or Windows we do not count the CR character, if any, before the
5290 * NL, in the returned length; this mirrors the behavior of emacs on those
5291 * platforms (for text files, it translates CR-NL to NL as it reads in the
5292 * file).
5293 */
5294 static long
5295 readline_internal (lbp, stream)
5296 linebuffer *lbp;
5297 register FILE *stream;
5298 {
5299 char *buffer = lbp->buffer;
5300 register char *p = lbp->buffer;
5301 register char *pend;
5302 int chars_deleted;
5303
5304 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5305
5306 while (1)
5307 {
5308 register int c = getc (stream);
5309 if (p == pend)
5310 {
5311 /* We're at the end of linebuffer: expand it. */
5312 lbp->size *= 2;
5313 xrnew (buffer, lbp->size, char);
5314 p += buffer - lbp->buffer;
5315 pend = buffer + lbp->size;
5316 lbp->buffer = buffer;
5317 }
5318 if (c == EOF)
5319 {
5320 *p = '\0';
5321 chars_deleted = 0;
5322 break;
5323 }
5324 if (c == '\n')
5325 {
5326 if (p > buffer && p[-1] == '\r')
5327 {
5328 p -= 1;
5329 #ifdef DOS_NT
5330 /* Assume CRLF->LF translation will be performed by Emacs
5331 when loading this file, so CRs won't appear in the buffer.
5332 It would be cleaner to compensate within Emacs;
5333 however, Emacs does not know how many CRs were deleted
5334 before any given point in the file. */
5335 chars_deleted = 1;
5336 #else
5337 chars_deleted = 2;
5338 #endif
5339 }
5340 else
5341 {
5342 chars_deleted = 1;
5343 }
5344 *p = '\0';
5345 break;
5346 }
5347 *p++ = c;
5348 }
5349 lbp->len = p - buffer;
5350
5351 return lbp->len + chars_deleted;
5352 }
5353
5354 /*
5355 * Like readline_internal, above, but in addition try to match the
5356 * input line against relevant regular expressions.
5357 */
5358 static long
5359 readline (lbp, stream)
5360 linebuffer *lbp;
5361 FILE *stream;
5362 {
5363 /* Read new line. */
5364 long result = readline_internal (lbp, stream);
5365 #ifdef ETAGS_REGEXPS
5366 int match;
5367 pattern *pp;
5368
5369 /* Match against relevant patterns. */
5370 if (lbp->len > 0)
5371 for (pp = p_head; pp != NULL; pp = pp->p_next)
5372 {
5373 /* Only use generic regexps or those for the current language. */
5374 if (pp->language != NULL && pp->language != curlang)
5375 continue;
5376
5377 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5378 switch (match)
5379 {
5380 case -2:
5381 /* Some error. */
5382 if (!pp->error_signaled)
5383 {
5384 error ("error while matching \"%s\"", pp->regex);
5385 pp->error_signaled = TRUE;
5386 }
5387 break;
5388 case -1:
5389 /* No match. */
5390 break;
5391 default:
5392 /* Match occurred. Construct a tag. */
5393 if (pp->name_pattern[0] != '\0')
5394 {
5395 /* Make a named tag. */
5396 char *name = substitute (lbp->buffer,
5397 pp->name_pattern, &pp->regs);
5398 if (name != NULL)
5399 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5400 }
5401 else
5402 {
5403 /* Make an unnamed tag. */
5404 pfnote ((char *)NULL, TRUE,
5405 lbp->buffer, match, lineno, linecharno);
5406 }
5407 break;
5408 }
5409 }
5410 #endif /* ETAGS_REGEXPS */
5411
5412 return result;
5413 }
5414
5415 \f
/*
 * Return a newly allocated copy of the string CP.  The copy occupies
 * strlen(cp)+1 bytes and is made by savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5426
5427 /*
5428 * Return a pointer to a space of size LEN+1 allocated with xnew where
5429 * the string CP has been copied for at most the first LEN characters.
5430 */
5431 static char *
5432 savenstr (cp, len)
5433 char *cp;
5434 int len;
5435 {
5436 register char *dp;
5437
5438 dp = xnew (len + 1, char);
5439 strncpy (dp, cp, len);
5440 dp[len] = '\0';
5441 return dp;
5442 }
5443
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null character is
 * considered part of the string, so searching for '\0' finds it.
 *
 * Behaves like POSIX strrchr, included for portability.  As in strrchr,
 * c is converted to char before the comparison, so that characters
 * with the high bit set are found whether char is signed or not.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  char wanted = (char) c;

  r = NULL;
  do
    {
      if (*sp == wanted)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
5465
5466
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null character is
 * considered part of the string, so searching for '\0' finds it.
 *
 * Behaves like POSIX strchr, included for portability.  As in strchr,
 * c is converted to char before the comparison, so that characters
 * with the high bit set are found whether char is signed or not.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  char wanted = (char) c;

  do
    {
      if (*sp == wanted)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
5485
/* Advance CP over any characters for which iswhite is true and return
   a pointer to the first character that is not.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5495
/* Advance CP up to the next character for which iswhite is true, or to
   the end of the string, and return the new pointer.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
5505
5506 /* Print error message and exit. */
5507 void
5508 fatal (s1, s2)
5509 char *s1, *s2;
5510 {
5511 error (s1, s2);
5512 exit (BAD);
5513 }
5514
5515 static void
5516 pfatal (s1)
5517 char *s1;
5518 {
5519 perror (s1);
5520 exit (BAD);
5521 }
5522
5523 static void
5524 suggest_asking_for_help ()
5525 {
5526 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5527 progname,
5528 #ifdef LONG_OPTIONS
5529 "--help"
5530 #else
5531 "-h"
5532 #endif
5533 );
5534 exit (BAD);
5535 }
5536
5537 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5538 static void
5539 error (s1, s2)
5540 const char *s1, *s2;
5541 {
5542 fprintf (stderr, "%s: ", progname);
5543 fprintf (stderr, s1, s2);
5544 fprintf (stderr, "\n");
5545 }
5546
5547 /* Return a newly-allocated string whose contents
5548 concatenate those of s1, s2, s3. */
5549 static char *
5550 concat (s1, s2, s3)
5551 char *s1, *s2, *s3;
5552 {
5553 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5554 char *result = xnew (len1 + len2 + len3 + 1, char);
5555
5556 strcpy (result, s1);
5557 strcpy (result + len1, s2);
5558 strcpy (result + len1 + len2, s3);
5559 result[len1 + len2 + len3] = '\0';
5560
5561 return result;
5562 }
5563
5564 \f
5565 /* Does the same work as the system V getcwd, but does not need to
5566 guess the buffer size in advance. */
5567 static char *
5568 etags_getcwd ()
5569 {
5570 #ifdef HAVE_GETCWD
5571 int bufsize = 200;
5572 char *path = xnew (bufsize, char);
5573
5574 while (getcwd (path, bufsize) == NULL)
5575 {
5576 if (errno != ERANGE)
5577 pfatal ("getcwd");
5578 bufsize *= 2;
5579 free (path);
5580 path = xnew (bufsize, char);
5581 }
5582
5583 canonicalize_filename (path);
5584 return path;
5585
5586 #else /* not HAVE_GETCWD */
5587 #if MSDOS
5588
5589 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5590
5591 getwd (path);
5592
5593 for (p = path; *p != '\0'; p++)
5594 if (*p == '\\')
5595 *p = '/';
5596 else
5597 *p = lowcase (*p);
5598
5599 return strdup (path);
5600 #else /* not MSDOS */
5601 linebuffer path;
5602 FILE *pipe;
5603
5604 initbuffer (&path);
5605 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5606 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5607 pfatal ("pwd");
5608 pclose (pipe);
5609
5610 return path.buffer;
5611 #endif /* not MSDOS */
5612 #endif /* not HAVE_GETCWD */
5613 }
5614
5615 /* Return a newly allocated string containing the file name of FILE
5616 relative to the absolute directory DIR (which should end with a slash). */
5617 static char *
5618 relative_filename (file, dir)
5619 char *file, *dir;
5620 {
5621 char *fp, *dp, *afn, *res;
5622 int i;
5623
5624 /* Find the common root of file and dir (with a trailing slash). */
5625 afn = absolute_filename (file, cwd);
5626 fp = afn;
5627 dp = dir;
5628 while (*fp++ == *dp++)
5629 continue;
5630 fp--, dp--; /* back to the first differing char */
5631 #ifdef DOS_NT
5632 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5633 return afn;
5634 #endif
5635 do /* look at the equal chars until '/' */
5636 fp--, dp--;
5637 while (*fp != '/');
5638
5639 /* Build a sequence of "../" strings for the resulting relative file name. */
5640 i = 0;
5641 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5642 i += 1;
5643 res = xnew (3*i + strlen (fp + 1) + 1, char);
5644 res[0] = '\0';
5645 while (i-- > 0)
5646 strcat (res, "../");
5647
5648 /* Add the file name relative to the common root of file and dir. */
5649 strcat (res, fp + 1);
5650 free (afn);
5651
5652 return res;
5653 }
5654
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute, DIR is prepended to it.  The
   "/dirname/.." and "/." substrings are then removed from the result.
   If nothing is left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *from, *to;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the start of the preceding component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Move the tail down over "dirname/..".  Source and
                 destination overlap, so copy by hand from the front
                 rather than with strcpy. */
              to = cp;
              from = slashp + 3;
              while ((*to++ = *from++) != '\0')
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Move the tail down over "/."; same overlap as above. */
              to = slashp;
              from = slashp + 2;
              while ((*to++ = *from++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Nothing is left: release the empty string and return "/". */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5715
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   FILE is canonicalized in place.  If it contains no slash, a copy of
   DIR is returned.  Otherwise FILE is temporarily cut just after its
   last slash while the directory part is made absolute.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *lastslash, *result;
  char saved;

  canonicalize_filename (file);
  lastslash = etags_strrchr (file, '/');
  if (lastslash == NULL)
    return savestr (dir);

  /* Hide the base name, compute the result, then restore FILE. */
  saved = lastslash[1];
  lastslash[1] = '\0';
  result = absolute_filename (file, dir);
  lastslash[1] = saved;

  return result;
}
5737
/* Whether the argument string is an absolute file name, that is,
   whether it starts with a slash or, under DOS_NT, with a drive
   letter, a colon and a slash.  The argument string must have been
   canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  int absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
5750
/* Put the file name FN in canonical form.  Works in place.
   Under DOS_NT, a lower case drive letter is converted to upper case
   and backslashes are translated into slashes.  On other systems the
   name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5769
5770 /* Set the minimum size of a string contained in a linebuffer. */
5771 static void
5772 linebuffer_setlen (lbp, toksize)
5773 linebuffer *lbp;
5774 int toksize;
5775 {
5776 while (lbp->size <= toksize)
5777 {
5778 lbp->size *= 2;
5779 xrnew (lbp->buffer, lbp->size, char);
5780 }
5781 lbp->len = toksize;
5782 }
5783
/* Like malloc, but a NULL result from malloc is reported through
   fatal as exhausted virtual memory.  Return the allocated block of
   SIZE bytes.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *mem;

  mem = (long *) malloc (size);
  if (mem == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
5794
/* Like realloc, but a NULL result from realloc is reported through
   fatal as exhausted virtual memory.  Return the block PTR resized
   to SIZE bytes.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *mem;

  mem = (long *) realloc (ptr, size);
  if (mem == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}