(print_help): Enclose the regexp in the help text example in quotes.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000, 2001
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1993 Francesco Potortì reorganised C and C++ based on work by Joe Wells.
29 * 1994 Regexp tags by Tom Tromey.
30 * 2001 Nested classes by Francesco Potortì based on work by Mykola Dzyuba.
31 *
32 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
33 */
34
35 char pot_etags_version[] = "@(#) pot revision number is 14.18";
36
37 #define TRUE 1
38 #define FALSE 0
39
40 #ifdef DEBUG
41 # undef DEBUG
42 # define DEBUG TRUE
43 #else
44 # define DEBUG FALSE
45 # define NDEBUG /* disable assert */
46 #endif
47
48 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
49 # define P_(proto) proto
50 #else
51 # define P_(proto) ()
52 #endif
53
54 #ifdef HAVE_CONFIG_H
55 # include <config.h>
56 /* On some systems, Emacs defines static as nothing for the sake
57 of unexec. We don't want that here since we don't use unexec. */
58 # undef static
59 # define ETAGS_REGEXPS /* use the regexp features */
60 # define LONG_OPTIONS /* accept long options */
61 #else
62 # ifndef __STDC__
63 # define static /* remove static for old compilers' sake */
64 # endif
65 #endif /* !HAVE_CONFIG_H */
66
67 #ifndef _GNU_SOURCE
68 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
69 #endif
70
71 /* WIN32_NATIVE is for Xemacs.
72 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
73 #ifdef WIN32_NATIVE
74 # undef MSDOS
75 # undef WINDOWSNT
76 # define WINDOWSNT
77 #endif /* WIN32_NATIVE */
78
79 #ifdef MSDOS
80 # undef MSDOS
81 # define MSDOS TRUE
82 # include <fcntl.h>
83 # include <sys/param.h>
84 # include <io.h>
85 # ifndef HAVE_CONFIG_H
86 # define DOS_NT
87 # include <sys/config.h>
88 # endif
89 #else
90 # define MSDOS FALSE
91 #endif /* MSDOS */
92
93 #ifdef WINDOWSNT
94 # include <stdlib.h>
95 # include <fcntl.h>
96 # include <string.h>
97 # include <direct.h>
98 # include <io.h>
99 # define MAXPATHLEN _MAX_PATH
100 # undef HAVE_NTGUI
101 # undef DOS_NT
102 # define DOS_NT
103 # ifndef HAVE_GETCWD
104 # define HAVE_GETCWD
105 # endif /* undef HAVE_GETCWD */
106 #else /* !WINDOWSNT */
107 # ifdef STDC_HEADERS
108 # include <stdlib.h>
109 # include <string.h>
110 # else
111 extern char *getenv ();
112 # endif
113 #endif /* !WINDOWSNT */
114
115 #ifdef HAVE_UNISTD_H
116 # include <unistd.h>
117 #else
118 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
119 extern char *getcwd (char *buf, size_t size);
120 # endif
121 #endif /* HAVE_UNISTD_H */
122
123 #include <stdio.h>
124 #include <ctype.h>
125 #include <errno.h>
126 #ifndef errno
127 extern int errno;
128 #endif
129 #include <sys/types.h>
130 #include <sys/stat.h>
131
132 #include <assert.h>
133 #ifdef NDEBUG
134 # undef assert /* some systems have a buggy assert.h */
135 # define assert(x) ((void) 0)
136 #endif
137
138 #if !defined (S_ISREG) && defined (S_IFREG)
139 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
140 #endif
141
142 #ifdef LONG_OPTIONS
143 # include <getopt.h>
144 #else
145 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
146 extern char *optarg;
147 extern int optind, opterr;
148 #endif /* LONG_OPTIONS */
149
150 #ifdef ETAGS_REGEXPS
151 # include <regex.h>
152 #endif /* ETAGS_REGEXPS */
153
154 /* Define CTAGS to make the program "ctags" compatible with the usual one.
155 Leave it undefined to make the program "etags", which makes emacs-style
156 tag tables and tags typedefs, #defines and struct/union/enum by default. */
157 #ifdef CTAGS
158 # undef CTAGS
159 # define CTAGS TRUE
160 #else
161 # define CTAGS FALSE
162 #endif
163
164 /* Exit codes for success and failure. */
165 #ifdef VMS
166 # define GOOD 1
167 # define BAD 0
168 #else
169 # define GOOD 0
170 # define BAD 1
171 #endif
172
/* String equality helpers: evaluate to non-zero when S and T (streq), or
   their first N characters (strneq), compare equal.
   strcmp and strncmp dereference both arguments, so the sanity check must
   require that BOTH are non-NULL; checking that merely one of them is
   non-NULL would let a NULL argument slip through.
   Note that S and T are evaluated more than once. */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
175
/* Number of entries in each per-character lookup table. */
176 #define CHARS 256 /* 2^CHAR_BIT, i.e. one entry per value of an 8-bit char */
/* Map X onto a table index in the range 0 .. CHARS-1. */
177 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
/* Character-class predicates: each one indexes its lookup table with CHAR(c). */
178 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
179 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
180 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
181 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
182 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
183
/* <ctype.h> wrappers: the argument goes through CHAR first, so the library
   function always receives a value in 0 .. CHARS-1. */
184 #define ISALNUM(c) isalnum (CHAR(c))
185 #define ISALPHA(c) isalpha (CHAR(c))
186 #define ISDIGIT(c) isdigit (CHAR(c))
187 #define ISLOWER(c) islower (CHAR(c))
188
/* Case conversion, with the same CHAR masking of the argument. */
189 #define lowcase(c) tolower (CHAR(c))
190 #define upcase(c) toupper (CHAR(c))
191
192
193 /*
194 * xnew, xrnew -- allocate, reallocate storage
195 *
196 * SYNOPSIS: Type *xnew (int n, Type);
197 * void xrnew (OldPointer, int n, Type);
198 */
199 #if DEBUG
200 # include "chkmalloc.h"
201 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
202 (n) * sizeof (Type)))
203 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
204 (char *) (op), (n) * sizeof (Type)))
205 #else
206 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
207 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
208 (char *) (op), (n) * sizeof (Type)))
209 #endif
210
211 typedef int bool;
212
213 typedef void Lang_function P_((FILE *));
214
/* Associates a compressed-file name suffix with the command that
   decompresses such a file. */
215 typedef struct
216 {
217 char *suffix; /* file name suffix, written without the leading dot */
218 char *command; /* Takes one arg and decompresses to stdout */
219 } compressor;
220
/* One entry of the table of languages.  The three lists are each either
   NULL or a NULL-terminated array of strings. */
221 typedef struct
222 {
223 char *name; /* language name, as printed in the help text */
224 Lang_function *function; /* presumably the routine that tags a file of this language -- confirm at the call site */
225 char **filenames; /* whole file names that select this language */
226 char **suffixes; /* file name suffixes, without the dot */
227 char **interpreters; /* interpreter names (cf. the #! note in the help text) */
228 } language;
229
230 typedef struct node_st
231 { /* sorting structure */
232 char *name; /* function or type name */
233 char *file; /* file name */
234 bool is_func; /* use pattern or line no */
235 bool been_warned; /* set if noticed dup */
236 int lno; /* line number tag is on */
237 long cno; /* character number line starts on */
238 char *pat; /* search pattern */
239 struct node_st *left, *right; /* left and right sons */
240 } node;
241
242 /*
243 * A `linebuffer' is a structure which holds a line of text.
244 * `readline_internal' reads a line from a stream into a linebuffer
245 * and works regardless of the length of the line.
246 * SIZE is the size of BUFFER, LEN is the length of the string in
247 * BUFFER after readline reads it.
248 */
249 typedef struct
250 {
251 long size;
252 int len;
253 char *buffer;
254 } linebuffer;
255
256 /* Many compilers barf on this:
257 Lang_function Ada_funcs;
258 so let's write it this way */
259 static void Ada_funcs P_((FILE *));
260 static void Asm_labels P_((FILE *));
261 static void C_entries P_((int c_ext, FILE *));
262 static void default_C_entries P_((FILE *));
263 static void plain_C_entries P_((FILE *));
264 static void Cjava_entries P_((FILE *));
265 static void Cobol_paragraphs P_((FILE *));
266 static void Cplusplus_entries P_((FILE *));
267 static void Cstar_entries P_((FILE *));
268 static void Erlang_functions P_((FILE *));
269 static void Fortran_functions P_((FILE *));
270 static void Yacc_entries P_((FILE *));
271 static void Lisp_functions P_((FILE *));
272 static void Makefile_targets P_((FILE *));
273 static void Pascal_functions P_((FILE *));
274 static void Perl_functions P_((FILE *));
275 static void Postscript_functions P_((FILE *));
276 static void Prolog_functions P_((FILE *));
277 static void Python_functions P_((FILE *));
278 static void Scheme_functions P_((FILE *));
279 static void TeX_commands P_((FILE *));
280 static void Texinfo_nodes P_((FILE *));
281 static void just_read_file P_((FILE *));
282
283 static void print_language_names P_((void));
284 static void print_version P_((void));
285 static void print_help P_((void));
286 int main P_((int, char **));
287 static int number_len P_((long));
288
289 static compressor *get_compressor_from_suffix P_((char *, char **));
290 static language *get_language_from_langname P_((char *));
291 static language *get_language_from_interpreter P_((char *));
292 static language *get_language_from_filename P_((char *));
293 static int total_size_of_entries P_((node *));
294 static long readline P_((linebuffer *, FILE *));
295 static long readline_internal P_((linebuffer *, FILE *));
296 static void get_tag P_((char *));
297
298 #ifdef ETAGS_REGEXPS
299 static void analyse_regex P_((char *, bool));
300 static void add_regex P_((char *, bool, language *));
301 static void free_patterns P_((void));
302 #endif /* ETAGS_REGEXPS */
303 static void error P_((const char *, const char *));
304 static void suggest_asking_for_help P_((void));
305 void fatal P_((char *, char *));
306 static void pfatal P_((char *));
307 static void add_node P_((node *, node **));
308
309 static void init P_((void));
310 static void initbuffer P_((linebuffer *));
311 static void find_entries P_((char *, FILE *));
312 static void free_tree P_((node *));
313 static void pfnote P_((char *, bool, char *, int, int, long));
314 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
315 static void process_file P_((char *));
316 static void put_entries P_((node *));
317 static void takeprec P_((void));
318
319 static char *concat P_((char *, char *, char *));
320 static char *skip_spaces P_((char *));
321 static char *skip_non_spaces P_((char *));
322 static char *savenstr P_((char *, int));
323 static char *savestr P_((char *));
324 static char *etags_strchr P_((const char *, int));
325 static char *etags_strrchr P_((const char *, int));
326 static char *etags_getcwd P_((void));
327 static char *relative_filename P_((char *, char *));
328 static char *absolute_filename P_((char *, char *));
329 static char *absolute_dirname P_((char *, char *));
330 static bool filename_is_absolute P_((char *f));
331 static void canonicalize_filename P_((char *));
332 static void linebuffer_setlen P_((linebuffer *, int));
333 long *xmalloc P_((unsigned int));
334 long *xrealloc P_((char *, unsigned int));
335
336 \f
337 char searchar = '/'; /* use /.../ searches */
338
339 char *tagfile; /* output file */
340 char *progname; /* name this program was invoked with */
341 char *cwd; /* current working directory */
342 char *tagfiledir; /* directory of tagfile */
343 FILE *tagf; /* ioptr for tags file */
344
345 char *curfile; /* current input file name */
346 language *curlang; /* current language */
347
348 int lineno; /* line number of current line */
349 long charno; /* current character number */
350 long linecharno; /* charno of start of current line */
351 char *dbp; /* pointer to start of current tag */
352
353 node *head; /* the head of the binary tree of tags */
354
355 linebuffer lb; /* the current line */
356
357 /* boolean "functions" (see init) */
358 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
359 char
360 /* white chars */
361 *white = " \f\t\n\r\v",
362 /* not in a name */
363 *nonam = " \f\t\n\r(=,[;",
364 /* token ending chars */
365 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
366 /* token starting chars */
367 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
368 /* valid in-token chars */
369 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
370
371 bool append_to_tagfile; /* -a: append to tags */
372 /* The following four default to TRUE for etags, but to FALSE for ctags. */
373 bool typedefs; /* -t: create tags for C and Ada typedefs */
374 bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
375 /* 0 struct/enum/union decls, and C++ */
376 /* member functions. */
377 bool constantypedefs; /* -d: create tags for C #define, enum */
378 /* constants and variables. */
379 /* -D: opposite of -d. Default under ctags. */
380 bool declarations; /* --declarations: tag them and extern in C&Co*/
381 bool globals; /* create tags for global variables */
382 bool members; /* create tags for C member variables */
383 bool update; /* -u: update tags */
384 bool vgrind_style; /* -v: create vgrind style index output */
385 bool no_warnings; /* -w: suppress warnings */
386 bool cxref_style; /* -x: create cxref style output */
387 bool cplusplus; /* .[hc] means C++, not C */
388 bool noindentypedefs; /* -I: ignore indentation in C */
389 bool packages_only; /* --packages-only: in Ada, only tag packages*/
390
391 #ifdef LONG_OPTIONS
392 struct option longopts[] =
393 {
394 { "packages-only", no_argument, &packages_only, TRUE },
395 { "append", no_argument, NULL, 'a' },
396 { "backward-search", no_argument, NULL, 'B' },
397 { "c++", no_argument, NULL, 'C' },
398 { "cxref", no_argument, NULL, 'x' },
399 { "defines", no_argument, NULL, 'd' },
400 { "declarations", no_argument, &declarations, TRUE },
401 { "no-defines", no_argument, NULL, 'D' },
402 { "globals", no_argument, &globals, TRUE },
403 { "no-globals", no_argument, &globals, FALSE },
404 { "help", no_argument, NULL, 'h' },
405 { "help", no_argument, NULL, 'H' },
406 { "ignore-indentation", no_argument, NULL, 'I' },
407 { "include", required_argument, NULL, 'i' },
408 { "language", required_argument, NULL, 'l' },
409 { "members", no_argument, &members, TRUE },
410 { "no-members", no_argument, &members, FALSE },
411 { "no-warn", no_argument, NULL, 'w' },
412 { "output", required_argument, NULL, 'o' },
413 #ifdef ETAGS_REGEXPS
414 { "regex", required_argument, NULL, 'r' },
415 { "no-regex", no_argument, NULL, 'R' },
416 { "ignore-case-regex", required_argument, NULL, 'c' },
417 #endif /* ETAGS_REGEXPS */
418 { "typedefs", no_argument, NULL, 't' },
419 { "typedefs-and-c++", no_argument, NULL, 'T' },
420 { "update", no_argument, NULL, 'u' },
421 { "version", no_argument, NULL, 'V' },
422 { "vgrind", no_argument, NULL, 'v' },
423 { NULL }
424 };
425 #endif /* LONG_OPTIONS */
426
427 #ifdef ETAGS_REGEXPS
428 /* Structure defining a regular expression. Elements are
429 the compiled pattern, and the name string. */
430 typedef struct pattern
431 {
432 struct pattern *p_next;
433 language *language;
434 char *regex;
435 struct re_pattern_buffer *pattern;
436 struct re_registers regs;
437 char *name_pattern;
438 bool error_signaled;
439 } pattern;
440
441 /* List of all regexps. */
442 pattern *p_head = NULL;
443
444 /* How many characters in the character set. (From regex.c.) */
445 #define CHAR_SET_SIZE 256
446 /* Translation table for case-insensitive matching. */
447 char lc_trans[CHAR_SET_SIZE];
448 #endif /* ETAGS_REGEXPS */
449
450 compressor compressors[] =
451 {
452 { "z", "gzip -d -c"},
453 { "Z", "gzip -d -c"},
454 { "gz", "gzip -d -c"},
455 { "GZ", "gzip -d -c"},
456 { "bz2", "bzip2 -d -c" },
457 { NULL }
458 };
459
460 /*
461 * Language stuff.
462 */
463
464 /* Non-NULL if language fixed. */
465 language *forced_lang = NULL;
466
467 /* Ada code */
468 char *Ada_suffixes [] =
469 { "ads", "adb", "ada", NULL };
470
471 /* Assembly code */
472 char *Asm_suffixes [] = { "a", /* Unix assembler */
473 "asm", /* Microcontroller assembly */
474 "def", /* BSO/Tasking definition includes */
475 "inc", /* Microcontroller include files */
476 "ins", /* Microcontroller include files */
477 "s", "sa", /* Unix assembler */
478 "S", /* cpp-processed Unix assembler */
479 "src", /* BSO/Tasking C compiler output */
480 NULL
481 };
482
483 /* Note that .c and .h can be considered C++, if the --c++ flag was
484 given, or if the `class' keyword is met inside the file.
485 That is why default_C_entries is called for these. */
486 char *default_C_suffixes [] =
487 { "c", "h", NULL };
488
489 char *Cplusplus_suffixes [] =
490 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
491 "M", /* Objective C++ */
492 "pdb", /* Postscript with C syntax */
493 NULL };
494
495 char *Cjava_suffixes [] =
496 { "java", NULL };
497
498 char *Cobol_suffixes [] =
499 { "COB", "cob", NULL };
500
501 char *Cstar_suffixes [] =
502 { "cs", "hs", NULL };
503
504 char *Erlang_suffixes [] =
505 { "erl", "hrl", NULL };
506
507 char *Fortran_suffixes [] =
508 { "F", "f", "f90", "for", NULL };
509
510 char *Lisp_suffixes [] =
511 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
512
513 char *Makefile_filenames [] =
514 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
515
516 char *Pascal_suffixes [] =
517 { "p", "pas", NULL };
518
519 char *Perl_suffixes [] =
520 { "pl", "pm", NULL };
521 char *Perl_interpreters [] =
522 { "perl", "@PERL@", NULL };
523
524 char *plain_C_suffixes [] =
525 { "lm", /* Objective lex file */
526 "m", /* Objective C file */
527 "pc", /* Pro*C file */
528 NULL };
529
530 char *Postscript_suffixes [] =
531 { "ps", "psw", NULL }; /* .psw is for PSWrap */
532
533 char *Prolog_suffixes [] =
534 { "prolog", NULL };
535
536 char *Python_suffixes [] =
537 { "py", NULL };
538
539 /* Can't do the `SCM' or `scm' prefix with a version number. */
540 char *Scheme_suffixes [] =
541 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
542
543 char *TeX_suffixes [] =
544 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
545
546 char *Texinfo_suffixes [] =
547 { "texi", "texinfo", "txi", NULL };
548
549 char *Yacc_suffixes [] =
550 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
551
552 /*
553 * Table of languages.
554 *
555 * It is ok for a given function to be listed under more than one
556 * name. I just didn't.
557 */
558
559 language lang_names [] =
560 {
561 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
562 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
563 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
564 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
565 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
566 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
567 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
568 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
569 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
570 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
571 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
572 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
573 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
574 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
575 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
576 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
577 { "python", Python_functions, NULL, Python_suffixes, NULL },
578 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
579 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
580 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
581 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
582 { "auto", NULL }, /* default guessing scheme */
583 { "none", just_read_file }, /* regexp matching only */
584 { NULL, NULL } /* end of list */
585 };
586
587 \f
588 static void
589 print_language_names ()
590 {
591 language *lang;
592 char **name, **ext;
593
594 puts ("\nThese are the currently supported languages, along with the\n\
595 default file names and dot suffixes:");
596 for (lang = lang_names; lang->name != NULL; lang++)
597 {
598 printf (" %-*s", 10, lang->name);
599 if (lang->filenames != NULL)
600 for (name = lang->filenames; *name != NULL; name++)
601 printf (" %s", *name);
602 if (lang->suffixes != NULL)
603 for (ext = lang->suffixes; *ext != NULL; ext++)
604 printf (" .%s", *ext);
605 puts ("");
606 }
607 puts ("Where `auto' means use default language for files based on file\n\
608 name suffix, and `none' means only do regexp processing on files.\n\
609 If no language is specified and no matching suffix is found,\n\
610 the first line of the file is read for a sharp-bang (#!) sequence\n\
611 followed by the name of an interpreter. If no such sequence is found,\n\
612 Fortran is tried first; if no tags are found, C is tried next.\n\
613 When parsing any C file, a \"class\" keyword switches to C++.\n\
614 Compressed files are supported using gzip and bzip2.");
615 }
616
617 #ifndef EMACS_NAME
618 # define EMACS_NAME "GNU Emacs"
619 #endif
620 #ifndef VERSION
621 # define VERSION "21"
622 #endif
623 static void
624 print_version ()
625 {
626 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
627 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
628 puts ("This program is distributed under the same terms as Emacs");
629
630 exit (GOOD);
631 }
632
633 static void
634 print_help ()
635 {
636 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
637 \n\
638 These are the options accepted by %s.\n", progname, progname);
639 #ifdef LONG_OPTIONS
640 puts ("You may use unambiguous abbreviations for the long option names.");
641 #else
642 puts ("Long option names do not work with this executable, as it is not\n\
643 linked with GNU getopt.");
644 #endif /* LONG_OPTIONS */
645 puts ("A - as file name means read names from stdin (one per line).");
646 if (!CTAGS)
647 printf (" Absolute names are stored in the output file as they are.\n\
648 Relative ones are stored relative to the output file's directory.");
649 puts ("\n");
650
651 puts ("-a, --append\n\
652 Append tag entries to existing tags file.");
653
654 puts ("--packages-only\n\
655 For Ada files, only generate tags for packages .");
656
657 if (CTAGS)
658 puts ("-B, --backward-search\n\
659 Write the search commands for the tag entries using '?', the\n\
660 backward-search command instead of '/', the forward-search command.");
661
662 /* This option is mostly obsolete, because etags can now automatically
663 detect C++. Retained for backward compatibility and for debugging and
664 experimentation. In principle, we could want to tag as C++ even
665 before any "class" keyword.
666 puts ("-C, --c++\n\
667 Treat files whose name suffix defaults to C language as C++ files.");
668 */
669
670 puts ("--declarations\n\
671 In C and derived languages, create tags for function declarations,");
672 if (CTAGS)
673 puts ("\tand create tags for extern variables if --globals is used.");
674 else
675 puts
676 ("\tand create tags for extern variables unless --no-globals is used.");
677
678 if (CTAGS)
679 puts ("-d, --defines\n\
680 Create tag entries for C #define constants and enum constants, too.");
681 else
682 puts ("-D, --no-defines\n\
683 Don't create tag entries for C #define constants and enum constants.\n\
684 This makes the tags file smaller.");
685
686 if (!CTAGS)
687 {
688 puts ("-i FILE, --include=FILE\n\
689 Include a note in tag file indicating that, when searching for\n\
690 a tag, one should also consult the tags file FILE after\n\
691 checking the current file.");
692 puts ("-l LANG, --language=LANG\n\
693 Force the following files to be considered as written in the\n\
694 named language up to the next --language=LANG option.");
695 }
696
697 if (CTAGS)
698 puts ("--globals\n\
699 Create tag entries for global variables in some languages.");
700 else
701 puts ("--no-globals\n\
702 Do not create tag entries for global variables in some\n\
703 languages. This makes the tags file smaller.");
704 puts ("--members\n\
705 Create tag entries for member variables in C and derived languages.");
706
707 #ifdef ETAGS_REGEXPS
708 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
709 Make a tag for each line matching pattern REGEXP in the following\n\
710 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
711 regexfile is a file containing one REGEXP per line.\n\
712 REGEXP is anchored (as if preceded by ^).\n\
713 The form /REGEXP/NAME/ creates a named tag.\n\
714 For example Tcl named tags can be created with:\n\
715 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
716 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
717 Like -r, --regex but ignore case when matching expressions.");
718 puts ("-R, --no-regex\n\
719 Don't create tags from regexps for the following files.");
720 #endif /* ETAGS_REGEXPS */
721 puts ("-o FILE, --output=FILE\n\
722 Write the tags to FILE.");
723 puts ("-I, --ignore-indentation\n\
724 Don't rely on indentation quite as much as normal. Currently,\n\
725 this means not to assume that a closing brace in the first\n\
726 column is the final brace of a function or structure\n\
727 definition in C and C++.");
728
729 if (CTAGS)
730 {
731 puts ("-t, --typedefs\n\
732 Generate tag entries for C and Ada typedefs.");
733 puts ("-T, --typedefs-and-c++\n\
734 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
735 and C++ member functions.");
736 puts ("-u, --update\n\
737 Update the tag entries for the given files, leaving tag\n\
738 entries for other files in place. Currently, this is\n\
739 implemented by deleting the existing entries for the given\n\
740 files and then rewriting the new entries at the end of the\n\
741 tags file. It is often faster to simply rebuild the entire\n\
742 tag file than to use this.");
743 puts ("-v, --vgrind\n\
744 Generates an index of items intended for human consumption,\n\
745 similar to the output of vgrind. The index is sorted, and\n\
746 gives the page number of each item.");
747 puts ("-w, --no-warn\n\
748 Suppress warning messages about entries defined in multiple\n\
749 files.");
750 puts ("-x, --cxref\n\
751 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
752 The output uses line numbers instead of page numbers, but\n\
753 beyond that the differences are cosmetic; try both to see\n\
754 which you like.");
755 }
756
757 puts ("-V, --version\n\
758 Print the version of the program.\n\
759 -h, --help\n\
760 Print this help message.");
761
762 print_language_names ();
763
764 puts ("");
765 puts ("Report bugs to bug-gnu-emacs@gnu.org");
766
767 exit (GOOD);
768 }
769
770 \f
/* Kinds of entries that main records, in command-line order, while it
   scans the options and file names. */
771 enum argument_type
772 {
773 at_language, /* a -l / --language option */
774 at_regexp, /* a -r / --regex option, or -R / --no-regex */
775 at_filename, /* a file name */
776 at_icregexp /* a -c / --ignore-case-regex option */
777 };
778
779 /* This structure helps us allow mixing of --lang and file names. */
780 typedef struct
781 {
782 enum argument_type arg_type; /* which kind of entry this is */
783 char *what; /* the option argument or file name; NULL for -R */
784 language *lang; /* language of the regexp */
785 } argument;
786
787 #ifdef VMS /* VMS specific functions */
788
789 #define EOS '\0'
790
791 /* This is a BUG! ANY arbitrary limit is a BUG!
792 Won't someone please fix this? */
793 #define MAX_FILE_SPEC_LEN 255
794 typedef struct {
795 short curlen;
796 char body[MAX_FILE_SPEC_LEN + 1];
797 } vspec;
798
799 /*
800 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
801 returning in each successive call the next file name matching the input
802 spec. The function expects that each in_spec passed
803 to it will be processed to completion; in particular, up to and
804 including the call following that in which the last matching name
805 is returned, the function ignores the value of in_spec, and will
806 only start processing a new spec with the following call.
807 If an error occurs, on return out_spec contains the value
808 of in_spec when the error occurred.
809
810 With each successive file name returned in out_spec, the
811 function's return value is one. When there are no more matching
812 names the function returns zero. If on the first call no file
813 matches in_spec, or there is any other error, -1 is returned.
814 */
815
816 #include <rmsdef.h>
817 #include <descrip.h>
818 #define OUTSIZE MAX_FILE_SPEC_LEN
/* Store in OUT->body the next file name matching the spec IN.
   Returns 1 when a name was stored, 0 when the search status is
   RMS$_NMF, and -1 for any other status (OUT->body then holds a copy
   of IN). */
819 static short
820 fn_exp (out, in)
821 vspec *out;
822 char *in;
823 {
/* All state is static because one search spans several calls. */
824 static long context = 0;
825 static struct dsc$descriptor_s o;
826 static struct dsc$descriptor_s i;
/* TRUE when the next call starts a new search and must set up the descriptors. */
827 static bool pass1 = TRUE;
828 long status;
829 short retval;
830
831 if (pass1)
832 {
/* First call for this spec: describe the input string and the output
   buffer.  IN and OUT are not looked at again for this purpose until
   the current search has ended. */
833 pass1 = FALSE;
/* The output descriptor points at the whole vspec, not just its body;
   presumably lib$find_file fills in curlen as well -- confirm against
   the VMS RTL documentation. */
834 o.dsc$a_pointer = (char *) out;
835 o.dsc$w_length = (short)OUTSIZE;
836 i.dsc$a_pointer = in;
837 i.dsc$w_length = (short)strlen(in);
838 i.dsc$b_dtype = DSC$K_DTYPE_T;
839 i.dsc$b_class = DSC$K_CLASS_S;
840 o.dsc$b_dtype = DSC$K_DTYPE_VT;
841 o.dsc$b_class = DSC$K_CLASS_VS;
842 }
843 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
844 {
/* Got a name: terminate it at its recorded length and keep the search open. */
845 out->body[out->curlen] = EOS;
846 return 1;
847 }
848 else if (status == RMS$_NMF)
849 retval = 0;
850 else
851 {
/* Any other status: hand the offending spec back to the caller. */
852 strcpy(out->body, in);
853 retval = -1;
854 }
/* The search is over (exhausted or failed): end it and arrange for the
   next call to start afresh with its own IN. */
855 lib$find_file_end(&context);
856 pass1 = TRUE;
857 return retval;
858 }
859
860 /*
861 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
862 name of each file specified by the provided arg expanding wildcards.
863 */
864 static char *
865 gfnames (arg, p_error)
866 char *arg;
867 bool *p_error;
868 {
869 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
870
871 switch (fn_exp (&filename, arg))
872 {
873 case 1:
874 *p_error = FALSE;
875 return filename.body;
876 case 0:
877 *p_error = FALSE;
878 return NULL;
879 default:
880 *p_error = TRUE;
881 return filename.body;
882 }
883 }
884
#ifndef OLD	/* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library's system: running CMD is not supported
   here, so report that and fail.
   Returns -1 so that a caller testing the result sees a failure; the
   function is declared int explicitly instead of relying on implicit
   int and on falling off the end without a value. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
892
893 #define VERSION_DELIM ';'
894 char *massage_name (s)
895 char *s;
896 {
897 char *start = s;
898
899 for ( ; *s; s++)
900 if (*s == VERSION_DELIM)
901 {
902 *s = EOS;
903 break;
904 }
905 else
906 *s = lowcase (*s);
907 return start;
908 }
909 #endif /* VMS */
910
911 \f
912 int
913 main (argc, argv)
914 int argc;
915 char *argv[];
916 {
917 int i;
918 unsigned int nincluded_files;
919 char **included_files;
920 char *this_file;
921 argument *argbuffer;
922 int current_arg, file_count;
923 linebuffer filename_lb;
924 #ifdef VMS
925 bool got_err;
926 #endif
927
928 #ifdef DOS_NT
929 _fmode = O_BINARY; /* all of files are treated as binary files */
930 #endif /* DOS_NT */
931
932 progname = argv[0];
933 nincluded_files = 0;
934 included_files = xnew (argc, char *);
935 current_arg = 0;
936 file_count = 0;
937
938 /* Allocate enough no matter what happens. Overkill, but each one
939 is small. */
940 argbuffer = xnew (argc, argument);
941
942 #ifdef ETAGS_REGEXPS
943 /* Set syntax for regular expression routines. */
944 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
945 /* Translation table for case-insensitive search. */
946 for (i = 0; i < CHAR_SET_SIZE; i++)
947 lc_trans[i] = lowcase (i);
948 #endif /* ETAGS_REGEXPS */
949
950 /*
951 * If etags, always find typedefs and structure tags. Why not?
952 * Also default to find macro constants, enum constants and
953 * global variables.
954 */
955 if (!CTAGS)
956 {
957 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
958 globals = TRUE;
959 declarations = FALSE;
960 members = FALSE;
961 }
962
963 while (1)
964 {
965 int opt;
966 char *optstring;
967
968 #ifdef ETAGS_REGEXPS
969 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
970 #else
971 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
972 #endif /* ETAGS_REGEXPS */
973
974 #ifndef LONG_OPTIONS
975 optstring = optstring + 1;
976 #endif /* LONG_OPTIONS */
977
978 opt = getopt_long (argc, argv, optstring, longopts, 0);
979 if (opt == EOF)
980 break;
981
982 switch (opt)
983 {
984 case 0:
985 /* If getopt returns 0, then it has already processed a
986 long-named option. We should do nothing. */
987 break;
988
989 case 1:
990 /* This means that a file name has been seen. Record it. */
991 argbuffer[current_arg].arg_type = at_filename;
992 argbuffer[current_arg].what = optarg;
993 ++current_arg;
994 ++file_count;
995 break;
996
997 /* Common options. */
998 case 'a': append_to_tagfile = TRUE; break;
999 case 'C': cplusplus = TRUE; break;
1000 case 'd': constantypedefs = TRUE; break;
1001 case 'D': constantypedefs = FALSE; break;
1002 case 'f': /* for compatibility with old makefiles */
1003 case 'o':
1004 if (tagfile)
1005 {
1006 error ("-o option may only be given once.", (char *)NULL);
1007 suggest_asking_for_help ();
1008 }
1009 tagfile = optarg;
1010 break;
1011 case 'I':
1012 case 'S': /* for backward compatibility */
1013 noindentypedefs = TRUE;
1014 break;
1015 case 'l':
1016 {
1017 language *lang = get_language_from_langname (optarg);
1018 if (lang != NULL)
1019 {
1020 argbuffer[current_arg].lang = lang;
1021 argbuffer[current_arg].arg_type = at_language;
1022 ++current_arg;
1023 }
1024 }
1025 break;
1026 #ifdef ETAGS_REGEXPS
1027 case 'r':
1028 argbuffer[current_arg].arg_type = at_regexp;
1029 argbuffer[current_arg].what = optarg;
1030 ++current_arg;
1031 break;
1032 case 'R':
1033 argbuffer[current_arg].arg_type = at_regexp;
1034 argbuffer[current_arg].what = NULL;
1035 ++current_arg;
1036 break;
1037 case 'c':
1038 argbuffer[current_arg].arg_type = at_icregexp;
1039 argbuffer[current_arg].what = optarg;
1040 ++current_arg;
1041 break;
1042 #endif /* ETAGS_REGEXPS */
1043 case 'V':
1044 print_version ();
1045 break;
1046 case 'h':
1047 case 'H':
1048 print_help ();
1049 break;
1050 case 't':
1051 typedefs = TRUE;
1052 break;
1053 case 'T':
1054 typedefs = typedefs_or_cplusplus = TRUE;
1055 break;
1056 #if (!CTAGS)
1057 /* Etags options */
1058 case 'i':
1059 included_files[nincluded_files++] = optarg;
1060 break;
1061 #else /* CTAGS */
1062 /* Ctags options. */
1063 case 'B': searchar = '?'; break;
1064 case 'u': update = TRUE; break;
1065 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1066 case 'x': cxref_style = TRUE; break;
1067 case 'w': no_warnings = TRUE; break;
1068 #endif /* CTAGS */
1069 default:
1070 suggest_asking_for_help ();
1071 }
1072 }
1073
1074 for (; optind < argc; ++optind)
1075 {
1076 argbuffer[current_arg].arg_type = at_filename;
1077 argbuffer[current_arg].what = argv[optind];
1078 ++current_arg;
1079 ++file_count;
1080 }
1081
1082 if (nincluded_files == 0 && file_count == 0)
1083 {
1084 error ("no input files specified.", (char *)NULL);
1085 suggest_asking_for_help ();
1086 }
1087
1088 if (tagfile == NULL)
1089 tagfile = CTAGS ? "tags" : "TAGS";
1090 cwd = etags_getcwd (); /* the current working directory */
1091 if (cwd[strlen (cwd) - 1] != '/')
1092 {
1093 char *oldcwd = cwd;
1094 cwd = concat (oldcwd, "/", "");
1095 free (oldcwd);
1096 }
1097 if (streq (tagfile, "-"))
1098 tagfiledir = cwd;
1099 else
1100 tagfiledir = absolute_dirname (tagfile, cwd);
1101
1102 init (); /* set up boolean "functions" */
1103
1104 initbuffer (&lb);
1105 initbuffer (&filename_lb);
1106
1107 if (!CTAGS)
1108 {
1109 if (streq (tagfile, "-"))
1110 {
1111 tagf = stdout;
1112 #ifdef DOS_NT
1113 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1114 doesn't take effect until after `stdout' is already open). */
1115 if (!isatty (fileno (stdout)))
1116 setmode (fileno (stdout), O_BINARY);
1117 #endif /* DOS_NT */
1118 }
1119 else
1120 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1121 if (tagf == NULL)
1122 pfatal (tagfile);
1123 }
1124
1125 /*
1126 * Loop through files finding functions.
1127 */
1128 for (i = 0; i < current_arg; ++i)
1129 {
1130 switch (argbuffer[i].arg_type)
1131 {
1132 case at_language:
1133 forced_lang = argbuffer[i].lang;
1134 break;
1135 #ifdef ETAGS_REGEXPS
1136 case at_regexp:
1137 analyse_regex (argbuffer[i].what, FALSE);
1138 break;
1139 case at_icregexp:
1140 analyse_regex (argbuffer[i].what, TRUE);
1141 break;
1142 #endif
1143 case at_filename:
1144 #ifdef VMS
1145 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1146 {
1147 if (got_err)
1148 {
1149 error ("can't find file %s\n", this_file);
1150 argc--, argv++;
1151 }
1152 else
1153 {
1154 this_file = massage_name (this_file);
1155 }
1156 #else
1157 this_file = argbuffer[i].what;
1158 #endif
1159 /* Input file named "-" means read file names from stdin
1160 (one per line) and use them. */
1161 if (streq (this_file, "-"))
1162 while (readline_internal (&filename_lb, stdin) > 0)
1163 process_file (filename_lb.buffer);
1164 else
1165 process_file (this_file);
1166 #ifdef VMS
1167 }
1168 #endif
1169 break;
1170 }
1171 }
1172
1173 #ifdef ETAGS_REGEXPS
1174 free_patterns ();
1175 #endif /* ETAGS_REGEXPS */
1176
1177 if (!CTAGS)
1178 {
1179 while (nincluded_files-- > 0)
1180 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1181
1182 fclose (tagf);
1183 exit (GOOD);
1184 }
1185
1186 /* If CTAGS, we are here. process_file did not write the tags yet,
1187 because we want them ordered. Let's do it now. */
1188 if (cxref_style)
1189 {
1190 put_entries (head);
1191 free_tree (head);
1192 head = NULL;
1193 exit (GOOD);
1194 }
1195
1196 if (update)
1197 {
1198 char cmd[BUFSIZ];
1199 for (i = 0; i < current_arg; ++i)
1200 {
1201 if (argbuffer[i].arg_type != at_filename)
1202 continue;
1203 sprintf (cmd,
1204 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1205 tagfile, argbuffer[i].what, tagfile);
1206 if (system (cmd) != GOOD)
1207 fatal ("failed to execute shell command", (char *)NULL);
1208 }
1209 append_to_tagfile = TRUE;
1210 }
1211
1212 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1213 if (tagf == NULL)
1214 pfatal (tagfile);
1215 put_entries (head);
1216 free_tree (head);
1217 head = NULL;
1218 fclose (tagf);
1219
1220 if (update)
1221 {
1222 char cmd[BUFSIZ];
1223 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1224 exit (system (cmd));
1225 }
1226 return GOOD;
1227 }
1228
1229
1230
1231 /*
1232 * Return a compressor given the file name. If EXTPTR is non-zero,
1233 * return a pointer into FILE where the compressor-specific
1234 * extension begins. If no compressor is found, NULL is returned
1235 * and EXTPTR is not significant.
1236 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1237 */
1238 static compressor *
1239 get_compressor_from_suffix (file, extptr)
1240 char *file;
1241 char **extptr;
1242 {
1243 compressor *compr;
1244 char *slash, *suffix;
1245
1246 /* This relies on FN to be after canonicalize_filename,
1247 so we don't need to consider backslashes on DOS_NT. */
1248 slash = etags_strrchr (file, '/');
1249 suffix = etags_strrchr (file, '.');
1250 if (suffix == NULL || suffix < slash)
1251 return NULL;
1252 if (extptr != NULL)
1253 *extptr = suffix;
1254 suffix += 1;
1255 /* Let those poor souls who live with DOS 8+3 file name limits get
1256 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1257 Only the first do loop is run if not MSDOS */
1258 do
1259 {
1260 for (compr = compressors; compr->suffix != NULL; compr++)
1261 if (streq (compr->suffix, suffix))
1262 return compr;
1263 if (!MSDOS)
1264 break; /* do it only once: not really a loop */
1265 if (extptr != NULL)
1266 *extptr = ++suffix;
1267 } while (*suffix != '\0');
1268 return NULL;
1269 }
1270
1271
1272
1273 /*
1274 * Return a language given the name.
1275 */
1276 static language *
1277 get_language_from_langname (name)
1278 char *name;
1279 {
1280 language *lang;
1281
1282 if (name == NULL)
1283 error ("empty language name", (char *)NULL);
1284 else
1285 {
1286 for (lang = lang_names; lang->name != NULL; lang++)
1287 if (streq (name, lang->name))
1288 return lang;
1289 error ("unknown language \"%s\"", name);
1290 }
1291
1292 return NULL;
1293 }
1294
1295
1296 /*
1297 * Return a language given the interpreter name.
1298 */
1299 static language *
1300 get_language_from_interpreter (interpreter)
1301 char *interpreter;
1302 {
1303 language *lang;
1304 char **iname;
1305
1306 if (interpreter == NULL)
1307 return NULL;
1308 for (lang = lang_names; lang->name != NULL; lang++)
1309 if (lang->interpreters != NULL)
1310 for (iname = lang->interpreters; *iname != NULL; iname++)
1311 if (streq (*iname, interpreter))
1312 return lang;
1313
1314 return NULL;
1315 }
1316
1317
1318
1319 /*
1320 * Return a language given the file name.
1321 */
1322 static language *
1323 get_language_from_filename (file)
1324 char *file;
1325 {
1326 language *lang;
1327 char **name, **ext, *suffix;
1328
1329 /* Try whole file name first. */
1330 for (lang = lang_names; lang->name != NULL; lang++)
1331 if (lang->filenames != NULL)
1332 for (name = lang->filenames; *name != NULL; name++)
1333 if (streq (*name, file))
1334 return lang;
1335
1336 /* If not found, try suffix after last dot. */
1337 suffix = etags_strrchr (file, '.');
1338 if (suffix == NULL)
1339 return NULL;
1340 suffix += 1;
1341 for (lang = lang_names; lang->name != NULL; lang++)
1342 if (lang->suffixes != NULL)
1343 for (ext = lang->suffixes; *ext != NULL; ext++)
1344 if (streq (*ext, suffix))
1345 return lang;
1346 return NULL;
1347 }
1348
1349
1350
1351 /*
1352 * This routine is called on each file argument.
1353 */
1354 static void
1355 process_file (file)
1356 char *file;
1357 {
1358 struct stat stat_buf;
1359 FILE *inf;
1360 compressor *compr;
1361 char *compressed_name, *uncompressed_name;
1362 char *ext, *real_name;
1363
1364
1365 canonicalize_filename (file);
1366 if (streq (file, tagfile) && !streq (tagfile, "-"))
1367 {
1368 error ("skipping inclusion of %s in self.", file);
1369 return;
1370 }
1371 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1372 {
1373 compressed_name = NULL;
1374 real_name = uncompressed_name = savestr (file);
1375 }
1376 else
1377 {
1378 real_name = compressed_name = savestr (file);
1379 uncompressed_name = savenstr (file, ext - file);
1380 }
1381
1382 /* If the canonicalised uncompressed name has already be dealt with,
1383 skip it silently, else add it to the list. */
1384 {
1385 typedef struct processed_file
1386 {
1387 char *filename;
1388 struct processed_file *next;
1389 } processed_file;
1390 static processed_file *pf_head = NULL;
1391 register processed_file *fnp;
1392
1393 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1394 if (streq (uncompressed_name, fnp->filename))
1395 goto exit;
1396 fnp = pf_head;
1397 pf_head = xnew (1, struct processed_file);
1398 pf_head->filename = savestr (uncompressed_name);
1399 pf_head->next = fnp;
1400 }
1401
1402 if (stat (real_name, &stat_buf) != 0)
1403 {
1404 /* Reset real_name and try with a different name. */
1405 real_name = NULL;
1406 if (compressed_name != NULL) /* try with the given suffix */
1407 {
1408 if (stat (uncompressed_name, &stat_buf) == 0)
1409 real_name = uncompressed_name;
1410 }
1411 else /* try all possible suffixes */
1412 {
1413 for (compr = compressors; compr->suffix != NULL; compr++)
1414 {
1415 compressed_name = concat (file, ".", compr->suffix);
1416 if (stat (compressed_name, &stat_buf) != 0)
1417 {
1418 if (MSDOS)
1419 {
1420 char *suf = compressed_name + strlen (file);
1421 size_t suflen = strlen (compr->suffix) + 1;
1422 for ( ; suf[1]; suf++, suflen--)
1423 {
1424 memmove (suf, suf + 1, suflen);
1425 if (stat (compressed_name, &stat_buf) == 0)
1426 {
1427 real_name = compressed_name;
1428 break;
1429 }
1430 }
1431 if (real_name != NULL)
1432 break;
1433 } /* MSDOS */
1434 free (compressed_name);
1435 compressed_name = NULL;
1436 }
1437 else
1438 {
1439 real_name = compressed_name;
1440 break;
1441 }
1442 }
1443 }
1444 if (real_name == NULL)
1445 {
1446 perror (file);
1447 goto exit;
1448 }
1449 } /* try with a different name */
1450
1451 if (!S_ISREG (stat_buf.st_mode))
1452 {
1453 error ("skipping %s: it is not a regular file.", real_name);
1454 goto exit;
1455 }
1456 if (real_name == compressed_name)
1457 {
1458 char *cmd = concat (compr->command, " ", real_name);
1459 inf = (FILE *) popen (cmd, "r");
1460 free (cmd);
1461 }
1462 else
1463 inf = fopen (real_name, "r");
1464 if (inf == NULL)
1465 {
1466 perror (real_name);
1467 goto exit;
1468 }
1469
1470 find_entries (uncompressed_name, inf);
1471
1472 if (real_name == compressed_name)
1473 pclose (inf);
1474 else
1475 fclose (inf);
1476
1477 if (!CTAGS)
1478 {
1479 char *filename;
1480
1481 if (filename_is_absolute (uncompressed_name))
1482 {
1483 /* file is an absolute file name. Canonicalise it. */
1484 filename = absolute_filename (uncompressed_name, cwd);
1485 }
1486 else
1487 {
1488 /* file is a file name relative to cwd. Make it relative
1489 to the directory of the tags file. */
1490 filename = relative_filename (uncompressed_name, tagfiledir);
1491 }
1492 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1493 free (filename);
1494 put_entries (head);
1495 free_tree (head);
1496 head = NULL;
1497 }
1498
1499 exit:
1500 if (compressed_name) free(compressed_name);
1501 if (uncompressed_name) free(uncompressed_name);
1502 return;
1503 }
1504
1505 /*
1506 * This routine sets up the boolean pseudo-functions which work
1507 * by setting boolean flags dependent upon the corresponding character.
1508 * Every char which is NOT in that string is not a white char. Therefore,
1509 * all of the array "_wht" is set to FALSE, and then the elements
1510 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1511 * of a char is TRUE if it is the string "white", else FALSE.
1512 */
1513 static void
1514 init ()
1515 {
1516 register char *sp;
1517 register int i;
1518
1519 for (i = 0; i < CHARS; i++)
1520 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1521 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1522 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1523 notinname('\0') = notinname('\n');
1524 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1525 begtoken('\0') = begtoken('\n');
1526 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1527 intoken('\0') = intoken('\n');
1528 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1529 endtoken('\0') = endtoken('\n');
1530 }
1531
1532 /*
1533 * This routine opens the specified file and calls the function
1534 * which finds the function and type definitions.
1535 */
1536 node *last_node = NULL;
1537
1538 static void
1539 find_entries (file, inf)
1540 char *file;
1541 FILE *inf;
1542 {
1543 char *cp;
1544 language *lang;
1545 node *old_last_node;
1546
1547 /* Memory leakage here: the string pointed by curfile is
1548 never released, because curfile is copied into np->file
1549 for each node, to be used in CTAGS mode. The amount of
1550 memory leaked here is the sum of the lengths of the
1551 file names. */
1552 curfile = savestr (file);
1553
1554 /* If user specified a language, use it. */
1555 lang = forced_lang;
1556 if (lang != NULL && lang->function != NULL)
1557 {
1558 curlang = lang;
1559 lang->function (inf);
1560 return;
1561 }
1562
1563 /* Try to guess the language given the file name. */
1564 lang = get_language_from_filename (file);
1565 if (lang != NULL && lang->function != NULL)
1566 {
1567 curlang = lang;
1568 lang->function (inf);
1569 return;
1570 }
1571
1572 /* Look for sharp-bang as the first two characters. */
1573 if (readline_internal (&lb, inf) > 0
1574 && lb.len >= 2
1575 && lb.buffer[0] == '#'
1576 && lb.buffer[1] == '!')
1577 {
1578 char *lp;
1579
1580 /* Set lp to point at the first char after the last slash in the
1581 line or, if no slashes, at the first nonblank. Then set cp to
1582 the first successive blank and terminate the string. */
1583 lp = etags_strrchr (lb.buffer+2, '/');
1584 if (lp != NULL)
1585 lp += 1;
1586 else
1587 lp = skip_spaces (lb.buffer + 2);
1588 cp = skip_non_spaces (lp);
1589 *cp = '\0';
1590
1591 if (strlen (lp) > 0)
1592 {
1593 lang = get_language_from_interpreter (lp);
1594 if (lang != NULL && lang->function != NULL)
1595 {
1596 curlang = lang;
1597 lang->function (inf);
1598 return;
1599 }
1600 }
1601 }
1602 /* We rewind here, even if inf may be a pipe. We fail if the
1603 length of the first line is longer than the pipe block size,
1604 which is unlikely. */
1605 rewind (inf);
1606
1607 /* Try Fortran. */
1608 old_last_node = last_node;
1609 curlang = get_language_from_langname ("fortran");
1610 Fortran_functions (inf);
1611
1612 /* No Fortran entries found. Try C. */
1613 if (old_last_node == last_node)
1614 {
1615 /* We do not tag if rewind fails.
1616 Only the file name will be recorded in the tags file. */
1617 rewind (inf);
1618 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1619 default_C_entries (inf);
1620 }
1621 return;
1622 }
1623
1624 \f
1625 /* Record a tag. */
1626 static void
1627 pfnote (name, is_func, linestart, linelen, lno, cno)
1628 char *name; /* tag name, or NULL if unnamed */
1629 bool is_func; /* tag is a function */
1630 char *linestart; /* start of the line where tag is */
1631 int linelen; /* length of the line where tag is */
1632 int lno; /* line number */
1633 long cno; /* character number */
1634 {
1635 register node *np;
1636
1637 if (CTAGS && name == NULL)
1638 return;
1639
1640 np = xnew (1, node);
1641
1642 /* If ctags mode, change name "main" to M<thisfilename>. */
1643 if (CTAGS && !cxref_style && streq (name, "main"))
1644 {
1645 register char *fp = etags_strrchr (curfile, '/');
1646 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1647 fp = etags_strrchr (np->name, '.');
1648 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1649 fp[0] = '\0';
1650 }
1651 else
1652 np->name = name;
1653 np->been_warned = FALSE;
1654 np->file = curfile;
1655 np->is_func = is_func;
1656 np->lno = lno;
1657 /* Our char numbers are 0-base, because of C language tradition?
1658 ctags compatibility? old versions compatibility? I don't know.
1659 Anyway, since emacs's are 1-base we expect etags.el to take care
1660 of the difference. If we wanted to have 1-based numbers, we would
1661 uncomment the +1 below. */
1662 np->cno = cno /* + 1 */ ;
1663 np->left = np->right = NULL;
1664 if (CTAGS && !cxref_style)
1665 {
1666 if (strlen (linestart) < 50)
1667 np->pat = concat (linestart, "$", "");
1668 else
1669 np->pat = savenstr (linestart, 50);
1670 }
1671 else
1672 np->pat = savenstr (linestart, linelen);
1673
1674 add_node (np, &head);
1675 }
1676
1677 /*
1678 * TAGS format specification
1679 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1680 *
1681 * pfnote should emit the optimized form [unnamed tag] only if:
1682 * 1. name does not contain any of the characters " \t\r\n(),;";
1683 * 2. linestart contains name as either a rightmost, or rightmost but
1684 * one character, substring;
1685 * 3. the character, if any, immediately before name in linestart must
1686 * be one of the characters " \t(),;";
1687 * 4. the character, if any, immediately after name in linestart must
1688 * also be one of the characters " \t(),;".
1689 *
1690 * The real implementation uses the notinname() macro, which recognises
1691 * characters slightly different form " \t\r\n(),;". See the variable
1692 * `nonam'.
1693 */
1694 #define traditional_tag_style TRUE
1695 static void
1696 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1697 char *name; /* tag name, or NULL if unnamed */
1698 int namelen; /* tag length */
1699 bool is_func; /* tag is a function */
1700 char *linestart; /* start of the line where tag is */
1701 int linelen; /* length of the line where tag is */
1702 int lno; /* line number */
1703 long cno; /* character number */
1704 {
1705 register char *cp;
1706 bool named;
1707
1708 named = TRUE;
1709 if (!CTAGS)
1710 {
1711 for (cp = name; !notinname (*cp); cp++)
1712 continue;
1713 if (*cp == '\0') /* rule #1 */
1714 {
1715 cp = linestart + linelen - namelen;
1716 if (notinname (linestart[linelen-1]))
1717 cp -= 1; /* rule #4 */
1718 if (cp >= linestart /* rule #2 */
1719 && (cp == linestart
1720 || notinname (cp[-1])) /* rule #3 */
1721 && strneq (name, cp, namelen)) /* rule #2 */
1722 named = FALSE; /* use unnamed tag */
1723 }
1724 }
1725
1726 if (named)
1727 name = savenstr (name, namelen);
1728 else
1729 name = NULL;
1730 pfnote (name, is_func, linestart, linelen, lno, cno);
1731 }
1732
1733 /*
1734 * free_tree ()
1735 * recurse on left children, iterate on right children.
1736 */
1737 static void
1738 free_tree (np)
1739 register node *np;
1740 {
1741 while (np)
1742 {
1743 register node *node_right = np->right;
1744 free_tree (np->left);
1745 if (np->name != NULL)
1746 free (np->name);
1747 free (np->pat);
1748 free (np);
1749 np = node_right;
1750 }
1751 }
1752
1753 /*
1754 * add_node ()
1755 * Adds a node to the tree of nodes. In etags mode, we don't keep
1756 * it sorted; we just keep a linear list. In ctags mode, maintain
1757 * an ordered tree, with no attempt at balancing.
1758 *
1759 * add_node is the only function allowed to add nodes, so it can
1760 * maintain state.
1761 */
1762 static void
1763 add_node (np, cur_node_p)
1764 node *np, **cur_node_p;
1765 {
1766 register int dif;
1767 register node *cur_node = *cur_node_p;
1768
1769 if (cur_node == NULL)
1770 {
1771 *cur_node_p = np;
1772 last_node = np;
1773 return;
1774 }
1775
1776 if (!CTAGS)
1777 {
1778 /* Etags Mode */
1779 if (last_node == NULL)
1780 fatal ("internal error in add_node", (char *)NULL);
1781 last_node->right = np;
1782 last_node = np;
1783 }
1784 else
1785 {
1786 /* Ctags Mode */
1787 dif = strcmp (np->name, cur_node->name);
1788
1789 /*
1790 * If this tag name matches an existing one, then
1791 * do not add the node, but maybe print a warning.
1792 */
1793 if (!dif)
1794 {
1795 if (streq (np->file, cur_node->file))
1796 {
1797 if (!no_warnings)
1798 {
1799 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1800 np->file, lineno, np->name);
1801 fprintf (stderr, "Second entry ignored\n");
1802 }
1803 }
1804 else if (!cur_node->been_warned && !no_warnings)
1805 {
1806 fprintf
1807 (stderr,
1808 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1809 np->file, cur_node->file, np->name);
1810 cur_node->been_warned = TRUE;
1811 }
1812 return;
1813 }
1814
1815 /* Actually add the node */
1816 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1817 }
1818 }
1819
1820 \f
1821 static void
1822 put_entries (np)
1823 register node *np;
1824 {
1825 register char *sp;
1826
1827 if (np == NULL)
1828 return;
1829
1830 /* Output subentries that precede this one */
1831 put_entries (np->left);
1832
1833 /* Output this entry */
1834
1835 if (!CTAGS)
1836 {
1837 if (np->name != NULL)
1838 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1839 np->pat, np->name, np->lno, np->cno);
1840 else
1841 fprintf (tagf, "%s\177%d,%ld\n",
1842 np->pat, np->lno, np->cno);
1843 }
1844 else
1845 {
1846 if (np->name == NULL)
1847 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1848
1849 if (cxref_style)
1850 {
1851 if (vgrind_style)
1852 fprintf (stdout, "%s %s %d\n",
1853 np->name, np->file, (np->lno + 63) / 64);
1854 else
1855 fprintf (stdout, "%-16s %3d %-16s %s\n",
1856 np->name, np->lno, np->file, np->pat);
1857 }
1858 else
1859 {
1860 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1861
1862 if (np->is_func)
1863 { /* a function */
1864 putc (searchar, tagf);
1865 putc ('^', tagf);
1866
1867 for (sp = np->pat; *sp; sp++)
1868 {
1869 if (*sp == '\\' || *sp == searchar)
1870 putc ('\\', tagf);
1871 putc (*sp, tagf);
1872 }
1873 putc (searchar, tagf);
1874 }
1875 else
1876 { /* a typedef; text pattern inadequate */
1877 fprintf (tagf, "%d", np->lno);
1878 }
1879 putc ('\n', tagf);
1880 }
1881 }
1882
1883 /* Output subentries that follow this one */
1884 put_entries (np->right);
1885 }
1886
/* Number of decimal digits needed to print NUM.  Any value smaller
   than 10, negative ones included, counts as one digit. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
1897
1898 /*
1899 * Return total number of characters that put_entries will output for
1900 * the nodes in the subtree of the specified node. Works only if
1901 * we are not ctags, but called only in that case. This count
1902 * is irrelevant with the new tags.el, but is still supplied for
1903 * backward compatibility.
1904 */
1905 static int
1906 total_size_of_entries (np)
1907 register node *np;
1908 {
1909 register int total;
1910
1911 if (np == NULL)
1912 return 0;
1913
1914 for (total = 0; np != NULL; np = np->right)
1915 {
1916 /* Count left subentries. */
1917 total += total_size_of_entries (np->left);
1918
1919 /* Count this entry */
1920 total += strlen (np->pat) + 1;
1921 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1922 if (np->name != NULL)
1923 total += 1 + strlen (np->name); /* \001name */
1924 }
1925
1926 return total;
1927 }
1928
1929 \f
/* C extensions: flags naming the C dialect being parsed.  C_symtype
   accepts a keyword restricted to a dialect only when the dialect
   flags it is given share a bit with the keyword's own flags; note
   that C_PLPL, C_STAR and C_JAVA all have the lowest bit set. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
1938
/*
 * The C symbol tables.
 *
 * Kinds of reserved words, as assigned to each keyword in the gperf
 * table that follows.
 */
enum sym_type
{
  st_none,                      /* not a known keyword */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define */
  st_C_typedef,                 /* typedef */
  st_C_typespec                 /* int, char, const, static, ... */
};
1953
1954 static unsigned int hash P_((const char *, unsigned int));
1955 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1956 static enum sym_type C_symtype P_((char *, int, int));
1957
1958 /* Feed stuff between (but not including) %[ and %] lines to:
1959 gperf -c -k 1,3 -o -p -r -t
1960 %[
1961 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1962 %%
1963 if, 0, st_C_ignore
1964 for, 0, st_C_ignore
1965 while, 0, st_C_ignore
1966 switch, 0, st_C_ignore
1967 return, 0, st_C_ignore
1968 @interface, 0, st_C_objprot
1969 @protocol, 0, st_C_objprot
1970 @implementation,0, st_C_objimpl
1971 @end, 0, st_C_objend
1972 import, C_JAVA, st_C_ignore
1973 package, C_JAVA, st_C_ignore
1974 friend, C_PLPL, st_C_ignore
1975 extends, C_JAVA, st_C_javastruct
1976 implements, C_JAVA, st_C_javastruct
1977 interface, C_JAVA, st_C_struct
1978 class, 0, st_C_class
1979 namespace, C_PLPL, st_C_struct
1980 domain, C_STAR, st_C_struct
1981 union, 0, st_C_struct
1982 struct, 0, st_C_struct
1983 extern, 0, st_C_extern
1984 enum, 0, st_C_enum
1985 typedef, 0, st_C_typedef
1986 define, 0, st_C_define
1987 operator, C_PLPL, st_C_operator
1988 template, 0, st_C_template
1989 bool, C_PLPL, st_C_typespec
1990 long, 0, st_C_typespec
1991 short, 0, st_C_typespec
1992 int, 0, st_C_typespec
1993 char, 0, st_C_typespec
1994 float, 0, st_C_typespec
1995 double, 0, st_C_typespec
1996 signed, 0, st_C_typespec
1997 unsigned, 0, st_C_typespec
1998 auto, 0, st_C_typespec
1999 void, 0, st_C_typespec
2000 static, 0, st_C_typespec
2001 const, 0, st_C_typespec
2002 volatile, 0, st_C_typespec
2003 explicit, C_PLPL, st_C_typespec
2004 mutable, C_PLPL, st_C_typespec
2005 typename, C_PLPL, st_C_typespec
2006 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2007 DEFUN, 0, st_C_gnumacro
2008 SYSCALL, 0, st_C_gnumacro
2009 ENTRY, 0, st_C_gnumacro
2010 PSEUDO, 0, st_C_gnumacro
2011 # These are defined inside C functions, so currently they are not met.
2012 # EXFUN used in glibc, DEFVAR_* in emacs.
2013 #EXFUN, 0, st_C_gnumacro
2014 #DEFVAR_, 0, st_C_gnumacro
2015 %]
2016 and replace lines between %< and %> with its output,
2017 then make in_word_set static. */
2018 /*%<*/
2019 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2020 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2021 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2022
2023 #define TOTAL_KEYWORDS 47
2024 #define MIN_WORD_LENGTH 2
2025 #define MAX_WORD_LENGTH 15
2026 #define MIN_HASH_VALUE 18
2027 #define MAX_HASH_VALUE 138
2028 /* maximum key range = 121, duplicates = 0 */
2029
/* Hash function for the keyword table: the length LEN of the word
   plus the values associated with its first character and, unless
   LEN is 1 or 2, with its third character. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int key = len;

  /* The third character takes part for every length but 1 and 2. */
  if (key != 1 && key != 2)
    key += asso_values[(unsigned char)str[2]];
  key += asso_values[(unsigned char)str[0]];

  return key;
}
2081
2082 #ifdef __GNUC__
2083 __inline
2084 #endif
2085 static struct C_stab_entry *
2086 in_word_set (str, len)
2087 register const char *str;
2088 register unsigned int len;
2089 {
2090 static struct C_stab_entry wordlist[] =
2091 {
2092 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2093 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2094 {"if", 0, st_C_ignore},
2095 {""}, {""}, {""}, {""},
2096 {"int", 0, st_C_typespec},
2097 {""}, {""},
2098 {"void", 0, st_C_typespec},
2099 {""}, {""},
2100 {"interface", C_JAVA, st_C_struct},
2101 {""},
2102 {"SYSCALL", 0, st_C_gnumacro},
2103 {""},
2104 {"return", 0, st_C_ignore},
2105 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2106 {"while", 0, st_C_ignore},
2107 {"auto", 0, st_C_typespec},
2108 {""}, {""}, {""}, {""}, {""}, {""},
2109 {"float", 0, st_C_typespec},
2110 {"typedef", 0, st_C_typedef},
2111 {"typename", C_PLPL, st_C_typespec},
2112 {""}, {""}, {""},
2113 {"friend", C_PLPL, st_C_ignore},
2114 {"volatile", 0, st_C_typespec},
2115 {""}, {""},
2116 {"for", 0, st_C_ignore},
2117 {"const", 0, st_C_typespec},
2118 {"import", C_JAVA, st_C_ignore},
2119 {""},
2120 {"define", 0, st_C_define},
2121 {"long", 0, st_C_typespec},
2122 {"implements", C_JAVA, st_C_javastruct},
2123 {"signed", 0, st_C_typespec},
2124 {""},
2125 {"extern", 0, st_C_extern},
2126 {"extends", C_JAVA, st_C_javastruct},
2127 {""},
2128 {"mutable", C_PLPL, st_C_typespec},
2129 {"template", 0, st_C_template},
2130 {"short", 0, st_C_typespec},
2131 {"bool", C_PLPL, st_C_typespec},
2132 {"char", 0, st_C_typespec},
2133 {"class", 0, st_C_class},
2134 {"operator", C_PLPL, st_C_operator},
2135 {""},
2136 {"switch", 0, st_C_ignore},
2137 {""},
2138 {"ENTRY", 0, st_C_gnumacro},
2139 {""},
2140 {"package", C_JAVA, st_C_ignore},
2141 {"union", 0, st_C_struct},
2142 {"@end", 0, st_C_objend},
2143 {"struct", 0, st_C_struct},
2144 {"namespace", C_PLPL, st_C_struct},
2145 {""}, {""},
2146 {"domain", C_STAR, st_C_struct},
2147 {"@interface", 0, st_C_objprot},
2148 {"PSEUDO", 0, st_C_gnumacro},
2149 {"double", 0, st_C_typespec},
2150 {""},
2151 {"@protocol", 0, st_C_objprot},
2152 {""},
2153 {"static", 0, st_C_typespec},
2154 {""}, {""},
2155 {"DEFUN", 0, st_C_gnumacro},
2156 {""}, {""}, {""}, {""},
2157 {"explicit", C_PLPL, st_C_typespec},
2158 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2159 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2160 {""},
2161 {"enum", 0, st_C_enum},
2162 {""}, {""},
2163 {"unsigned", 0, st_C_typespec},
2164 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2165 {"@implementation",0, st_C_objimpl}
2166 };
2167
2168 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2169 {
2170 register int key = hash (str, len);
2171
2172 if (key <= MAX_HASH_VALUE && key >= 0)
2173 {
2174 register const char *s = wordlist[key].name;
2175
2176 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2177 return &wordlist[key];
2178 }
2179 }
2180 return 0;
2181 }
2182 /*%>*/
2183
2184 static enum sym_type
2185 C_symtype (str, len, c_ext)
2186 char *str;
2187 int len;
2188 int c_ext;
2189 {
2190 register struct C_stab_entry *se = in_word_set (str, len);
2191
2192 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2193 return st_none;
2194 return se->type;
2195 }
2196
2197 \f
2198 /*
2199 * C functions and variables are recognized using a simple
2200 * finite automaton. fvdef is its state variable. It is advanced by consider_token and by C_entries.
2201 */
2202 enum
2203 {
2204 fvnone, /* nothing seen */
2205 fdefunkey, /* GNU macro keyword (DEFUN, SYSCALL, ENTRY, PSEUDO) seen */
2206 fdefunname, /* name following the GNU macro keyword seen */
2207 foperator, /* func: operator keyword seen (cplpl) */
2208 fvnameseen, /* function or variable name seen */
2209 fstartlist, /* func: just after open parenthesis */
2210 finlist, /* func: in parameter list */
2211 flistseen, /* func: after parameter list */
2212 fignore, /* func: before open brace */
2213 vignore /* var-like: ignore until ';' */
2214 } fvdef;
2215
2216 bool fvextern; /* func or var: extern keyword seen */
2217
2218 /*
2219 * typedefs are recognized using a simple finite automaton.
2220 * typdef is its state variable.
2221 */
2222 enum
2223 {
2224 tnone, /* nothing seen */
2225 tkeyseen, /* typedef keyword seen */
2226 ttypeseen, /* defined type seen */
2227 tinbody, /* inside typedef body (entered at '{' while in ttypeseen) */
2228 tend, /* just before typedef tag */
2229 tignore /* junk after typedef tag */
2230 } typdef;
2231
2232 /*
2233 * struct-like structures (enum, struct and union) are recognized
2234 * using another simple finite automaton. `structdef' is its state
2235 * variable.
2236 */
2237 enum
2238 {
2239 snone, /* nothing seen yet,
2240 or in struct body if cblev > 0 */
2241 skeyseen, /* struct-like keyword seen */
2242 stagseen, /* struct-like tag seen */
2243 sintemplate, /* inside template (ignore): between '<' and '>' after a tag */
2244 scolonseen /* colon seen after struct-like tag */
2245 } structdef;
2246
2247 /*
2248 * When objdef is different from onone, objtag is the name of the class.
2249 * It is set by consider_token from a freshly allocated copy of the token.
2250 */
2250 char *objtag = "<uninited>";
2251
2252 /*
2253 * Yet another little state machine to deal with preprocessor lines.
2254 */
2255 enum
2256 {
2257 dnone, /* nothing seen */
2258 dsharpseen, /* '#' seen as first char on line */
2259 ddefineseen, /* '#' and 'define' seen */
2260 dignorerest /* ignore rest of line */
2261 } definedef;
2262
2263 /*
2264 * State machine for Objective C protocols and implementations.
2265 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2266 */
2267 enum
2268 {
2269 onone, /* nothing seen */
2270 oprotocol, /* @interface or @protocol seen */
2271 oimplementation, /* @implementation seen */
2272 otagseen, /* class name seen */
2273 oparenseen, /* parenthesis before category seen */
2274 ocatseen, /* category name seen */
2275 oinbody, /* in @implementation body */
2276 omethodsign, /* in @implementation body, after +/- */
2277 omethodtag, /* after method name */
2278 omethodcolon, /* after method colon */
2279 omethodparm, /* after method parameter */
2280 oignore /* wait for @end */
2281 } objdef;
2282
2283
2284 /*
2285 * Use this structure to keep info about the token read, and how it
2286 * should be tagged. Used by the make_C_tag function to build a tag.
2287 */
2288 struct tok
2289 {
2290 bool valid; /* the fields below describe a usable token; cleared by make_C_tag */
2291 bool named; /* the tag gets an explicit name, taken from token_name */
2292 int offset; /* offset of the token from the start of `line' */
2293 int length; /* length of the token */
2294 int lineno; /* line number at which the token was read */
2295 long linepos; /* file position recorded for `line' (from charno when it was read) */
2296 char *line; /* buffer holding the line that contains the token */
2297 } token; /* latest token read */
2298 linebuffer token_name; /* its name */
2299
2300 /*
2301 * Variables and functions for dealing with nested structures.
2302 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2303 */
2304 static void pushclass_above P_((int, char *, int));
2305 static void popclass_above P_((int));
2306 static void write_classname P_((linebuffer *, char *qualifier));
2307
2308 struct {
2309 char **cname; /* nested class names; NULL for an unnamed struct */
2310 int *cblev; /* nested class curly brace level */
2311 int nl; /* class nesting level (elements used) */
2312 int size; /* length of the array */
2313 } cstack; /* stack for nested declaration tags */
2314 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2315 #define nestlev (cstack.nl)
2316 /* After struct keyword or in struct body, not inside a nested function. Uses whatever variable named cblev is in scope where the macro is expanded. */
2317 #define instruct (structdef == snone && nestlev > 0 \
2318 && cblev == cstack.cblev[nestlev-1] + 1)
2319
2320 static void
2321 pushclass_above (cblev, str, len)
2322 int cblev;
2323 char *str;
2324 int len;
2325 {
2326 int nl;
2327
2328 popclass_above (cblev);
2329 nl = cstack.nl;
2330 if (nl >= cstack.size)
2331 {
2332 int size = cstack.size *= 2;
2333 xrnew (cstack.cname, size, char *);
2334 xrnew (cstack.cblev, size, int);
2335 }
2336 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2337 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2338 cstack.cblev[nl] = cblev;
2339 cstack.nl = nl + 1;
2340 }
2341
2342 static void
2343 popclass_above (cblev)
2344 int cblev;
2345 {
2346 int nl;
2347
2348 for (nl = cstack.nl - 1;
2349 nl >= 0 && cstack.cblev[nl] >= cblev;
2350 nl--)
2351 {
2352 if (cstack.cname[nl] != NULL)
2353 free (cstack.cname[nl]);
2354 cstack.nl = nl;
2355 }
2356 }
2357
2358 static void
2359 write_classname (cn, qualifier)
2360 linebuffer *cn;
2361 char *qualifier;
2362 {
2363 int i, len;
2364 int qlen = strlen (qualifier);
2365
2366 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2367 {
2368 len = 0;
2369 cn->len = 0;
2370 cn->buffer[0] = '\0';
2371 }
2372 else
2373 {
2374 len = strlen (cstack.cname[0]);
2375 linebuffer_setlen (cn, len);
2376 strcpy (cn->buffer, cstack.cname[0]);
2377 }
2378 for (i = 1; i < cstack.nl; i++)
2379 {
2380 char *s;
2381 int slen;
2382
2383 s = cstack.cname[i];
2384 if (s == NULL)
2385 continue;
2386 slen = strlen (s);
2387 len += slen + qlen;
2388 linebuffer_setlen (cn, len);
2389 strncat (cn->buffer, qualifier, qlen);
2390 strncat (cn->buffer, s, slen);
2391 }
2392 }
2393
2394 \f
2395 static bool consider_token P_((char *, int, int, int *, int, int, bool *));
2396 static void make_C_tag P_((bool));
2397
2398 /*
2399 * consider_token ()
2400 * checks to see if the current token is at the start of a
2401 * function or variable, or corresponds to a typedef, or
2402 * is a struct/union/enum tag, or #define, or an enum constant.
2403 *
2404 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2405 * with args. C_EXTP points to which language we are looking at.
2406 *
2407 * Globals
2408 * fvdef IN OUT
2409 * structdef IN OUT
2410 * definedef IN OUT
2411 * typdef IN OUT
2412 * objdef IN OUT
2413 */
2414
2415 static bool
2416 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2417 register char *str; /* IN: token pointer */
2418 register int len; /* IN: token length */
2419 register int c; /* IN: first char after the token */
2420 int *c_extp; /* IN, OUT: C extensions mask */
2421 int cblev; /* IN: curly brace level */
2422 int parlev; /* IN: parenthesis level */
2423 bool *is_func_or_var; /* OUT: function or variable found */
2424 {
2425 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2426 structtype is the type of the preceding struct-like keyword, and
2427 structcblev is the curly brace level where it has been seen. */
2428 static enum sym_type structtype;
2429 static int structcblev;
2430 static enum sym_type toktype;
2431
2432
2433 toktype = C_symtype (str, len, *c_extp);
2434
2435 /*
2436 * Advance the definedef state machine.
2437 */
2438 switch (definedef)
2439 {
2440 case dnone:
2441 /* We're not on a preprocessor line. */
2442 if (toktype == st_C_gnumacro)
2443 {
2444 fvdef = fdefunkey;
2445 return FALSE;
2446 }
2447 break;
2448 case dsharpseen:
2449 if (toktype == st_C_define)
2450 {
2451 definedef = ddefineseen;
2452 }
2453 else
2454 {
2455 definedef = dignorerest;
2456 }
2457 return FALSE;
2458 case ddefineseen:
2459 /*
2460 * Make a tag for any macro, unless it is a constant
2461 * and constantypedefs is FALSE.
2462 */
2463 definedef = dignorerest;
2464 *is_func_or_var = (c == '(');
2465 if (!*is_func_or_var && !constantypedefs)
2466 return FALSE;
2467 else
2468 return TRUE;
2469 case dignorerest:
2470 return FALSE;
2471 default:
2472 error ("internal error: definedef value.", (char *)NULL);
2473 }
2474
2475 /*
2476 * Now typedefs
2477 */
2478 switch (typdef)
2479 {
2480 case tnone:
2481 if (toktype == st_C_typedef)
2482 {
2483 if (typedefs)
2484 typdef = tkeyseen;
2485 fvextern = FALSE;
2486 fvdef = fvnone;
2487 return FALSE;
2488 }
2489 break;
2490 case tkeyseen:
2491 switch (toktype)
2492 {
2493 case st_none:
2494 case st_C_typespec:
2495 case st_C_class:
2496 case st_C_struct:
2497 case st_C_enum:
2498 typdef = ttypeseen;
2499 break;
2500 }
2501 break;
2502 case ttypeseen:
2503 if (structdef == snone && fvdef == fvnone)
2504 {
2505 fvdef = fvnameseen;
2506 return TRUE;
2507 }
2508 break;
2509 case tend:
2510 switch (toktype)
2511 {
2512 case st_C_typespec:
2513 case st_C_class:
2514 case st_C_struct:
2515 case st_C_enum:
2516 return FALSE;
2517 }
2518 return TRUE;
2519 }
2520
2521 /*
2522 * This structdef business is NOT invoked when we are ctags and the
2523 * file is plain C. This is because a struct tag may have the same
2524 * name as another tag, and this loses with ctags.
2525 */
2526 switch (toktype)
2527 {
2528 case st_C_javastruct:
2529 if (structdef == stagseen)
2530 structdef = scolonseen;
2531 return FALSE;
2532 case st_C_template:
2533 case st_C_class:
2534 if (cblev == 0
2535 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2536 && definedef == dnone && structdef == snone
2537 && typdef == tnone && fvdef == fvnone)
2538 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2539 if (toktype == st_C_template)
2540 break;
2541 /* FALLTHRU */
2542 case st_C_struct:
2543 case st_C_enum:
2544 if (parlev == 0
2545 && fvdef != vignore
2546 && (typdef == tkeyseen
2547 || (typedefs_or_cplusplus && structdef == snone)))
2548 {
2549 structdef = skeyseen;
2550 structtype = toktype;
2551 structcblev = cblev;
2552 }
2553 return FALSE;
2554 }
2555
2556 if (structdef == skeyseen)
2557 {
2558 structdef = stagseen;
2559 return TRUE;
2560 }
2561
2562 if (typdef != tnone)
2563 definedef = dnone;
2564
2565 /* Detect Objective C constructs. */
2566 switch (objdef)
2567 {
2568 case onone:
2569 switch (toktype)
2570 {
2571 case st_C_objprot:
2572 objdef = oprotocol;
2573 return FALSE;
2574 case st_C_objimpl:
2575 objdef = oimplementation;
2576 return FALSE;
2577 }
2578 break;
2579 case oimplementation:
2580 /* Save the class tag for functions or variables defined inside. */
2581 objtag = savenstr (str, len);
2582 objdef = oinbody;
2583 return FALSE;
2584 case oprotocol:
2585 /* Save the class tag for categories. */
2586 objtag = savenstr (str, len);
2587 objdef = otagseen;
2588 *is_func_or_var = TRUE;
2589 return TRUE;
2590 case oparenseen:
2591 objdef = ocatseen;
2592 *is_func_or_var = TRUE;
2593 return TRUE;
2594 case oinbody:
2595 break;
2596 case omethodsign:
2597 if (parlev == 0)
2598 {
2599 objdef = omethodtag;
2600 linebuffer_setlen (&token_name, len);
2601 strncpy (token_name.buffer, str, len);
2602 token_name.buffer[len] = '\0';
2603 return TRUE;
2604 }
2605 return FALSE;
2606 case omethodcolon:
2607 if (parlev == 0)
2608 objdef = omethodparm;
2609 return FALSE;
2610 case omethodparm:
2611 if (parlev == 0)
2612 {
2613 objdef = omethodtag;
2614 linebuffer_setlen (&token_name, token_name.len + len);
2615 strncat (token_name.buffer, str, len);
2616 return TRUE;
2617 }
2618 return FALSE;
2619 case oignore:
2620 if (toktype == st_C_objend)
2621 {
2622 /* Memory leakage here: the string pointed by objtag is
2623 never released, because many tests would be needed to
2624 avoid breaking on incorrect input code. The amount of
2625 memory leaked here is the sum of the lengths of the
2626 class tags.
2627 free (objtag); */
2628 objdef = onone;
2629 }
2630 return FALSE;
2631 }
2632
2633 /* A function, variable or enum constant? */
2634 switch (toktype)
2635 {
2636 case st_C_extern:
2637 fvextern = TRUE;
2638 /* FALLTHRU */
2639 case st_C_typespec:
2640 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2641 fvdef = fvnone; /* should be useless */
2642 return FALSE;
2643 case st_C_ignore:
2644 fvextern = FALSE;
2645 fvdef = vignore;
2646 return FALSE;
2647 case st_C_operator:
2648 fvdef = foperator;
2649 *is_func_or_var = TRUE;
2650 return TRUE;
2651 case st_none:
2652 if (constantypedefs
2653 && structdef == snone
2654 && structtype == st_C_enum && cblev > structcblev)
2655 return TRUE; /* enum constant */
2656 switch (fvdef)
2657 {
2658 case fdefunkey:
2659 if (cblev > 0)
2660 break;
2661 fvdef = fdefunname; /* GNU macro */
2662 *is_func_or_var = TRUE;
2663 return TRUE;
2664 case fvnone:
2665 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2666 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2667 {
2668 fvdef = vignore;
2669 return FALSE;
2670 }
2671 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2672 {
2673 fvdef = foperator;
2674 *is_func_or_var = TRUE;
2675 return TRUE;
2676 }
2677 if (cblev > 0 && !instruct)
2678 break;
2679 fvdef = fvnameseen; /* function or variable */
2680 *is_func_or_var = TRUE;
2681 return TRUE;
2682 }
2683 break;
2684 }
2685
2686 return FALSE;
2687 }
2688
2689 \f
2690 /*
2691 * C_entries often keeps pointers to tokens or lines which are older than
2692 * the line currently read. By keeping two line buffers, and switching
2693 * them at end of line, it is possible to use those pointers.
2694 */
2695 struct
2696 {
2697 long linepos; /* value of charno when the line in lb was read */
2698 linebuffer lb;
2699 } lbs[2];
2700 /* The macros below use curndx, newndx and other names that must be in scope where they are expanded. */
2701 #define current_lb_is_new (newndx == curndx)
2702 #define switch_line_buffers() (curndx = 1 - curndx)
2703
2704 #define curlb (lbs[curndx].lb)
2705 #define newlb (lbs[newndx].lb)
2706 #define curlinepos (lbs[curndx].linepos)
2707 #define newlinepos (lbs[newndx].linepos)
2708 /* Read the next input line into curlb, update the position counters and point lp at its start; definedef is left alone. */
2709 #define CNL_SAVE_DEFINEDEF() \
2710 do { \
2711 curlinepos = charno; \
2712 lineno++; \
2713 linecharno = charno; \
2714 charno += readline (&curlb, inf); \
2715 lp = curlb.buffer; \
2716 quotednl = FALSE; \
2717 newndx = curndx; \
2718 } while (0)
2719 /* Like CNL_SAVE_DEFINEDEF, but also restore token from savetoken when the latter is valid, and reset definedef to dnone. */
2720 #define CNL() \
2721 do { \
2722 CNL_SAVE_DEFINEDEF(); \
2723 if (savetoken.valid) \
2724 { \
2725 token = savetoken; \
2726 savetoken.valid = FALSE; \
2727 } \
2728 definedef = dnone; \
2729 } while (0)
2730
2731
2732 static void
2733 make_C_tag (isfun)
2734 bool isfun;
2735 {
2736 /* This function should never be called when token.valid is FALSE, but
2737 we must protect against invalid input or internal errors. */
2738 if (DEBUG || token.valid)
2739 {
2740 if (traditional_tag_style)
2741 {
2742 /* This was the original code. Now we call new_pfnote instead,
2743 which uses the new method for naming tags (see new_pfnote). */
2744 char *name = NULL;
2745
2746 if (CTAGS || token.named)
2747 name = savestr (token_name.buffer);
2748 if (DEBUG && !token.valid)
2749 {
2750 if (token.named)
2751 name = concat (name, "##invalid##", "");
2752 else
2753 name = savestr ("##invalid##");
2754 }
2755 pfnote (name, isfun, token.line,
2756 token.offset+token.length+1, token.lineno, token.linepos);
2757 }
2758 else
2759 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2760 token.offset+token.length+1, token.lineno, token.linepos);
2761 token.valid = FALSE;
2762 }
2763 }
2764
2765
2766 /*
2767 * C_entries ()
2768 * This routine finds functions, variables, typedefs,
2769 * #define's, enum constants and struct/union/enum definitions in
2770 * C syntax and adds them to the list.
2771 */
2772 static void
2773 C_entries (c_ext, inf)
2774 int c_ext; /* extension of C */
2775 FILE *inf; /* input file */
2776 {
2777 register char c; /* latest char read; '\0' for end of line */
2778 register char *lp; /* pointer one beyond the character `c' */
2779 int curndx, newndx; /* indices for current and new lb */
2780 register int tokoff; /* offset in line of start of current token */
2781 register int toklen; /* length of current token */
2782 char *qualifier; /* string used to qualify names */
2783 int qlen; /* length of qualifier */
2784 int cblev; /* current curly brace level */
2785 int parlev; /* current parenthesis level */
2786 int typdefcblev; /* cblev where a typedef struct body begun */
2787 bool incomm, inquote, inchar, quotednl, midtoken;
2788 bool cplpl, cjava;
2789 bool yacc_rules; /* in the rules part of a yacc file */
2790 struct tok savetoken; /* token saved during preprocessor handling */
2791
2792
2793 initbuffer (&token_name);
2794 initbuffer (&lbs[0].lb);
2795 initbuffer (&lbs[1].lb);
2796 if (cstack.size == 0)
2797 {
2798 cstack.size = (DEBUG) ? 1 : 4;
2799 cstack.nl = 0;
2800 cstack.cname = xnew (cstack.size, char *);
2801 cstack.cblev = xnew (cstack.size, int);
2802 }
2803
2804 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
2805 curndx = newndx = 0;
2806 lineno = 0;
2807 charno = 0;
2808 lp = curlb.buffer;
2809 *lp = 0;
2810
2811 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2812 structdef = snone; definedef = dnone; objdef = onone;
2813 yacc_rules = FALSE;
2814 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2815 token.valid = savetoken.valid = FALSE;
2816 cblev = 0;
2817 parlev = 0;
2818 cplpl = (c_ext & C_PLPL) == C_PLPL;
2819 cjava = (c_ext & C_JAVA) == C_JAVA;
2820 if (cjava)
2821 { qualifier = "."; qlen = 1; }
2822 else
2823 { qualifier = "::"; qlen = 2; }
2824
2825
2826 while (!feof (inf))
2827 {
2828 c = *lp++;
2829 if (c == '\\')
2830 {
2831 /* If we're at the end of the line, the next character is a
2832 '\0'; don't skip it, because it's the thing that tells us
2833 to read the next line. */
2834 if (*lp == '\0')
2835 {
2836 quotednl = TRUE;
2837 continue;
2838 }
2839 lp++;
2840 c = ' ';
2841 }
2842 else if (incomm)
2843 {
2844 switch (c)
2845 {
2846 case '*':
2847 if (*lp == '/')
2848 {
2849 c = *lp++;
2850 incomm = FALSE;
2851 }
2852 break;
2853 case '\0':
2854 /* Newlines inside comments do not end macro definitions in
2855 traditional cpp. */
2856 CNL_SAVE_DEFINEDEF ();
2857 break;
2858 }
2859 continue;
2860 }
2861 else if (inquote)
2862 {
2863 switch (c)
2864 {
2865 case '"':
2866 inquote = FALSE;
2867 break;
2868 case '\0':
2869 /* Newlines inside strings do not end macro definitions
2870 in traditional cpp, even though compilers don't
2871 usually accept them. */
2872 CNL_SAVE_DEFINEDEF ();
2873 break;
2874 }
2875 continue;
2876 }
2877 else if (inchar)
2878 {
2879 switch (c)
2880 {
2881 case '\0':
2882 /* Hmmm, something went wrong. */
2883 CNL ();
2884 /* FALLTHRU */
2885 case '\'':
2886 inchar = FALSE;
2887 break;
2888 }
2889 continue;
2890 }
2891 else
2892 switch (c)
2893 {
2894 case '"':
2895 inquote = TRUE;
2896 switch (fvdef)
2897 {
2898 case fdefunkey:
2899 case fstartlist:
2900 case finlist:
2901 case fignore:
2902 case vignore:
2903 break;
2904 default:
2905 fvextern = FALSE;
2906 fvdef = fvnone;
2907 }
2908 continue;
2909 case '\'':
2910 inchar = TRUE;
2911 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2912 {
2913 fvextern = FALSE;
2914 fvdef = fvnone;
2915 }
2916 continue;
2917 case '/':
2918 if (*lp == '*')
2919 {
2920 lp++;
2921 incomm = TRUE;
2922 continue;
2923 }
2924 else if (/* cplpl && */ *lp == '/')
2925 {
2926 c = '\0';
2927 break;
2928 }
2929 else
2930 break;
2931 case '%':
2932 if ((c_ext & YACC) && *lp == '%')
2933 {
2934 /* Entering or exiting rules section in yacc file. */
2935 lp++;
2936 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2937 typdef = tnone; structdef = snone;
2938 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2939 cblev = 0;
2940 yacc_rules = !yacc_rules;
2941 continue;
2942 }
2943 else
2944 break;
2945 case '#':
2946 if (definedef == dnone)
2947 {
2948 char *cp;
2949 bool cpptoken = TRUE;
2950
2951 /* Look back on this line. If all blanks, or nonblanks
2952 followed by an end of comment, this is a preprocessor
2953 token. */
2954 for (cp = newlb.buffer; cp < lp-1; cp++)
2955 if (!iswhite (*cp))
2956 {
2957 if (*cp == '*' && *(cp+1) == '/')
2958 {
2959 cp++;
2960 cpptoken = TRUE;
2961 }
2962 else
2963 cpptoken = FALSE;
2964 }
2965 if (cpptoken)
2966 definedef = dsharpseen;
2967 } /* if (definedef == dnone) */
2968
2969 continue;
2970 } /* switch (c) */
2971
2972
2973 /* Consider token only if some involved conditions are satisfied. */
2974 if (typdef != tignore
2975 && definedef != dignorerest
2976 && fvdef != finlist
2977 && structdef != sintemplate
2978 && (definedef != dnone
2979 || structdef != scolonseen))
2980 {
2981 if (midtoken)
2982 {
2983 if (endtoken (c))
2984 {
2985 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2986 {
2987 /*
2988 * This handles :: in the middle, but not at the
2989 * beginning of an identifier. Also, space-separated
2990 * :: is not recognised.
2991 */
2992 lp += 2;
2993 toklen += 2;
2994 c = lp[-1];
2995 goto still_in_token;
2996 }
2997 else
2998 {
2999 bool funorvar = FALSE;
3000
3001 if (yacc_rules
3002 || consider_token (newlb.buffer + tokoff, toklen, c,
3003 &c_ext, cblev, parlev, &funorvar))
3004 {
3005 if (fvdef == foperator)
3006 {
3007 char *oldlp = lp;
3008 lp = skip_spaces (lp-1);
3009 if (*lp != '\0')
3010 lp += 1;
3011 while (*lp != '\0'
3012 && !iswhite (*lp) && *lp != '(')
3013 lp += 1;
3014 c = *lp++;
3015 toklen += lp - oldlp;
3016 }
3017 token.named = FALSE;
3018 if ((c_ext & C_EXT) /* not pure C */
3019 && nestlev > 0 && definedef == dnone)
3020 /* in struct body */
3021 {
3022 write_classname (&token_name, qualifier);
3023 linebuffer_setlen (&token_name,
3024 token_name.len+qlen+toklen);
3025 strcat (token_name.buffer, qualifier);
3026 strncat (token_name.buffer,
3027 newlb.buffer + tokoff, toklen);
3028 token.named = TRUE;
3029 }
3030 else if (objdef == ocatseen)
3031 /* Objective C category */
3032 {
3033 int len = strlen (objtag) + 2 + toklen;
3034 linebuffer_setlen (&token_name, len);
3035 strcpy (token_name.buffer, objtag);
3036 strcat (token_name.buffer, "(");
3037 strncat (token_name.buffer,
3038 newlb.buffer + tokoff, toklen);
3039 strcat (token_name.buffer, ")");
3040 token.named = TRUE;
3041 }
3042 else if (objdef == omethodtag
3043 || objdef == omethodparm)
3044 /* Objective C method */
3045 {
3046 token.named = TRUE;
3047 }
3048 else if (fvdef == fdefunname)
3049 /* GNU DEFUN and similar macros */
3050 {
3051 bool defun = (newlb.buffer[tokoff] == 'F');
3052 int off = tokoff;
3053 int len = toklen;
3054
3055 /* Rewrite the tag so that emacs lisp DEFUNs
3056 can be found by their elisp name */
3057 if (defun)
3058 {
3059 off += 1;
3060 len -= 1;
3061 }
3062 len = toklen;
3063 linebuffer_setlen (&token_name, len);
3064 strncpy (token_name.buffer,
3065 newlb.buffer + off, len);
3066 token_name.buffer[len] = '\0';
3067 if (defun)
3068 while (--len >= 0)
3069 if (token_name.buffer[len] == '_')
3070 token_name.buffer[len] = '-';
3071 token.named = defun;
3072 }
3073 else
3074 {
3075 linebuffer_setlen (&token_name, toklen);
3076 strncpy (token_name.buffer,
3077 newlb.buffer + tokoff, toklen);
3078 token_name.buffer[toklen] = '\0';
3079 /* Name macros and members. */
3080 token.named = (structdef == stagseen
3081 || typdef == ttypeseen
3082 || typdef == tend
3083 || (funorvar
3084 && definedef == dignorerest)
3085 || (funorvar
3086 && definedef == dnone
3087 && structdef == snone
3088 && cblev > 0));
3089 }
3090 token.lineno = lineno;
3091 token.offset = tokoff;
3092 token.length = toklen;
3093 token.line = newlb.buffer;
3094 token.linepos = newlinepos;
3095 token.valid = TRUE;
3096
3097 if (definedef == dnone
3098 && (fvdef == fvnameseen
3099 || fvdef == foperator
3100 || structdef == stagseen
3101 || typdef == tend
3102 || typdef == ttypeseen
3103 || objdef != onone))
3104 {
3105 if (current_lb_is_new)
3106 switch_line_buffers ();
3107 }
3108 else if (definedef != dnone
3109 || fvdef == fdefunname
3110 || instruct)
3111 make_C_tag (funorvar);
3112 }
3113 midtoken = FALSE;
3114 }
3115 } /* if (endtoken (c)) */
3116 else if (intoken (c))
3117 still_in_token:
3118 {
3119 toklen++;
3120 continue;
3121 }
3122 } /* if (midtoken) */
3123 else if (begtoken (c))
3124 {
3125 switch (definedef)
3126 {
3127 case dnone:
3128 switch (fvdef)
3129 {
3130 case fstartlist:
3131 fvdef = finlist;
3132 continue;
3133 case flistseen:
3134 make_C_tag (TRUE); /* a function */
3135 fvdef = fignore;
3136 break;
3137 case fvnameseen:
3138 fvdef = fvnone;
3139 break;
3140 }
3141 if (structdef == stagseen && !cjava)
3142 {
3143 popclass_above (cblev);
3144 structdef = snone;
3145 }
3146 break;
3147 case dsharpseen:
3148 savetoken = token;
3149 }
3150 if (!yacc_rules || lp == newlb.buffer + 1)
3151 {
3152 tokoff = lp - 1 - newlb.buffer;
3153 toklen = 1;
3154 midtoken = TRUE;
3155 }
3156 continue;
3157 } /* if (begtoken) */
3158 } /* if must look at token */
3159
3160
3161 /* Detect end of line, colon, comma, semicolon and various braces
3162 after having handled a token.*/
3163 switch (c)
3164 {
3165 case ':':
3166 if (yacc_rules && token.offset == 0 && token.valid)
3167 {
3168 make_C_tag (FALSE); /* a yacc function */
3169 break;
3170 }
3171 if (definedef != dnone)
3172 break;
3173 switch (objdef)
3174 {
3175 case otagseen:
3176 objdef = oignore;
3177 make_C_tag (TRUE); /* an Objective C class */
3178 break;
3179 case omethodtag:
3180 case omethodparm:
3181 objdef = omethodcolon;
3182 linebuffer_setlen (&token_name, token_name.len + 1);
3183 strcat (token_name.buffer, ":");
3184 break;
3185 }
3186 if (structdef == stagseen)
3187 structdef = scolonseen;
3188 break;
3189 case ';':
3190 if (definedef != dnone)
3191 break;
3192 switch (typdef)
3193 {
3194 case tend:
3195 case ttypeseen:
3196 make_C_tag (FALSE); /* a typedef */
3197 typdef = tnone;
3198 fvdef = fvnone;
3199 break;
3200 case tnone:
3201 case tinbody:
3202 case tignore:
3203 switch (fvdef)
3204 {
3205 case fignore:
3206 if (typdef == tignore)
3207 fvdef = fvnone;
3208 break;
3209 case fvnameseen:
3210 if ((globals && cblev == 0 && (!fvextern || declarations))
3211 || (members && instruct))
3212 make_C_tag (FALSE); /* a variable */
3213 fvextern = FALSE;
3214 fvdef = fvnone;
3215 token.valid = FALSE;
3216 break;
3217 case flistseen:
3218 if ((declarations && typdef == tnone && !instruct)
3219 || (members && typdef != tignore && instruct))
3220 make_C_tag (TRUE); /* a function declaration */
3221 /* FALLTHRU */
3222 default:
3223 fvextern = FALSE;
3224 fvdef = fvnone;
3225 if (declarations
3226 && structdef == stagseen && (c_ext & C_PLPL))
3227 make_C_tag (FALSE); /* forward declaration */
3228 else
3229 /* The following instruction invalidates the token.
3230 Probably the token should be invalidated in all other
3231 cases where some state machine is reset prematurely. */
3232 token.valid = FALSE;
3233 } /* switch (fvdef) */
3234 /* FALLTHRU */
3235 default:
3236 if (!instruct)
3237 typdef = tnone;
3238 }
3239 if (structdef == stagseen)
3240 structdef = snone;
3241 break;
3242 case ',':
3243 if (definedef != dnone)
3244 break;
3245 switch (objdef)
3246 {
3247 case omethodtag:
3248 case omethodparm:
3249 make_C_tag (TRUE); /* an Objective C method */
3250 objdef = oinbody;
3251 break;
3252 }
3253 switch (fvdef)
3254 {
3255 case fdefunkey:
3256 case foperator:
3257 case fstartlist:
3258 case finlist:
3259 case fignore:
3260 case vignore:
3261 break;
3262 case fdefunname:
3263 fvdef = fignore;
3264 break;
3265 case fvnameseen: /* a variable */
3266 if ((globals && cblev == 0 && (!fvextern || declarations))
3267 || (members && instruct))
3268 make_C_tag (FALSE);
3269 break;
3270 case flistseen: /* a function */
3271 if ((declarations && typdef == tnone && !instruct)
3272 || (members && typdef != tignore && instruct))
3273 {
3274 make_C_tag (TRUE); /* a function declaration */
3275 fvdef = fvnameseen;
3276 }
3277 else if (!declarations)
3278 fvdef = fvnone;
3279 token.valid = FALSE;
3280 break;
3281 default:
3282 fvdef = fvnone;
3283 }
3284 if (structdef == stagseen)
3285 structdef = snone;
3286 break;
3287 case '[':
3288 if (definedef != dnone)
3289 break;
3290 if (structdef == stagseen)
3291 structdef = snone;
3292 switch (typdef)
3293 {
3294 case ttypeseen:
3295 case tend:
3296 typdef = tignore;
3297 make_C_tag (FALSE); /* a typedef */
3298 break;
3299 case tnone:
3300 case tinbody:
3301 switch (fvdef)
3302 {
3303 case foperator:
3304 case finlist:
3305 case fignore:
3306 case vignore:
3307 break;
3308 case fvnameseen:
3309 if ((members && cblev == 1)
3310 || (globals && cblev == 0
3311 && (!fvextern || declarations)))
3312 make_C_tag (FALSE); /* a variable */
3313 /* FALLTHRU */
3314 default:
3315 fvdef = fvnone;
3316 }
3317 break;
3318 }
3319 break;
3320 case '(':
3321 if (definedef != dnone)
3322 break;
3323 if (objdef == otagseen && parlev == 0)
3324 objdef = oparenseen;
3325 switch (fvdef)
3326 {
3327 case fvnameseen:
3328 if (typdef == ttypeseen
3329 && *lp != '*'
3330 && !instruct)
3331 {
3332 /* This handles constructs like:
3333 typedef void OperatorFun (int fun); */
3334 make_C_tag (FALSE);
3335 typdef = tignore;
3336 fvdef = fignore;
3337 break;
3338 }
3339 /* FALLTHRU */
3340 case foperator:
3341 fvdef = fstartlist;
3342 break;
3343 case flistseen:
3344 fvdef = finlist;
3345 break;
3346 }
3347 parlev++;
3348 break;
3349 case ')':
3350 if (definedef != dnone)
3351 break;
3352 if (objdef == ocatseen && parlev == 1)
3353 {
3354 make_C_tag (TRUE); /* an Objective C category */
3355 objdef = oignore;
3356 }
3357 if (--parlev == 0)
3358 {
3359 switch (fvdef)
3360 {
3361 case fstartlist:
3362 case finlist:
3363 fvdef = flistseen;
3364 break;
3365 }
3366 if (!instruct
3367 && (typdef == tend
3368 || typdef == ttypeseen))
3369 {
3370 typdef = tignore;
3371 make_C_tag (FALSE); /* a typedef */
3372 }
3373 }
3374 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3375 parlev = 0;
3376 break;
3377 case '{':
3378 if (definedef != dnone)
3379 break;
3380 if (typdef == ttypeseen)
3381 {
3382 /* Whenever typdef is set to tinbody (currently only
3383 here), typdefcblev should be set to cblev. */
3384 typdef = tinbody;
3385 typdefcblev = cblev;
3386 }
3387 switch (fvdef)
3388 {
3389 case flistseen:
3390 make_C_tag (TRUE); /* a function */
3391 /* FALLTHRU */
3392 case fignore:
3393 fvdef = fvnone;
3394 break;
3395 case fvnone:
3396 switch (objdef)
3397 {
3398 case otagseen:
3399 make_C_tag (TRUE); /* an Objective C class */
3400 objdef = oignore;
3401 break;
3402 case omethodtag:
3403 case omethodparm:
3404 make_C_tag (TRUE); /* an Objective C method */
3405 objdef = oinbody;
3406 break;
3407 default:
3408 /* Neutralize `extern "C" {' grot. */
3409 if (cblev == 0 && structdef == snone && nestlev == 0
3410 && typdef == tnone)
3411 cblev = -1;
3412 }
3413 }
3414 switch (structdef)
3415 {
3416 case skeyseen: /* unnamed struct */
3417 pushclass_above (cblev, NULL, 0);
3418 structdef = snone;
3419 break;
3420 case stagseen: /* named struct or enum */
3421 case scolonseen: /* a class */
3422 pushclass_above (cblev, token.line+token.offset, token.length);
3423 structdef = snone;
3424 make_C_tag (FALSE); /* a struct or enum */
3425 break;
3426 }
3427 cblev++;
3428 break;
3429 case '*':
3430 if (definedef != dnone)
3431 break;
3432 if (fvdef == fstartlist)
3433 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3434 break;
3435 case '}':
3436 if (definedef != dnone)
3437 break;
3438 if (!noindentypedefs && lp == newlb.buffer + 1)
3439 {
3440 cblev = 0; /* reset curly brace level if first column */
3441 parlev = 0; /* also reset paren level, just in case... */
3442 }
3443 else if (cblev > 0)
3444 cblev--;
3445 popclass_above (cblev);
3446 structdef = snone;
3447 /* Only if typdef == tinbody is typdefcblev significant. */
3448 if (typdef == tinbody && cblev <= typdefcblev)
3449 {
3450 assert (cblev == typdefcblev);
3451 typdef = tend;
3452 }
3453 break;
3454 case '=':
3455 if (definedef != dnone)
3456 break;
3457 switch (fvdef)
3458 {
3459 case foperator:
3460 case finlist:
3461 case fignore:
3462 case vignore:
3463 break;
3464 case fvnameseen:
3465 if ((members && cblev == 1)
3466 || (globals && cblev == 0 && (!fvextern || declarations)))
3467 make_C_tag (FALSE); /* a variable */
3468 /* FALLTHRU */
3469 default:
3470 fvdef = vignore;
3471 }
3472 break;
3473 case '<':
3474 if (cplpl && structdef == stagseen)
3475 {
3476 structdef = sintemplate;
3477 break;
3478 }
3479 goto resetfvdef;
3480 case '>':
3481 if (structdef == sintemplate)
3482 {
3483 structdef = stagseen;
3484 break;
3485 }
3486 goto resetfvdef;
3487 case '+':
3488 case '-':
3489 if (objdef == oinbody && cblev == 0)
3490 {
3491 objdef = omethodsign;
3492 break;
3493 }
3494 /* FALLTHRU */
3495 resetfvdef:
3496 case '#': case '~': case '&': case '%': case '/': case '|':
3497 case '^': case '!': case '.': case '?': case ']':
3498 if (definedef != dnone)
3499 break;
3500 /* These surely cannot follow a function tag in C. */
3501 switch (fvdef)
3502 {
3503 case foperator:
3504 case finlist:
3505 case fignore:
3506 case vignore:
3507 break;
3508 default:
3509 fvdef = fvnone;
3510 }
3511 break;
3512 case '\0':
3513 if (objdef == otagseen)
3514 {
3515 make_C_tag (TRUE); /* an Objective C class */
3516 objdef = oignore;
3517 }
3518 /* If a macro spans multiple lines don't reset its state. */
3519 if (quotednl)
3520 CNL_SAVE_DEFINEDEF ();
3521 else
3522 CNL ();
3523 break;
3524 } /* switch (c) */
3525
3526 } /* while not eof */
3527
3528 free (token_name.buffer);
3529 free (lbs[0].lb.buffer);
3530 free (lbs[1].lb.buffer);
3531 }
3532
3533 /*
3534 * Process either a C++ file or a C file depending on the setting
3535 * of a global flag.
3536 */
3537 static void
3538 default_C_entries (inf)
3539 FILE *inf;
3540 {
3541 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3542 }
3543
/* Tag INF as plain C: C_entries is called with a flags value of 0.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
3551
3552 /* Always do C++. */
3553 static void
3554 Cplusplus_entries (inf)
3555 FILE *inf;
3556 {
3557 C_entries (C_PLPL, inf);
3558 }
3559
3560 /* Always do Java. */
3561 static void
3562 Cjava_entries (inf)
3563 FILE *inf;
3564 {
3565 C_entries (C_JAVA, inf);
3566 }
3567
3568 /* Always do C*. */
3569 static void
3570 Cstar_entries (inf)
3571 FILE *inf;
3572 {
3573 C_entries (C_STAR, inf);
3574 }
3575
3576 /* Always do Yacc. */
3577 static void
3578 Yacc_entries (inf)
3579 FILE *inf;
3580 {
3581 C_entries (YACC, inf);
3582 }
3583
3584 \f
/*
 * Loop header that walks FILE_POINTER one line at a time.
 *
 * The globals `lineno' and `charno' are zeroed when the loop starts.
 * Before every pass through the body, as long as feof (FILE_POINTER)
 * is false, the macro
 *   - increments `lineno',
 *   - saves the current `charno' in `linecharno',
 *   - reads the next line into LINE_BUFFER with readline and adds the
 *     value readline returns to `charno',
 *   - points CHAR_POINTER at the start of LINE_BUFFER's text.
 *
 * The statement or block that follows the macro is the loop body.
 * CHAR_POINTER is taken from LINE_BUFFER itself, so the macro works
 * with any line buffer and not only with the global `lb'.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
3595
3596
3597 /*
3598 * Read a file, but do no processing. This is used to do regexp
3599 * matching on files that have no language defined.
3600 */
3601 static void
3602 just_read_file (inf)
3603 FILE *inf;
3604 {
3605 register char *dummy;
3606
3607 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3608 continue;
3609 }
3610
3611 \f
3612 /* Fortran parsing */
3613
3614 static bool tail P_((char *));
3615 static void takeprec P_((void));
3616 static void getit P_((FILE *));
3617
3618 static bool
3619 tail (cp)
3620 char *cp;
3621 {
3622 register int len = 0;
3623
3624 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3625 cp++, len++;
3626 if (*cp == '\0' && !intoken (dbp[len]))
3627 {
3628 dbp += len;
3629 return TRUE;
3630 }
3631 return FALSE;
3632 }
3633
3634 static void
3635 takeprec ()
3636 {
3637 dbp = skip_spaces (dbp);
3638 if (*dbp != '*')
3639 return;
3640 dbp++;
3641 dbp = skip_spaces (dbp);
3642 if (strneq (dbp, "(*)", 3))
3643 {
3644 dbp += 3;
3645 return;
3646 }
3647 if (!ISDIGIT (*dbp))
3648 {
3649 --dbp; /* force failure */
3650 return;
3651 }
3652 do
3653 dbp++;
3654 while (ISDIGIT (*dbp));
3655 }
3656
3657 static void
3658 getit (inf)
3659 FILE *inf;
3660 {
3661 register char *cp;
3662
3663 dbp = skip_spaces (dbp);
3664 if (*dbp == '\0')
3665 {
3666 lineno++;
3667 linecharno = charno;
3668 charno += readline (&lb, inf);
3669 dbp = lb.buffer;
3670 if (dbp[5] != '&')
3671 return;
3672 dbp += 6;
3673 dbp = skip_spaces (dbp);
3674 }
3675 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3676 return;
3677 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3678 continue;
3679 pfnote (savenstr (dbp, cp-dbp), TRUE,
3680 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3681 }
3682
3683
3684 static void
3685 Fortran_functions (inf)
3686 FILE *inf;
3687 {
3688 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3689 {
3690 if (*dbp == '%')
3691 dbp++; /* Ratfor escape to fortran */
3692 dbp = skip_spaces (dbp);
3693 if (*dbp == '\0')
3694 continue;
3695 switch (lowcase (*dbp))
3696 {
3697 case 'i':
3698 if (tail ("integer"))
3699 takeprec ();
3700 break;
3701 case 'r':
3702 if (tail ("real"))
3703 takeprec ();
3704 break;
3705 case 'l':
3706 if (tail ("logical"))
3707 takeprec ();
3708 break;
3709 case 'c':
3710 if (tail ("complex") || tail ("character"))
3711 takeprec ();
3712 break;
3713 case 'd':
3714 if (tail ("double"))
3715 {
3716 dbp = skip_spaces (dbp);
3717 if (*dbp == '\0')
3718 continue;
3719 if (tail ("precision"))
3720 break;
3721 continue;
3722 }
3723 break;
3724 }
3725 dbp = skip_spaces (dbp);
3726 if (*dbp == '\0')
3727 continue;
3728 switch (lowcase (*dbp))
3729 {
3730 case 'f':
3731 if (tail ("function"))
3732 getit (inf);
3733 continue;
3734 case 's':
3735 if (tail ("subroutine"))
3736 getit (inf);
3737 continue;
3738 case 'e':
3739 if (tail ("entry"))
3740 getit (inf);
3741 continue;
3742 case 'b':
3743 if (tail ("blockdata") || tail ("block data"))
3744 {
3745 dbp = skip_spaces (dbp);
3746 if (*dbp == '\0') /* assume un-named */
3747 pfnote (savestr ("blockdata"), TRUE,
3748 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3749 else
3750 getit (inf); /* look for name */
3751 }
3752 continue;
3753 }
3754 }
3755 }
3756
3757 \f
3758 /*
3759 * Ada parsing
3760 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3761 */
3762
3763 static void adagetit P_((FILE *, char *));
3764
3765 /* Once we are positioned after an "interesting" keyword, let's get
3766 the real tag value necessary. */
3767 static void
3768 adagetit (inf, name_qualifier)
3769 FILE *inf;
3770 char *name_qualifier;
3771 {
3772 register char *cp;
3773 char *name;
3774 char c;
3775
3776 while (!feof (inf))
3777 {
3778 dbp = skip_spaces (dbp);
3779 if (*dbp == '\0'
3780 || (dbp[0] == '-' && dbp[1] == '-'))
3781 {
3782 lineno++;
3783 linecharno = charno;
3784 charno += readline (&lb, inf);
3785 dbp = lb.buffer;
3786 }
3787 switch (*dbp)
3788 {
3789 case 'b':
3790 case 'B':
3791 if (tail ("body"))
3792 {
3793 /* Skipping body of procedure body or package body or ....
3794 resetting qualifier to body instead of spec. */
3795 name_qualifier = "/b";
3796 continue;
3797 }
3798 break;
3799 case 't':
3800 case 'T':
3801 /* Skipping type of task type or protected type ... */
3802 if (tail ("type"))
3803 continue;
3804 break;
3805 }
3806 if (*dbp == '"')
3807 {
3808 dbp += 1;
3809 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3810 continue;
3811 }
3812 else
3813 {
3814 dbp = skip_spaces (dbp);
3815 for (cp = dbp;
3816 (*cp != '\0'
3817 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3818 cp++)
3819 continue;
3820 if (cp == dbp)
3821 return;
3822 }
3823 c = *cp;
3824 *cp = '\0';
3825 name = concat (dbp, name_qualifier, "");
3826 *cp = c;
3827 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3828 if (c == '"')
3829 dbp = cp + 1;
3830 return;
3831 }
3832 }
3833
3834 static void
3835 Ada_funcs (inf)
3836 FILE *inf;
3837 {
3838 bool inquote = FALSE;
3839
3840 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3841 {
3842 while (*dbp != '\0')
3843 {
3844 /* Skip a string i.e. "abcd". */
3845 if (inquote || (*dbp == '"'))
3846 {
3847 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3848 if (dbp != NULL)
3849 {
3850 inquote = FALSE;
3851 dbp += 1;
3852 continue; /* advance char */
3853 }
3854 else
3855 {
3856 inquote = TRUE;
3857 break; /* advance line */
3858 }
3859 }
3860
3861 /* Skip comments. */
3862 if (dbp[0] == '-' && dbp[1] == '-')
3863 break; /* advance line */
3864
3865 /* Skip character enclosed in single quote i.e. 'a'
3866 and skip single quote starting an attribute i.e. 'Image. */
3867 if (*dbp == '\'')
3868 {
3869 dbp++ ;
3870 if (*dbp != '\0')
3871 dbp++;
3872 continue;
3873 }
3874
3875 /* Search for beginning of a token. */
3876 if (!begtoken (*dbp))
3877 {
3878 dbp++;
3879 continue; /* advance char */
3880 }
3881
3882 /* We are at the beginning of a token. */
3883 switch (*dbp)
3884 {
3885 case 'f':
3886 case 'F':
3887 if (!packages_only && tail ("function"))
3888 adagetit (inf, "/f");
3889 else
3890 break; /* from switch */
3891 continue; /* advance char */
3892 case 'p':
3893 case 'P':
3894 if (!packages_only && tail ("procedure"))
3895 adagetit (inf, "/p");
3896 else if (tail ("package"))
3897 adagetit (inf, "/s");
3898 else if (tail ("protected")) /* protected type */
3899 adagetit (inf, "/t");
3900 else
3901 break; /* from switch */
3902 continue; /* advance char */
3903 case 't':
3904 case 'T':
3905 if (!packages_only && tail ("task"))
3906 adagetit (inf, "/k");
3907 else if (typedefs && !packages_only && tail ("type"))
3908 {
3909 adagetit (inf, "/t");
3910 while (*dbp != '\0')
3911 dbp += 1;
3912 }
3913 else
3914 break; /* from switch */
3915 continue; /* advance char */
3916 }
3917
3918 /* Look for the end of the token. */
3919 while (!endtoken (*dbp))
3920 dbp++;
3921
3922 } /* advance char */
3923 } /* advance line */
3924 }
3925
3926 \f
3927 /*
3928 * Bob Weiner, Motorola Inc., 4/3/94
3929 * Unix and microcontroller assembly tag handling
3930 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3931 */
3932 static void
3933 Asm_labels (inf)
3934 FILE *inf;
3935 {
3936 register char *cp;
3937
3938 LOOP_ON_INPUT_LINES (inf, lb, cp)
3939 {
3940 /* If first char is alphabetic or one of [_.$], test for colon
3941 following identifier. */
3942 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3943 {
3944 /* Read past label. */
3945 cp++;
3946 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3947 cp++;
3948 if (*cp == ':' || iswhite (*cp))
3949 {
3950 /* Found end of label, so copy it and add it to the table. */
3951 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3952 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3953 }
3954 }
3955 }
3956 }
3957
3958 \f
3959 /*
3960 * Perl support
3961 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3962 * Perl variable names: /^(my|local).../
3963 * Bart Robinson <lomew@cs.utah.edu> (1995)
3964 * Michael Ernst <mernst@alum.mit.edu> (1997)
3965 */
3966 static void
3967 Perl_functions (inf)
3968 FILE *inf;
3969 {
3970 register char *cp;
3971
3972 LOOP_ON_INPUT_LINES (inf, lb, cp)
3973 {
3974 if (*cp++ == 's'
3975 && *cp++ == 'u'
3976 && *cp++ == 'b' && iswhite (*cp++))
3977 {
3978 cp = skip_spaces (cp);
3979 if (*cp != '\0')
3980 {
3981 char *sp = cp;
3982 while (*cp != '\0'
3983 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3984 cp++;
3985 pfnote (savenstr (sp, cp-sp), TRUE,
3986 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3987 }
3988 }
3989 else if (globals /* only if tagging global vars is enabled */
3990 && ((cp = lb.buffer,
3991 *cp++ == 'm'
3992 && *cp++ == 'y')
3993 || (cp = lb.buffer,
3994 *cp++ == 'l'
3995 && *cp++ == 'o'
3996 && *cp++ == 'c'
3997 && *cp++ == 'a'
3998 && *cp++ == 'l'))
3999 && (*cp == '(' || iswhite (*cp)))
4000 {
4001 /* After "my" or "local", but before any following paren or space. */
4002 char *varname = NULL;
4003
4004 cp = skip_spaces (cp);
4005 if (*cp == '$' || *cp == '@' || *cp == '%')
4006 {
4007 char* varstart = ++cp;
4008 while (ISALNUM (*cp) || *cp == '_')
4009 cp++;
4010 varname = savenstr (varstart, cp-varstart);
4011 }
4012 else
4013 {
4014 /* Should be examining a variable list at this point;
4015 could insist on seeing an open parenthesis. */
4016 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4017 cp++;
4018 }
4019
4020 /* Perhaps I should back cp up one character, so the TAGS table
4021 doesn't mention (and so depend upon) the following char. */
4022 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
4023 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4024 }
4025 }
4026 }
4027
4028 \f
4029 /*
4030 * Python support
4031 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4032 * Eric S. Raymond <esr@thyrsus.com> (1997)
4033 */
4034 static void
4035 Python_functions (inf)
4036 FILE *inf;
4037 {
4038 register char *cp;
4039
4040 LOOP_ON_INPUT_LINES (inf, lb, cp)
4041 {
4042 if (*cp++ == 'd'
4043 && *cp++ == 'e'
4044 && *cp++ == 'f' && iswhite (*cp++))
4045 {
4046 cp = skip_spaces (cp);
4047 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4048 cp++;
4049 pfnote (NULL, TRUE,
4050 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4051 }
4052
4053 cp = lb.buffer;
4054 if (*cp++ == 'c'
4055 && *cp++ == 'l'
4056 && *cp++ == 'a'
4057 && *cp++ == 's'
4058 && *cp++ == 's' && iswhite (*cp++))
4059 {
4060 cp = skip_spaces (cp);
4061 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
4062 cp++;
4063 pfnote (NULL, TRUE,
4064 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4065 }
4066 }
4067 }
4068
4069 \f
4070 /* Idea by Corny de Souza
4071 * Cobol tag functions
4072 * We could look for anything that could be a paragraph name.
4073 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4074 */
4075 static void
4076 Cobol_paragraphs (inf)
4077 FILE *inf;
4078 {
4079 register char *bp, *ep;
4080
4081 LOOP_ON_INPUT_LINES (inf, lb, bp)
4082 {
4083 if (lb.len < 9)
4084 continue;
4085 bp += 8;
4086
4087 /* If eoln, compiler option or comment ignore whole line. */
4088 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4089 continue;
4090
4091 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4092 continue;
4093 if (*ep++ == '.')
4094 pfnote (savenstr (bp, ep-bp), TRUE,
4095 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4096 }
4097 }
4098
4099 \f
4100 /*
4101 * Makefile support
4102 * Idea by Assar Westerlund <assar@sics.se> (2001)
4103 */
4104 static void
4105 Makefile_targets (inf)
4106 FILE *inf;
4107 {
4108 register char *bp;
4109
4110 LOOP_ON_INPUT_LINES (inf, lb, bp)
4111 {
4112 if (*bp == '\t' || *bp == '#')
4113 continue;
4114 while (*bp != '\0' && *bp != '=' && *bp != ':')
4115 bp++;
4116 if (*bp == ':')
4117 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4118 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4119 }
4120 }
4121
4122 \f
4123 /* Added by Mosur Mohan, 4/22/88 */
4124 /* Pascal parsing */
4125
4126 /*
4127 * Locates tags for procedures & functions. Doesn't do any type- or
4128 * var-definitions. It does look for the keyword "extern" or
4129 * "forward" immediately following the procedure statement; if found,
4130 * the tag is skipped.
4131 */
4132 static void
4133 Pascal_functions (inf)
4134 FILE *inf;
4135 {
4136 linebuffer tline; /* mostly copied from C_entries */
4137 long save_lcno;
4138 int save_lineno, save_len;
4139 char c, *cp, *namebuf;
4140
4141 bool /* each of these flags is TRUE iff: */
4142 incomment, /* point is inside a comment */
4143 inquote, /* point is inside '..' string */
4144 get_tagname, /* point is after PROCEDURE/FUNCTION
4145 keyword, so next item = potential tag */
4146 found_tag, /* point is after a potential tag */
4147 inparms, /* point is within parameter-list */
4148 verify_tag; /* point has passed the parm-list, so the
4149 next token will determine whether this
4150 is a FORWARD/EXTERN to be ignored, or
4151 whether it is a real tag */
4152
4153 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4154 namebuf = NULL; /* keep compiler quiet */
4155 lineno = 0;
4156 charno = 0;
4157 dbp = lb.buffer;
4158 *dbp = '\0';
4159 initbuffer (&tline);
4160
4161 incomment = inquote = FALSE;
4162 found_tag = FALSE; /* have a proc name; check if extern */
4163 get_tagname = FALSE; /* have found "procedure" keyword */
4164 inparms = FALSE; /* found '(' after "proc" */
4165 verify_tag = FALSE; /* check if "extern" is ahead */
4166
4167
4168 while (!feof (inf)) /* long main loop to get next char */
4169 {
4170 c = *dbp++;
4171 if (c == '\0') /* if end of line */
4172 {
4173 lineno++;
4174 linecharno = charno;
4175 charno += readline (&lb, inf);
4176 dbp = lb.buffer;
4177 if (*dbp == '\0')
4178 continue;
4179 if (!((found_tag && verify_tag)
4180 || get_tagname))
4181 c = *dbp++; /* only if don't need *dbp pointing
4182 to the beginning of the name of
4183 the procedure or function */
4184 }
4185 if (incomment)
4186 {
4187 if (c == '}') /* within { } comments */
4188 incomment = FALSE;
4189 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4190 {
4191 dbp++;
4192 incomment = FALSE;
4193 }
4194 continue;
4195 }
4196 else if (inquote)
4197 {
4198 if (c == '\'')
4199 inquote = FALSE;
4200 continue;
4201 }
4202 else
4203 switch (c)
4204 {
4205 case '\'':
4206 inquote = TRUE; /* found first quote */
4207 continue;
4208 case '{': /* found open { comment */
4209 incomment = TRUE;
4210 continue;
4211 case '(':
4212 if (*dbp == '*') /* found open (* comment */
4213 {
4214 incomment = TRUE;
4215 dbp++;
4216 }
4217 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4218 inparms = TRUE;
4219 continue;
4220 case ')': /* end of parms list */
4221 if (inparms)
4222 inparms = FALSE;
4223 continue;
4224 case ';':
4225 if (found_tag && !inparms) /* end of proc or fn stmt */
4226 {
4227 verify_tag = TRUE;
4228 break;
4229 }
4230 continue;
4231 }
4232 if (found_tag && verify_tag && (*dbp != ' '))
4233 {
4234 /* check if this is an "extern" declaration */
4235 if (*dbp == '\0')
4236 continue;
4237 if (lowcase (*dbp == 'e'))
4238 {
4239 if (tail ("extern")) /* superfluous, really! */
4240 {
4241 found_tag = FALSE;
4242 verify_tag = FALSE;
4243 }
4244 }
4245 else if (lowcase (*dbp) == 'f')
4246 {
4247 if (tail ("forward")) /* check for forward reference */
4248 {
4249 found_tag = FALSE;
4250 verify_tag = FALSE;
4251 }
4252 }
4253 if (found_tag && verify_tag) /* not external proc, so make tag */
4254 {
4255 found_tag = FALSE;
4256 verify_tag = FALSE;
4257 pfnote (namebuf, TRUE,
4258 tline.buffer, save_len, save_lineno, save_lcno);
4259 continue;
4260 }
4261 }
4262 if (get_tagname) /* grab name of proc or fn */
4263 {
4264 if (*dbp == '\0')
4265 continue;
4266
4267 /* save all values for later tagging */
4268 linebuffer_setlen (&tline, lb.len);
4269 strcpy (tline.buffer, lb.buffer);
4270 save_lineno = lineno;
4271 save_lcno = linecharno;
4272
4273 /* grab block name */
4274 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4275 continue;
4276 namebuf = savenstr (dbp, cp-dbp);
4277 dbp = cp; /* set dbp to e-o-token */
4278 save_len = dbp - lb.buffer + 1;
4279 get_tagname = FALSE;
4280 found_tag = TRUE;
4281 continue;
4282
4283 /* and proceed to check for "extern" */
4284 }
4285 else if (!incomment && !inquote && !found_tag)
4286 {
4287 /* check for proc/fn keywords */
4288 switch (lowcase (c))
4289 {
4290 case 'p':
4291 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4292 get_tagname = TRUE;
4293 continue;
4294 case 'f':
4295 if (tail ("unction"))
4296 get_tagname = TRUE;
4297 continue;
4298 }
4299 }
4300 } /* while not eof */
4301
4302 free (tline.buffer);
4303 }
4304
4305 \f
4306 /*
4307 * Lisp tag functions
4308 * look for (def or (DEF, quote or QUOTE
4309 */
4310
4311 static int L_isdef P_((char *));
4312 static int L_isquote P_((char *));
4313 static void L_getit P_((void));
4314
/* Return nonzero when the three characters following STRP[0] spell
   "def" in any mixture of upper and lower case.  STRP[0] itself
   (normally the open parenthesis) is not examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
4323
/* Return nonzero when the characters following STRP[0] spell "quote"
   in any mixture of case and are followed by a character for which
   iswhite is true.  STRP[0] itself is not examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  if (strp[1] != 'q' && strp[1] != 'Q')
    return 0;
  if (strp[2] != 'u' && strp[2] != 'U')
    return 0;
  if (strp[3] != 'o' && strp[3] != 'O')
    return 0;
  if (strp[4] != 't' && strp[4] != 'T')
    return 0;
  if (strp[5] != 'e' && strp[5] != 'E')
    return 0;
  return iswhite (strp[6]);
}
4335
4336 static void
4337 L_getit ()
4338 {
4339 register char *cp;
4340
4341 if (*dbp == '\'') /* Skip prefix quote */
4342 dbp++;
4343 else if (*dbp == '(')
4344 {
4345 if (L_isquote (dbp))
4346 dbp += 7; /* Skip "(quote " */
4347 else
4348 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4349 dbp = skip_spaces (dbp);
4350 }
4351
4352 for (cp = dbp /*+1*/;
4353 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4354 cp++)
4355 continue;
4356 if (cp == dbp)
4357 return;
4358
4359 pfnote (savenstr (dbp, cp-dbp), TRUE,
4360 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4361 }
4362
4363 static void
4364 Lisp_functions (inf)
4365 FILE *inf;
4366 {
4367 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4368 {
4369 if (dbp[0] == '(')
4370 {
4371 if (L_isdef (dbp))
4372 {
4373 dbp = skip_non_spaces (dbp);
4374 dbp = skip_spaces (dbp);
4375 L_getit ();
4376 }
4377 else
4378 {
4379 /* Check for (foo::defmumble name-defined ... */
4380 do
4381 dbp++;
4382 while (*dbp != '\0' && !iswhite (*dbp)
4383 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4384 if (*dbp == ':')
4385 {
4386 do
4387 dbp++;
4388 while (*dbp == ':');
4389
4390 if (L_isdef (dbp - 1))
4391 {
4392 dbp = skip_non_spaces (dbp);
4393 dbp = skip_spaces (dbp);
4394 L_getit ();
4395 }
4396 }
4397 }
4398 }
4399 }
4400 }
4401
4402 \f
4403 /*
4404 * Postscript tag functions
4405 * Just look for lines where the first character is '/'
4406 * Also look at "defineps" for PSWrap
4407 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4408 * Ideas by Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4409 */
4410 static void
4411 Postscript_functions (inf)
4412 FILE *inf;
4413 {
4414 register char *bp, *ep;
4415
4416 LOOP_ON_INPUT_LINES (inf, lb, bp)
4417 {
4418 if (bp[0] == '/')
4419 {
4420 for (ep = bp+1;
4421 *ep != '\0' && *ep != ' ' && *ep != '{';
4422 ep++)
4423 continue;
4424 pfnote (savenstr (bp, ep-bp), TRUE,
4425 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4426 }
4427 else if (strneq (bp, "defineps", 8))
4428 {
4429 bp = skip_non_spaces (bp);
4430 bp = skip_spaces (bp);
4431 get_tag (bp);
4432 }
4433 }
4434 }
4435
4436 \f
4437 /*
4438 * Scheme tag functions
4439 * look for (def... xyzzy
4440 * look for (def... (xyzzy
4441 * look for (def ... ((...(xyzzy ....
4442 * look for (set! xyzzy
4443 */
4444
4445 static void
4446 Scheme_functions (inf)
4447 FILE *inf;
4448 {
4449 register char *bp;
4450
4451 LOOP_ON_INPUT_LINES (inf, lb, bp)
4452 {
4453 if (bp[0] == '('
4454 && (bp[1] == 'D' || bp[1] == 'd')
4455 && (bp[2] == 'E' || bp[2] == 'e')
4456 && (bp[3] == 'F' || bp[3] == 'f'))
4457 {
4458 bp = skip_non_spaces (bp);
4459 /* Skip over open parens and white space */
4460 while (iswhite (*bp) || *bp == '(')
4461 bp++;
4462 get_tag (bp);
4463 }
4464 if (bp[0] == '('
4465 && (bp[1] == 'S' || bp[1] == 's')
4466 && (bp[2] == 'E' || bp[2] == 'e')
4467 && (bp[3] == 'T' || bp[3] == 't')
4468 && (bp[4] == '!' || bp[4] == '!')
4469 && (iswhite (bp[5])))
4470 {
4471 bp = skip_non_spaces (bp);
4472 bp = skip_spaces (bp);
4473 get_tag (bp);
4474 }
4475 }
4476 }
4477
4478 \f
4479 /* Find tags in TeX and LaTeX input files. */
4480
4481 /* TEX_toktab is a table of TeX control sequences that define tags.
4482 Each TEX_tabent records one such control sequence.
4483 CONVERT THIS TO USE THE Stab TYPE!! */
4484 struct TEX_tabent
4485 {
4486 char *name;
4487 int len;
4488 };
4489
4490 struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4491
4492 /* Default set of control sequences to put into TEX_toktab.
4493 The value of environment var TEXTAGS is prepended to this. */
4494
4495 char *TEX_defenv = "\
4496 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4497 :part:appendix:entry:index";
4498
4499 static void TEX_mode P_((FILE *));
4500 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4501 static int TEX_Token P_((char *));
4502
4503 char TEX_esc = '\\';
4504 char TEX_opgrp = '{';
4505 char TEX_clgrp = '}';
4506
4507 /*
4508 * TeX/LaTeX scanning loop.
4509 */
4510 static void
4511 TeX_commands (inf)
4512 FILE *inf;
4513 {
4514 char *cp, *lasthit;
4515 register int i;
4516
4517 /* Select either \ or ! as escape character. */
4518 TEX_mode (inf);
4519
4520 /* Initialize token table once from environment. */
4521 if (!TEX_toktab)
4522 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4523
4524 LOOP_ON_INPUT_LINES (inf, lb, cp)
4525 {
4526 lasthit = cp;
4527 /* Look at each esc in line. */
4528 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4529 {
4530 if (*++cp == '\0')
4531 break;
4532 linecharno += cp - lasthit;
4533 lasthit = cp;
4534 i = TEX_Token (lasthit);
4535 if (i >= 0)
4536 {
4537 /* We seem to include the TeX command in the tag name.
4538 register char *p;
4539 for (p = lasthit + TEX_toktab[i].len;
4540 *p != '\0' && *p != TEX_clgrp;
4541 p++)
4542 continue; */
4543 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4544 lb.buffer, lb.len, lineno, linecharno);
4545 break; /* We only tag a line once */
4546 }
4547 }
4548 }
4549 }
4550
4551 #define TEX_LESC '\\'
4552 #define TEX_SESC '!'
4553 #define TEX_cmt '%'
4554
4555 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4556 chars accordingly. */
4557 static void
4558 TEX_mode (inf)
4559 FILE *inf;
4560 {
4561 int c;
4562
4563 while ((c = getc (inf)) != EOF)
4564 {
4565 /* Skip to next line if we hit the TeX comment char. */
4566 if (c == TEX_cmt)
4567 while (c != '\n')
4568 c = getc (inf);
4569 else if (c == TEX_LESC || c == TEX_SESC )
4570 break;
4571 }
4572
4573 if (c == TEX_LESC)
4574 {
4575 TEX_esc = TEX_LESC;
4576 TEX_opgrp = '{';
4577 TEX_clgrp = '}';
4578 }
4579 else
4580 {
4581 TEX_esc = TEX_SESC;
4582 TEX_opgrp = '<';
4583 TEX_clgrp = '>';
4584 }
4585 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4586 No attempt is made to correct the situation. */
4587 rewind (inf);
4588 }
4589
4590 /* Read environment and prepend it to the default string.
4591 Build token table. */
4592 static struct TEX_tabent *
4593 TEX_decode_env (evarname, defenv)
4594 char *evarname;
4595 char *defenv;
4596 {
4597 register char *env, *p;
4598
4599 struct TEX_tabent *tab;
4600 int size, i;
4601
4602 /* Append default string to environment. */
4603 env = getenv (evarname);
4604 if (!env)
4605 env = defenv;
4606 else
4607 {
4608 char *oldenv = env;
4609 env = concat (oldenv, defenv, "");
4610 }
4611
4612 /* Allocate a token table */
4613 for (size = 1, p = env; p;)
4614 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4615 size++;
4616 /* Add 1 to leave room for null terminator. */
4617 tab = xnew (size + 1, struct TEX_tabent);
4618
4619 /* Unpack environment string into token table. Be careful about */
4620 /* zero-length strings (leading ':', "::" and trailing ':') */
4621 for (i = 0; *env;)
4622 {
4623 p = etags_strchr (env, ':');
4624 if (!p) /* End of environment string. */
4625 p = env + strlen (env);
4626 if (p - env > 0)
4627 { /* Only non-zero strings. */
4628 tab[i].name = savenstr (env, p - env);
4629 tab[i].len = strlen (tab[i].name);
4630 i++;
4631 }
4632 if (*p)
4633 env = p + 1;
4634 else
4635 {
4636 tab[i].name = NULL; /* Mark end of table. */
4637 tab[i].len = 0;
4638 break;
4639 }
4640 }
4641 return tab;
4642 }
4643
4644 /* If the text at CP matches one of the tag-defining TeX command names,
4645 return the pointer to the first occurrence of that command in TEX_toktab.
4646 Otherwise return -1.
4647 Keep the capital `T' in `token' for dumb truncating compilers
4648 (this distinguishes it from `TEX_toktab' */
4649 static int
4650 TEX_Token (cp)
4651 char *cp;
4652 {
4653 int i;
4654
4655 for (i = 0; TEX_toktab[i].len > 0; i++)
4656 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4657 return i;
4658 return -1;
4659 }
4660
4661 \f
4662 /* Texinfo support. Dave Love, Mar. 2000. */
4663 static void
4664 Texinfo_nodes (inf)
4665 FILE * inf;
4666 {
4667 char *cp, *start;
4668 LOOP_ON_INPUT_LINES (inf, lb, cp)
4669 {
4670 if ((*cp++ == '@'
4671 && *cp++ == 'n'
4672 && *cp++ == 'o'
4673 && *cp++ == 'd'
4674 && *cp++ == 'e' && iswhite (*cp++)))
4675 {
4676 start = cp = skip_spaces(cp);
4677 while (*cp != '\0' && *cp != ',')
4678 cp++;
4679 pfnote (savenstr (start, cp - start), TRUE,
4680 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4681 }
4682 }
4683 }
4684
4685 \f
4686 /*
4687 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4688 *
4689 * Assumes that the predicate starts at column 0.
4690 * Only the first clause of a predicate is added.
4691 */
4692 static int prolog_pred P_((char *, char *));
4693 static void prolog_skip_comment P_((linebuffer *, FILE *));
4694 static int prolog_atom P_((char *, int));
4695
4696 static void
4697 Prolog_functions (inf)
4698 FILE *inf;
4699 {
4700 char *cp, *last;
4701 int len;
4702 int allocated;
4703
4704 allocated = 0;
4705 len = 0;
4706 last = NULL;
4707
4708 LOOP_ON_INPUT_LINES (inf, lb, cp)
4709 {
4710 if (cp[0] == '\0') /* Empty line */
4711 continue;
4712 else if (iswhite (cp[0])) /* Not a predicate */
4713 continue;
4714 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4715 prolog_skip_comment (&lb, inf);
4716 else if ((len = prolog_pred (cp, last)) > 0)
4717 {
4718 /* Predicate. Store the function name so that we only
4719 generate a tag for the first clause. */
4720 if (last == NULL)
4721 last = xnew(len + 1, char);
4722 else if (len + 1 > allocated)
4723 xrnew (last, len + 1, char);
4724 allocated = len + 1;
4725 strncpy (last, cp, len);
4726 last[len] = '\0';
4727 }
4728 }
4729 }
4730
4731
4732 static void
4733 prolog_skip_comment (plb, inf)
4734 linebuffer *plb;
4735 FILE *inf;
4736 {
4737 char *cp;
4738
4739 do
4740 {
4741 for (cp = plb->buffer; *cp != '\0'; cp++)
4742 if (cp[0] == '*' && cp[1] == '/')
4743 return;
4744 lineno++;
4745 linecharno += readline (plb, inf);
4746 }
4747 while (!feof(inf));
4748 }
4749
4750 /*
4751 * A predicate definition is added if it matches:
4752 * <beginning of line><Prolog Atom><whitespace>(
4753 *
4754 * It is added to the tags database if it doesn't match the
4755 * name of the previous clause header.
4756 *
4757 * Return the size of the name of the predicate, or 0 if no header
4758 * was found.
4759 */
4760 static int
4761 prolog_pred (s, last)
4762 char *s;
4763 char *last; /* Name of last clause. */
4764 {
4765 int pos;
4766 int len;
4767
4768 pos = prolog_atom (s, 0);
4769 if (pos < 1)
4770 return 0;
4771
4772 len = pos;
4773 pos = skip_spaces (s + pos) - s;
4774
4775 if ((s[pos] == '(') || (s[pos] == '.'))
4776 {
4777 if (s[pos] == '(')
4778 pos++;
4779
4780 /* Save only the first clause. */
4781 if (last == NULL
4782 || len != (int)strlen (last)
4783 || !strneq (s, last, len))
4784 {
4785 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4786 return len;
4787 }
4788 }
4789 return 0;
4790 }
4791
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int origpos = pos;

  if (ISLOWER(s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (ISALNUM(s[pos]) || s[pos] == '_');
      return pos - origpos;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: scan for the closing quote.  */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - origpos; /* that was the closing quote */
        pos++;                  /* A double quote */
        break;
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (s[pos+1] == '\0')
          return -1;
        pos += 2;
        break;
      default:
        pos++;
        break;
      }
}
4850
4851 \f
4852 /*
4853 * Support for Erlang -- Anders Lindgren, Feb 1996.
4854 *
4855 * Generates tags for functions, defines, and records.
4856 *
4857 * Assumes that Erlang functions start at column 0.
4858 */
4859 static int erlang_func P_((char *, char *));
4860 static void erlang_attribute P_((char *));
4861 static int erlang_atom P_((char *, int));
4862
4863 static void
4864 Erlang_functions (inf)
4865 FILE *inf;
4866 {
4867 char *cp, *last;
4868 int len;
4869 int allocated;
4870
4871 allocated = 0;
4872 len = 0;
4873 last = NULL;
4874
4875 LOOP_ON_INPUT_LINES (inf, lb, cp)
4876 {
4877 if (cp[0] == '\0') /* Empty line */
4878 continue;
4879 else if (iswhite (cp[0])) /* Not function nor attribute */
4880 continue;
4881 else if (cp[0] == '%') /* comment */
4882 continue;
4883 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4884 continue;
4885 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4886 {
4887 erlang_attribute (cp);
4888 last = NULL;
4889 }
4890 else if ((len = erlang_func (cp, last)) > 0)
4891 {
4892 /*
4893 * Function. Store the function name so that we only
4894 * generates a tag for the first clause.
4895 */
4896 if (last == NULL)
4897 last = xnew (len + 1, char);
4898 else if (len + 1 > allocated)
4899 xrnew (last, len + 1, char);
4900 allocated = len + 1;
4901 strncpy (last, cp, len);
4902 last[len] = '\0';
4903 }
4904 }
4905 }
4906
4907
4908 /*
4909 * A function definition is added if it matches:
4910 * <beginning of line><Erlang Atom><whitespace>(
4911 *
4912 * It is added to the tags database if it doesn't match the
4913 * name of the previous clause header.
4914 *
4915 * Return the size of the name of the function, or 0 if no function
4916 * was found.
4917 */
4918 static int
4919 erlang_func (s, last)
4920 char *s;
4921 char *last; /* Name of last clause. */
4922 {
4923 int pos;
4924 int len;
4925
4926 pos = erlang_atom (s, 0);
4927 if (pos < 1)
4928 return 0;
4929
4930 len = pos;
4931 pos = skip_spaces (s + pos) - s;
4932
4933 /* Save only the first clause. */
4934 if (s[pos++] == '('
4935 && (last == NULL
4936 || len != (int)strlen (last)
4937 || !strneq (s, last, len)))
4938 {
4939 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4940 return len;
4941 }
4942
4943 return 0;
4944 }
4945
4946
4947 /*
4948 * Handle attributes. Currently, tags are generated for defines
4949 * and records.
4950 *
4951 * They are on the form:
4952 * -define(foo, bar).
4953 * -define(Foo(M, N), M+N).
4954 * -record(graph, {vtab = notable, cyclic = true}).
4955 */
4956 static void
4957 erlang_attribute (s)
4958 char *s;
4959 {
4960 int pos;
4961 int len;
4962
4963 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4964 {
4965 pos = skip_spaces (s + 7) - s;
4966 if (s[pos++] == '(')
4967 {
4968 pos = skip_spaces (s + pos) - s;
4969 len = erlang_atom (s, pos);
4970 if (len != 0)
4971 pfnote (savenstr (& s[pos], len), TRUE,
4972 s, pos + len, lineno, linecharno);
4973 }
4974 }
4975 return;
4976 }
4977
4978
/*
 * Consume an Erlang atom (or variable) found at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * Accepted forms are an unquoted run of alphanumerics and underscores
 * starting with a letter or an underscore, and a single-quoted string
 * in which a backslash quotes the next character.  A quoted atom that
 * does not end on this line is an error.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (s[pos] == '\'')
    {
      /* Quoted atom: scan up to and including the closing quote. */
      pos++;
      for (;;)
        {
          switch (s[pos])
            {
            case '\'':
              pos++;
              return pos - start;

            case '\0':
              /* Multiline quoted atoms are ignored. */
              return -1;

            case '\\':
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
              break;

            default:
              pos++;
              break;
            }
        }
    }

  if (!ISALPHA (s[pos]) && s[pos] != '_')
    return -1;

  /* The atom is unquoted. */
  for (pos++; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
    continue;
  return pos - start;
}
5028
5029 \f
5030 #ifdef ETAGS_REGEXPS
5031
5032 static char *scan_separators P_((char *));
5033 static void analyse_regex P_((char *, bool));
5034 static void add_regex P_((char *, bool, language *));
5035 static char *substitute P_((char *, char *, struct re_registers *));
5036
/* Take a string like "/blah/" and turn it into "blah", working in
   place.  The first character of NAME is the separator.  Scanning
   stops at the first unquoted occurrence of the separator, or at the
   end of the string if there is none.

   A backslash quotes the next character: "\t" becomes a Tab, a quoted
   separator becomes the bare separator, and any other quoted
   character is kept together with its backslash.  A backslash that is
   the last character of the string is dropped.

   The unquoted text is stored, null terminated, starting at the
   original NAME.  Returns a pointer to the terminating separator, or
   to the end of the string if no unquoted separator was found.  An
   empty NAME is returned untouched instead of being scanned past its
   terminator. */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan: stepping over the separator would leave the string. */
  if (sep == '\0')
    return name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character. */
          ++name;
          if (*name == '\0')
            break;              /* dangling backslash: drop it */
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5079
5080 /* Look at the argument of --regex or --no-regex and do the right
5081 thing. Same for each line of a regexp file. */
5082 static void
5083 analyse_regex (regex_arg, ignore_case)
5084 char *regex_arg;
5085 bool ignore_case;
5086 {
5087 if (regex_arg == NULL)
5088 free_patterns (); /* --no-regex: remove existing regexps */
5089
5090 /* A real --regexp option or a line in a regexp file. */
5091 switch (regex_arg[0])
5092 {
5093 /* Comments in regexp file or null arg to --regex. */
5094 case '\0':
5095 case ' ':
5096 case '\t':
5097 break;
5098
5099 /* Read a regex file. This is recursive and may result in a
5100 loop, which will stop when the file descriptors are exhausted. */
5101 case '@':
5102 {
5103 FILE *regexfp;
5104 linebuffer regexbuf;
5105 char *regexfile = regex_arg + 1;
5106
5107 /* regexfile is a file containing regexps, one per line. */
5108 regexfp = fopen (regexfile, "r");
5109 if (regexfp == NULL)
5110 {
5111 pfatal (regexfile);
5112 return;
5113 }
5114 initbuffer (&regexbuf);
5115 while (readline_internal (&regexbuf, regexfp) > 0)
5116 analyse_regex (regexbuf.buffer, ignore_case);
5117 free (regexbuf.buffer);
5118 fclose (regexfp);
5119 }
5120 break;
5121
5122 /* Regexp to be used for a specific language only. */
5123 case '{':
5124 {
5125 language *lang;
5126 char *lang_name = regex_arg + 1;
5127 char *cp;
5128
5129 for (cp = lang_name; *cp != '}'; cp++)
5130 if (*cp == '\0')
5131 {
5132 error ("unterminated language name in regex: %s", regex_arg);
5133 return;
5134 }
5135 *cp = '\0';
5136 lang = get_language_from_langname (lang_name);
5137 if (lang == NULL)
5138 return;
5139 add_regex (cp + 1, ignore_case, lang);
5140 }
5141 break;
5142
5143 /* Regexp to be used for any language. */
5144 default:
5145 add_regex (regex_arg, ignore_case, NULL);
5146 break;
5147 }
5148 }
5149
5150 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5151 expression, into a real regular expression by compiling it. */
5152 static void
5153 add_regex (regexp_pattern, ignore_case, lang)
5154 char *regexp_pattern;
5155 bool ignore_case;
5156 language *lang;
5157 {
5158 char *name;
5159 const char *err;
5160 struct re_pattern_buffer *patbuf;
5161 pattern *pp;
5162
5163
5164 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5165 {
5166 error ("%s: unterminated regexp", regexp_pattern);
5167 return;
5168 }
5169 name = scan_separators (regexp_pattern);
5170 if (regexp_pattern[0] == '\0')
5171 {
5172 error ("null regexp", (char *)NULL);
5173 return;
5174 }
5175 (void) scan_separators (name);
5176
5177 patbuf = xnew (1, struct re_pattern_buffer);
5178 /* Translation table to fold case if appropriate. */
5179 patbuf->translate = (ignore_case) ? lc_trans : NULL;
5180 patbuf->fastmap = NULL;
5181 patbuf->buffer = NULL;
5182 patbuf->allocated = 0;
5183
5184 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5185 if (err != NULL)
5186 {
5187 error ("%s while compiling pattern", err);
5188 return;
5189 }
5190
5191 pp = p_head;
5192 p_head = xnew (1, pattern);
5193 p_head->regex = savestr (regexp_pattern);
5194 p_head->p_next = pp;
5195 p_head->language = lang;
5196 p_head->pattern = patbuf;
5197 p_head->name_pattern = savestr (name);
5198 p_head->error_signaled = FALSE;
5199 }
5200
5201 /*
5202 * Do the substitutions indicated by the regular expression and
5203 * arguments.
5204 */
5205 static char *
5206 substitute (in, out, regs)
5207 char *in, *out;
5208 struct re_registers *regs;
5209 {
5210 char *result, *t;
5211 int size, dig, diglen;
5212
5213 result = NULL;
5214 size = strlen (out);
5215
5216 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5217 if (out[size - 1] == '\\')
5218 fatal ("pattern error in \"%s\"", out);
5219 for (t = etags_strchr (out, '\\');
5220 t != NULL;
5221 t = etags_strchr (t + 2, '\\'))
5222 if (ISDIGIT (t[1]))
5223 {
5224 dig = t[1] - '0';
5225 diglen = regs->end[dig] - regs->start[dig];
5226 size += diglen - 2;
5227 }
5228 else
5229 size -= 1;
5230
5231 /* Allocate space and do the substitutions. */
5232 result = xnew (size + 1, char);
5233
5234 for (t = result; *out != '\0'; out++)
5235 if (*out == '\\' && ISDIGIT (*++out))
5236 {
5237 dig = *out - '0';
5238 diglen = regs->end[dig] - regs->start[dig];
5239 strncpy (t, in + regs->start[dig], diglen);
5240 t += diglen;
5241 }
5242 else
5243 *t++ = *out;
5244 *t = '\0';
5245
5246 assert (t <= result + size && t - result == (int)strlen (result));
5247
5248 return result;
5249 }
5250
5251 /* Deallocate all patterns. */
5252 static void
5253 free_patterns ()
5254 {
5255 pattern *pp;
5256 while (p_head != NULL)
5257 {
5258 pp = p_head->p_next;
5259 free (p_head->regex);
5260 free (p_head->name_pattern);
5261 free (p_head);
5262 p_head = pp;
5263 }
5264 return;
5265 }
5266 #endif /* ETAGS_REGEXPS */
5267
5268 \f
5269 static void
5270 get_tag (bp)
5271 register char *bp;
5272 {
5273 register char *cp;
5274
5275 if (*bp == '\0')
5276 return;
5277 /* Go till you get to white space or a syntactic break */
5278 for (cp = bp + 1;
5279 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5280 cp++)
5281 continue;
5282 pfnote (savenstr (bp, cp-bp), TRUE,
5283 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5284 }
5285
5286 /* Initialize a linebuffer for use */
5287 static void
5288 initbuffer (lbp)
5289 linebuffer *lbp;
5290 {
5291 lbp->size = (DEBUG) ? 3 : 200;
5292 lbp->buffer = xnew (lbp->size, char);
5293 lbp->buffer[0] = '\0';
5294 lbp->len = 0;
5295 }
5296
5297 /*
5298 * Read a line of text from `stream' into `lbp', excluding the
5299 * newline or CR-NL, if any. Return the number of characters read from
5300 * `stream', which is the length of the line including the newline.
5301 *
5302 * On DOS or Windows we do not count the CR character, if any, before the
5303 * NL, in the returned length; this mirrors the behavior of emacs on those
5304 * platforms (for text files, it translates CR-NL to NL as it reads in the
5305 * file).
5306 */
5307 static long
5308 readline_internal (lbp, stream)
5309 linebuffer *lbp;
5310 register FILE *stream;
5311 {
5312 char *buffer = lbp->buffer;
5313 register char *p = lbp->buffer;
5314 register char *pend;
5315 int chars_deleted;
5316
5317 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5318
5319 while (1)
5320 {
5321 register int c = getc (stream);
5322 if (p == pend)
5323 {
5324 /* We're at the end of linebuffer: expand it. */
5325 lbp->size *= 2;
5326 xrnew (buffer, lbp->size, char);
5327 p += buffer - lbp->buffer;
5328 pend = buffer + lbp->size;
5329 lbp->buffer = buffer;
5330 }
5331 if (c == EOF)
5332 {
5333 *p = '\0';
5334 chars_deleted = 0;
5335 break;
5336 }
5337 if (c == '\n')
5338 {
5339 if (p > buffer && p[-1] == '\r')
5340 {
5341 p -= 1;
5342 #ifdef DOS_NT
5343 /* Assume CRLF->LF translation will be performed by Emacs
5344 when loading this file, so CRs won't appear in the buffer.
5345 It would be cleaner to compensate within Emacs;
5346 however, Emacs does not know how many CRs were deleted
5347 before any given point in the file. */
5348 chars_deleted = 1;
5349 #else
5350 chars_deleted = 2;
5351 #endif
5352 }
5353 else
5354 {
5355 chars_deleted = 1;
5356 }
5357 *p = '\0';
5358 break;
5359 }
5360 *p++ = c;
5361 }
5362 lbp->len = p - buffer;
5363
5364 return lbp->len + chars_deleted;
5365 }
5366
5367 /*
5368 * Like readline_internal, above, but in addition try to match the
5369 * input line against relevant regular expressions.
5370 */
5371 static long
5372 readline (lbp, stream)
5373 linebuffer *lbp;
5374 FILE *stream;
5375 {
5376 /* Read new line. */
5377 long result = readline_internal (lbp, stream);
5378 #ifdef ETAGS_REGEXPS
5379 int match;
5380 pattern *pp;
5381
5382 /* Match against relevant patterns. */
5383 if (lbp->len > 0)
5384 for (pp = p_head; pp != NULL; pp = pp->p_next)
5385 {
5386 /* Only use generic regexps or those for the current language. */
5387 if (pp->language != NULL && pp->language != curlang)
5388 continue;
5389
5390 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5391 switch (match)
5392 {
5393 case -2:
5394 /* Some error. */
5395 if (!pp->error_signaled)
5396 {
5397 error ("error while matching \"%s\"", pp->regex);
5398 pp->error_signaled = TRUE;
5399 }
5400 break;
5401 case -1:
5402 /* No match. */
5403 break;
5404 default:
5405 /* Match occurred. Construct a tag. */
5406 if (pp->name_pattern[0] != '\0')
5407 {
5408 /* Make a named tag. */
5409 char *name = substitute (lbp->buffer,
5410 pp->name_pattern, &pp->regs);
5411 if (name != NULL)
5412 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5413 }
5414 else
5415 {
5416 /* Make an unnamed tag. */
5417 pfnote ((char *)NULL, TRUE,
5418 lbp->buffer, match, lineno, linecharno);
5419 }
5420 break;
5421 }
5422 }
5423 #endif /* ETAGS_REGEXPS */
5424
5425 return result;
5426 }
5427
5428 \f
/*
 * Duplicate the string CP: return a pointer to strlen(cp)+1 bytes
 * obtained through savenstr, holding a copy of the whole string.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole = strlen (cp);

  return savenstr (cp, whole);
}
5439
5440 /*
5441 * Return a pointer to a space of size LEN+1 allocated with xnew where
5442 * the string CP has been copied for at most the first LEN characters.
5443 */
5444 static char *
5445 savenstr (cp, len)
5446 char *cp;
5447 int len;
5448 {
5449 register char *dp;
5450
5451 dp = xnew (len + 1, char);
5452 strncpy (dp, cp, len);
5453 dp[len] = '\0';
5454 return dp;
5455 }
5456
/*
 * Return a pointer to the last place in SP where the character C
 * appears, or NULL if it does not appear at all.  The terminating
 * null takes part in the search.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        found = sp;             /* keep the rightmost hit so far */
      if (*sp == '\0')
        break;
    }
  return (char *)found;
}
5478
5479
/*
 * Return a pointer to the first place in SP where the character C
 * appears, or NULL if it does not appear at all.  The terminating
 * null takes part in the search.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a hit */
    }
}
5498
/* Advance CP over any run of white space characters and return the
   position of the first character that is not white space. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5508
/* Advance CP up to the next white space character or to the end of
   the string, whichever comes first, and return that position. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
5518
5519 /* Print error message and exit. */
5520 void
5521 fatal (s1, s2)
5522 char *s1, *s2;
5523 {
5524 error (s1, s2);
5525 exit (BAD);
5526 }
5527
5528 static void
5529 pfatal (s1)
5530 char *s1;
5531 {
5532 perror (s1);
5533 exit (BAD);
5534 }
5535
5536 static void
5537 suggest_asking_for_help ()
5538 {
5539 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5540 progname,
5541 #ifdef LONG_OPTIONS
5542 "--help"
5543 #else
5544 "-h"
5545 #endif
5546 );
5547 exit (BAD);
5548 }
5549
5550 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5551 static void
5552 error (s1, s2)
5553 const char *s1, *s2;
5554 {
5555 fprintf (stderr, "%s: ", progname);
5556 fprintf (stderr, s1, s2);
5557 fprintf (stderr, "\n");
5558 }
5559
5560 /* Return a newly-allocated string whose contents
5561 concatenate those of s1, s2, s3. */
5562 static char *
5563 concat (s1, s2, s3)
5564 char *s1, *s2, *s3;
5565 {
5566 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5567 char *result = xnew (len1 + len2 + len3 + 1, char);
5568
5569 strcpy (result, s1);
5570 strcpy (result + len1, s2);
5571 strcpy (result + len1 + len2, s3);
5572 result[len1 + len2 + len3] = '\0';
5573
5574 return result;
5575 }
5576
5577 \f
5578 /* Does the same work as the system V getcwd, but does not need to
5579 guess the buffer size in advance. */
5580 static char *
5581 etags_getcwd ()
5582 {
5583 #ifdef HAVE_GETCWD
5584 int bufsize = 200;
5585 char *path = xnew (bufsize, char);
5586
5587 while (getcwd (path, bufsize) == NULL)
5588 {
5589 if (errno != ERANGE)
5590 pfatal ("getcwd");
5591 bufsize *= 2;
5592 free (path);
5593 path = xnew (bufsize, char);
5594 }
5595
5596 canonicalize_filename (path);
5597 return path;
5598
5599 #else /* not HAVE_GETCWD */
5600 #if MSDOS
5601
5602 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5603
5604 getwd (path);
5605
5606 for (p = path; *p != '\0'; p++)
5607 if (*p == '\\')
5608 *p = '/';
5609 else
5610 *p = lowcase (*p);
5611
5612 return strdup (path);
5613 #else /* not MSDOS */
5614 linebuffer path;
5615 FILE *pipe;
5616
5617 initbuffer (&path);
5618 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5619 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5620 pfatal ("pwd");
5621 pclose (pipe);
5622
5623 return path.buffer;
5624 #endif /* not MSDOS */
5625 #endif /* not HAVE_GETCWD */
5626 }
5627
5628 /* Return a newly allocated string containing the file name of FILE
5629 relative to the absolute directory DIR (which should end with a slash). */
5630 static char *
5631 relative_filename (file, dir)
5632 char *file, *dir;
5633 {
5634 char *fp, *dp, *afn, *res;
5635 int i;
5636
5637 /* Find the common root of file and dir (with a trailing slash). */
5638 afn = absolute_filename (file, cwd);
5639 fp = afn;
5640 dp = dir;
5641 while (*fp++ == *dp++)
5642 continue;
5643 fp--, dp--; /* back to the first differing char */
5644 #ifdef DOS_NT
5645 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5646 return afn;
5647 #endif
5648 do /* look at the equal chars until '/' */
5649 fp--, dp--;
5650 while (*fp != '/');
5651
5652 /* Build a sequence of "../" strings for the resulting relative file name. */
5653 i = 0;
5654 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5655 i += 1;
5656 res = xnew (3*i + strlen (fp + 1) + 1, char);
5657 res[0] = '\0';
5658 while (i-- > 0)
5659 strcat (res, "../");
5660
5661 /* Add the file name relative to the common root of file and dir. */
5662 strcat (res, fp + 1);
5663 free (afn);
5664
5665 return res;
5666 }
5667
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is, otherwise it is appended to DIR.
   Every "/dirname/.." and "/." component is then removed in place.
   A name that collapses to nothing becomes "/". */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the start of the preceding component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy cannot be
                 used: shift the tail, terminator included. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop a "/." component, again with overlapping areas. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root directory. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5728
5729 /* Return a newly allocated string containing the absolute
5730 file name of dir where FILE resides given DIR (which should
5731 end with a slash). */
5732 static char *
5733 absolute_dirname (file, dir)
5734 char *file, *dir;
5735 {
5736 char *slashp, *res;
5737 char save;
5738
5739 canonicalize_filename (file);
5740 slashp = etags_strrchr (file, '/');
5741 if (slashp == NULL)
5742 return savestr (dir);
5743 save = slashp[1];
5744 slashp[1] = '\0';
5745 res = absolute_filename (file, dir);
5746 slashp[1] = save;
5747
5748 return res;
5749 }
5750
/* Tell whether the file name FN is absolute: it starts with a slash
   or, on DOS_NT, with a drive letter, a colon and a slash.  The
   argument must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
5763
/* Canonicalize the file name FN in place.  On DOS_NT a lower case
   drive letter is made upper case and every backslash becomes a
   slash; elsewhere the name is left alone. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5782
5783 /* Set the minimum size of a string contained in a linebuffer. */
5784 static void
5785 linebuffer_setlen (lbp, toksize)
5786 linebuffer *lbp;
5787 int toksize;
5788 {
5789 while (lbp->size <= toksize)
5790 {
5791 lbp->size *= 2;
5792 xrnew (lbp->buffer, lbp->size, char);
5793 }
5794 lbp->len = toksize;
5795 }
5796
/* Allocate SIZE bytes like malloc does, but never return NULL: when
   malloc fails, the program is terminated through fatal. */
long *
xmalloc (size)
     unsigned int size;
{
  long *block;

  block = (long *) malloc (size);
  if (block == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
5807
/* Resize the block PTR to SIZE bytes like realloc does, but never
   return NULL: when realloc fails, the program is terminated through
   fatal. */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *block;

  block = (long *) realloc (ptr, size);
  if (block == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}