(C_entries): Corrected a problem with const C++ funcs.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Line-by-line regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
33 *
34 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
35 *
36 */
37
38 char pot_etags_version[] = "@(#) pot revision number is 16.46";
39
40 #define TRUE 1
41 #define FALSE 0
42
43 #ifdef DEBUG
44 # undef DEBUG
45 # define DEBUG TRUE
46 #else
47 # define DEBUG FALSE
48 # define NDEBUG /* disable assert */
49 #endif
50
51 #ifdef HAVE_CONFIG_H
52 # include <config.h>
53 /* On some systems, Emacs defines static as nothing for the sake
54 of unexec. We don't want that here since we don't use unexec. */
55 # undef static
56 # define ETAGS_REGEXPS /* use the regexp features */
57 # define LONG_OPTIONS /* accept long options */
58 # ifndef PTR /* for Xemacs */
59 # define PTR void *
60 # endif
61 # ifndef __P /* for Xemacs */
62 # define __P(args) args
63 # endif
64 #else
65 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
66 # define __P(args) args /* use prototypes */
67 # define PTR void * /* for generic pointers */
68 # else
69 # define __P(args) () /* no prototypes */
70 # define const /* remove const for old compilers' sake */
71 # define PTR long * /* don't use void* */
72 # endif
73 #endif /* !HAVE_CONFIG_H */
74
75 #ifndef _GNU_SOURCE
76 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
77 #endif
78
79 /* WIN32_NATIVE is for Xemacs.
80 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
81 #ifdef WIN32_NATIVE
82 # undef MSDOS
83 # undef WINDOWSNT
84 # define WINDOWSNT
85 #endif /* WIN32_NATIVE */
86
87 #ifdef MSDOS
88 # undef MSDOS
89 # define MSDOS TRUE
90 # include <fcntl.h>
91 # include <sys/param.h>
92 # include <io.h>
93 # ifndef HAVE_CONFIG_H
94 # define DOS_NT
95 # include <sys/config.h>
96 # endif
97 #else
98 # define MSDOS FALSE
99 #endif /* MSDOS */
100
101 #ifdef WINDOWSNT
102 # include <stdlib.h>
103 # include <fcntl.h>
104 # include <string.h>
105 # include <direct.h>
106 # include <io.h>
107 # define MAXPATHLEN _MAX_PATH
108 # undef HAVE_NTGUI
109 # undef DOS_NT
110 # define DOS_NT
111 # ifndef HAVE_GETCWD
112 # define HAVE_GETCWD
113 # endif /* undef HAVE_GETCWD */
114 #else /* !WINDOWSNT */
115 # ifdef STDC_HEADERS
116 # include <stdlib.h>
117 # include <string.h>
118 # else
119 extern char *getenv ();
120 # endif
121 #endif /* !WINDOWSNT */
122
123 #ifdef HAVE_UNISTD_H
124 # include <unistd.h>
125 #else
126 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
127 extern char *getcwd (char *buf, size_t size);
128 # endif
129 #endif /* HAVE_UNISTD_H */
130
131 #include <stdio.h>
132 #include <ctype.h>
133 #include <errno.h>
134 #ifndef errno
135 extern int errno;
136 #endif
137 #include <sys/types.h>
138 #include <sys/stat.h>
139
140 #include <assert.h>
141 #ifdef NDEBUG
142 # undef assert /* some systems have a buggy assert.h */
143 # define assert(x) ((void) 0)
144 #endif
145
146 #if !defined (S_ISREG) && defined (S_IFREG)
147 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
148 #endif
149
150 #ifdef LONG_OPTIONS
151 # include <getopt.h>
152 #else
153 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
154 extern char *optarg;
155 extern int optind, opterr;
156 #endif /* LONG_OPTIONS */
157
158 #ifdef ETAGS_REGEXPS
159 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
160 # ifdef __CYGWIN__ /* compiling on Cygwin */
161 !!! NOTICE !!!
162 the regex.h distributed with Cygwin is not compatible with etags, alas!
163 If you want regular expression support, you should delete this notice and
164 arrange to use the GNU regex.h and regex.c.
165 # endif
166 # endif
167 # include <regex.h>
168 #endif /* ETAGS_REGEXPS */
169
170 /* Define CTAGS to make the program "ctags" compatible with the usual one.
171 Leave it undefined to make the program "etags", which makes emacs-style
172 tag tables and tags typedefs, #defines and struct/union/enum by default. */
173 #ifdef CTAGS
174 # undef CTAGS
175 # define CTAGS TRUE
176 #else
177 # define CTAGS FALSE
178 #endif
179
180 /* Exit codes for success and failure. */
181 #ifdef VMS
182 # define GOOD 1
183 # define BAD 0
184 #else
185 # define GOOD 0
186 # define BAD 1
187 #endif
188
189 #define streq(s,t) (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
190 #define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
191 #define strneq(s,t,n) (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
192 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
193
194 #define CHARS 256 /* 2^sizeof(char) */
195 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
196 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
197 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
198 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
199 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
200 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
201
202 #define ISALNUM(c) isalnum (CHAR(c))
203 #define ISALPHA(c) isalpha (CHAR(c))
204 #define ISDIGIT(c) isdigit (CHAR(c))
205 #define ISLOWER(c) islower (CHAR(c))
206
207 #define lowcase(c) tolower (CHAR(c))
208 #define upcase(c) toupper (CHAR(c))
209
210
211 /*
212 * xnew, xrnew -- allocate, reallocate storage
213 *
214 * SYNOPSIS: Type *xnew (int n, Type);
215 * void xrnew (OldPointer, int n, Type);
216 */
217 #if DEBUG
218 # include "chkmalloc.h"
219 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
220 (n) * sizeof (Type)))
221 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
222 (char *) (op), (n) * sizeof (Type)))
223 #else
224 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
225 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
226 (char *) (op), (n) * sizeof (Type)))
227 #endif
228
229 #define bool int
230
231 typedef void Lang_function __P((FILE *));
232
233 typedef struct
234 {
235 char *suffix; /* file name suffix for this compressor */
236 char *command; /* takes one arg and decompresses to stdout */
237 } compressor;
238
239 typedef struct
240 {
241 char *name; /* language name */
242 bool metasource; /* source used to generate other sources */
243 Lang_function *function; /* parse function */
244 char **filenames; /* names of this language's files */
245 char **suffixes; /* name suffixes of this language's files */
246 char **interpreters; /* interpreters for this language */
247 } language;
248
249 typedef struct fdesc
250 {
251 struct fdesc *next; /* for the linked list */
252 char *infname; /* uncompressed input file name */
253 char *infabsname; /* absolute uncompressed input file name */
254 char *infabsdir; /* absolute dir of input file */
255 char *taggedfname; /* file name to write in tagfile */
256 language *lang; /* language of file */
257 char *prop; /* file properties to write in tagfile */
258 bool usecharno; /* etags tags shall contain char number */
259 bool written; /* entry written in the tags file */
260 } fdesc;
261
262 typedef struct node_st
263 { /* sorting structure */
264 struct node_st *left, *right; /* left and right sons */
265 fdesc *fdp; /* description of file to whom tag belongs */
266 char *name; /* tag name */
267 char *regex; /* search regexp */
268 bool valid; /* write this tag on the tag file */
269 bool is_func; /* function tag: use regexp in CTAGS mode */
270 bool been_warned; /* warning already given for duplicated tag */
271 int lno; /* line number tag is on */
272 long cno; /* character number line starts on */
273 } node;
274
275 /*
276 * A `linebuffer' is a structure which holds a line of text.
277 * `readline_internal' reads a line from a stream into a linebuffer
278 * and works regardless of the length of the line.
279 * SIZE is the size of BUFFER, LEN is the length of the string in
280 * BUFFER after readline reads it.
281 */
282 typedef struct
283 {
284 long size;
285 int len;
286 char *buffer;
287 } linebuffer;
288
289 /* Used to support mixing of --lang and file names. */
290 typedef struct
291 {
292 enum {
293 at_language, /* a language specification */
294 at_regexp, /* a regular expression */
295 at_filename, /* a file name */
296 at_stdin /* read from stdin here */
297 } arg_type; /* argument type */
298 language *lang; /* language associated with the argument */
299 char *what; /* the argument itself */
300 } argument;
301
302 #ifdef ETAGS_REGEXPS
303 /* Structure defining a regular expression. */
304 typedef struct regexp
305 {
306 struct regexp *p_next; /* pointer to next in list */
307 language *lang; /* if set, use only for this language */
308 char *pattern; /* the regexp pattern */
309 char *name; /* tag name */
310 struct re_pattern_buffer *pat; /* the compiled pattern */
311 struct re_registers regs; /* re registers */
312 bool error_signaled; /* already signaled for this regexp */
313 bool force_explicit_name; /* do not allow implict tag name */
314 bool ignore_case; /* ignore case when matching */
315 bool multi_line; /* do a multi-line match on the whole file */
316 } regexp;
317 #endif /* ETAGS_REGEXPS */
318
319
320 /* Many compilers barf on this:
321 Lang_function Ada_funcs;
322 so let's write it this way */
323 static void Ada_funcs __P((FILE *));
324 static void Asm_labels __P((FILE *));
325 static void C_entries __P((int c_ext, FILE *));
326 static void default_C_entries __P((FILE *));
327 static void plain_C_entries __P((FILE *));
328 static void Cjava_entries __P((FILE *));
329 static void Cobol_paragraphs __P((FILE *));
330 static void Cplusplus_entries __P((FILE *));
331 static void Cstar_entries __P((FILE *));
332 static void Erlang_functions __P((FILE *));
333 static void Fortran_functions __P((FILE *));
334 static void HTML_labels __P((FILE *));
335 static void Lisp_functions __P((FILE *));
336 static void Makefile_targets __P((FILE *));
337 static void Pascal_functions __P((FILE *));
338 static void Perl_functions __P((FILE *));
339 static void PHP_functions __P((FILE *));
340 static void Postscript_functions __P((FILE *));
341 static void Prolog_functions __P((FILE *));
342 static void Python_functions __P((FILE *));
343 static void Scheme_functions __P((FILE *));
344 static void TeX_commands __P((FILE *));
345 static void Texinfo_nodes __P((FILE *));
346 static void Yacc_entries __P((FILE *));
347 static void just_read_file __P((FILE *));
348
349 static void print_language_names __P((void));
350 static void print_version __P((void));
351 static void print_help __P((void));
352 int main __P((int, char **));
353
354 static compressor *get_compressor_from_suffix __P((char *, char **));
355 static language *get_language_from_langname __P((const char *));
356 static language *get_language_from_interpreter __P((char *));
357 static language *get_language_from_filename __P((char *, bool));
358 static void readline __P((linebuffer *, FILE *));
359 static long readline_internal __P((linebuffer *, FILE *));
360 static bool nocase_tail __P((char *));
361 static void get_tag __P((char *, char **));
362
363 #ifdef ETAGS_REGEXPS
364 static void analyse_regex __P((char *));
365 static void free_regexps __P((void));
366 static void regex_tag_multiline __P((void));
367 #endif /* ETAGS_REGEXPS */
368 static void error __P((const char *, const char *));
369 static void suggest_asking_for_help __P((void));
370 void fatal __P((char *, char *));
371 static void pfatal __P((char *));
372 static void add_node __P((node *, node **));
373
374 static void init __P((void));
375 static void process_file_name __P((char *, language *));
376 static void process_file __P((FILE *, char *, language *));
377 static void find_entries __P((FILE *));
378 static void free_tree __P((node *));
379 static void free_fdesc __P((fdesc *));
380 static void pfnote __P((char *, bool, char *, int, int, long));
381 static void make_tag __P((char *, int, bool, char *, int, int, long));
382 static void invalidate_nodes __P((fdesc *, node **));
383 static void put_entries __P((node *));
384
385 static char *concat __P((char *, char *, char *));
386 static char *skip_spaces __P((char *));
387 static char *skip_non_spaces __P((char *));
388 static char *savenstr __P((char *, int));
389 static char *savestr __P((char *));
390 static char *etags_strchr __P((const char *, int));
391 static char *etags_strrchr __P((const char *, int));
392 static int etags_strcasecmp __P((const char *, const char *));
393 static int etags_strncasecmp __P((const char *, const char *, int));
394 static char *etags_getcwd __P((void));
395 static char *relative_filename __P((char *, char *));
396 static char *absolute_filename __P((char *, char *));
397 static char *absolute_dirname __P((char *, char *));
398 static bool filename_is_absolute __P((char *f));
399 static void canonicalize_filename __P((char *));
400 static void linebuffer_init __P((linebuffer *));
401 static void linebuffer_setlen __P((linebuffer *, int));
402 static PTR xmalloc __P((unsigned int));
403 static PTR xrealloc __P((char *, unsigned int));
404
405 \f
406 static char searchar = '/'; /* use /.../ searches */
407
408 static char *tagfile; /* output file */
409 static char *progname; /* name this program was invoked with */
410 static char *cwd; /* current working directory */
411 static char *tagfiledir; /* directory of tagfile */
412 static FILE *tagf; /* ioptr for tags file */
413
414 static fdesc *fdhead; /* head of file description list */
415 static fdesc *curfdp; /* current file description */
416 static int lineno; /* line number of current line */
417 static long charno; /* current character number */
418 static long linecharno; /* charno of start of current line */
419 static char *dbp; /* pointer to start of current tag */
420
421 static const int invalidcharno = -1;
422
423 static node *nodehead; /* the head of the binary tree of tags */
424 static node *last_node; /* the last node created */
425
426 static linebuffer lb; /* the current line */
427 static linebuffer filebuf; /* a buffer containing the whole file */
428 static linebuffer token_name; /* a buffer containing a tag name */
429
430 /* boolean "functions" (see init) */
431 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
432 static char
433 /* white chars */
434 *white = " \f\t\n\r\v",
435 /* not in a name */
436 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
437 /* token ending chars */
438 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
439 /* token starting chars */
440 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
441 /* valid in-token chars */
442 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
443
444 static bool append_to_tagfile; /* -a: append to tags */
445 /* The next four default to TRUE for etags, but to FALSE for ctags. */
446 static bool typedefs; /* -t: create tags for C and Ada typedefs */
447 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
448 /* 0 struct/enum/union decls, and C++ */
449 /* member functions. */
450 static bool constantypedefs; /* -d: create tags for C #define, enum */
451 /* constants and variables. */
452 /* -D: opposite of -d. Default under ctags. */
453 static bool globals; /* create tags for global variables */
454 static bool declarations; /* --declarations: tag them and extern in C&Co*/
455 static bool members; /* create tags for C member variables */
456 static bool no_line_directive; /* ignore #line directives (undocumented) */
457 static bool update; /* -u: update tags */
458 static bool vgrind_style; /* -v: create vgrind style index output */
459 static bool no_warnings; /* -w: suppress warnings */
460 static bool cxref_style; /* -x: create cxref style output */
461 static bool cplusplus; /* .[hc] means C++, not C */
462 static bool ignoreindent; /* -I: ignore indentation in C */
463 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
464
465 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
466 static bool parsing_stdin; /* --parse-stdin used */
467
468 #ifdef ETAGS_REGEXPS
469 static regexp *p_head; /* list of all regexps */
470 static bool need_filebuf; /* some regexes are multi-line */
471 #else
472 # define need_filebuf FALSE
473 #endif /* ETAGS_REGEXPS */
474
475 #ifdef LONG_OPTIONS
476 static struct option longopts[] =
477 {
478 { "packages-only", no_argument, &packages_only, TRUE },
479 { "c++", no_argument, NULL, 'C' },
480 { "declarations", no_argument, &declarations, TRUE },
481 { "no-line-directive", no_argument, &no_line_directive, TRUE },
482 { "help", no_argument, NULL, 'h' },
483 { "help", no_argument, NULL, 'H' },
484 { "ignore-indentation", no_argument, NULL, 'I' },
485 { "language", required_argument, NULL, 'l' },
486 { "members", no_argument, &members, TRUE },
487 { "no-members", no_argument, &members, FALSE },
488 { "output", required_argument, NULL, 'o' },
489 #ifdef ETAGS_REGEXPS
490 { "regex", required_argument, NULL, 'r' },
491 { "no-regex", no_argument, NULL, 'R' },
492 { "ignore-case-regex", required_argument, NULL, 'c' },
493 #endif /* ETAGS_REGEXPS */
494 { "parse-stdin", required_argument, NULL, STDIN },
495 { "version", no_argument, NULL, 'V' },
496
497 #if CTAGS /* Etags options */
498 { "backward-search", no_argument, NULL, 'B' },
499 { "cxref", no_argument, NULL, 'x' },
500 { "defines", no_argument, NULL, 'd' },
501 { "globals", no_argument, &globals, TRUE },
502 { "typedefs", no_argument, NULL, 't' },
503 { "typedefs-and-c++", no_argument, NULL, 'T' },
504 { "update", no_argument, NULL, 'u' },
505 { "vgrind", no_argument, NULL, 'v' },
506 { "no-warn", no_argument, NULL, 'w' },
507
508 #else /* Ctags options */
509 { "append", no_argument, NULL, 'a' },
510 { "no-defines", no_argument, NULL, 'D' },
511 { "no-globals", no_argument, &globals, FALSE },
512 { "include", required_argument, NULL, 'i' },
513 #endif
514 { NULL }
515 };
516 #endif /* LONG_OPTIONS */
517
518 static compressor compressors[] =
519 {
520 { "z", "gzip -d -c"},
521 { "Z", "gzip -d -c"},
522 { "gz", "gzip -d -c"},
523 { "GZ", "gzip -d -c"},
524 { "bz2", "bzip2 -d -c" },
525 { NULL }
526 };
527
528 /*
529 * Language stuff.
530 */
531
532 /* Ada code */
533 static char *Ada_suffixes [] =
534 { "ads", "adb", "ada", NULL };
535
536 /* Assembly code */
537 static char *Asm_suffixes [] =
538 { "a", /* Unix assembler */
539 "asm", /* Microcontroller assembly */
540 "def", /* BSO/Tasking definition includes */
541 "inc", /* Microcontroller include files */
542 "ins", /* Microcontroller include files */
543 "s", "sa", /* Unix assembler */
544 "S", /* cpp-processed Unix assembler */
545 "src", /* BSO/Tasking C compiler output */
546 NULL
547 };
548
549 /* Note that .c and .h can be considered C++, if the --c++ flag was
550 given, or if the `class' keyowrd is met inside the file.
551 That is why default_C_entries is called for these. */
552 static char *default_C_suffixes [] =
553 { "c", "h", NULL };
554
555 static char *Cplusplus_suffixes [] =
556 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
557 "M", /* Objective C++ */
558 "pdb", /* Postscript with C syntax */
559 NULL };
560
561 static char *Cjava_suffixes [] =
562 { "java", NULL };
563
564 static char *Cobol_suffixes [] =
565 { "COB", "cob", NULL };
566
567 static char *Cstar_suffixes [] =
568 { "cs", "hs", NULL };
569
570 static char *Erlang_suffixes [] =
571 { "erl", "hrl", NULL };
572
573 static char *Fortran_suffixes [] =
574 { "F", "f", "f90", "for", NULL };
575
576 static char *HTML_suffixes [] =
577 { "htm", "html", "shtml", NULL };
578
579 static char *Lisp_suffixes [] =
580 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
581
582 static char *Makefile_filenames [] =
583 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
584
585 static char *Pascal_suffixes [] =
586 { "p", "pas", NULL };
587
588 static char *Perl_suffixes [] =
589 { "pl", "pm", NULL };
590
591 static char *Perl_interpreters [] =
592 { "perl", "@PERL@", NULL };
593
594 static char *PHP_suffixes [] =
595 { "php", "php3", "php4", NULL };
596
597 static char *plain_C_suffixes [] =
598 { "lm", /* Objective lex file */
599 "m", /* Objective C file */
600 "pc", /* Pro*C file */
601 NULL };
602
603 static char *Postscript_suffixes [] =
604 { "ps", "psw", NULL }; /* .psw is for PSWrap */
605
606 static char *Prolog_suffixes [] =
607 { "prolog", NULL };
608
609 static char *Python_suffixes [] =
610 { "py", NULL };
611
612 /* Can't do the `SCM' or `scm' prefix with a version number. */
613 static char *Scheme_suffixes [] =
614 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
615
616 static char *TeX_suffixes [] =
617 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
618
619 static char *Texinfo_suffixes [] =
620 { "texi", "texinfo", "txi", NULL };
621
622 static char *Yacc_suffixes [] =
623 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
624
625 /*
626 * Table of languages.
627 *
628 * It is ok for a given function to be listed under more than one
629 * name. I just didn't.
630 */
631
632 static language lang_names [] =
633 {
634 { "ada", FALSE, Ada_funcs, NULL, Ada_suffixes, NULL },
635 { "asm", FALSE, Asm_labels, NULL, Asm_suffixes, NULL },
636 { "c", FALSE, default_C_entries, NULL, default_C_suffixes, NULL },
637 { "c++", FALSE, Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
638 { "c*", FALSE, Cstar_entries, NULL, Cstar_suffixes, NULL },
639 { "cobol", FALSE, Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
640 { "erlang", FALSE, Erlang_functions, NULL, Erlang_suffixes, NULL },
641 { "fortran", FALSE, Fortran_functions, NULL, Fortran_suffixes, NULL },
642 { "html", FALSE, HTML_labels, NULL, HTML_suffixes, NULL },
643 { "java", FALSE, Cjava_entries, NULL, Cjava_suffixes, NULL },
644 { "lisp", FALSE, Lisp_functions, NULL, Lisp_suffixes, NULL },
645 { "makefile", FALSE, Makefile_targets, Makefile_filenames, NULL, NULL },
646 { "pascal", FALSE, Pascal_functions, NULL, Pascal_suffixes, NULL },
647 { "perl", FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
648 { "php", FALSE, PHP_functions, NULL, PHP_suffixes, NULL },
649 { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
650 { "proc", FALSE, plain_C_entries, NULL, plain_C_suffixes, NULL },
651 { "prolog", FALSE, Prolog_functions, NULL, Prolog_suffixes, NULL },
652 { "python", FALSE, Python_functions, NULL, Python_suffixes, NULL },
653 { "scheme", FALSE, Scheme_functions, NULL, Scheme_suffixes, NULL },
654 { "tex", FALSE, TeX_commands, NULL, TeX_suffixes, NULL },
655 { "texinfo", FALSE, Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
656 { "yacc", TRUE, Yacc_entries, NULL, Yacc_suffixes, NULL },
657 { "auto", FALSE, NULL }, /* default guessing scheme */
658 { "none", FALSE, just_read_file }, /* regexp matching only */
659 { NULL, FALSE, NULL } /* end of list */
660 };
661
662 \f
663 static void
664 print_language_names ()
665 {
666 language *lang;
667 char **name, **ext;
668
669 puts ("\nThese are the currently supported languages, along with the\n\
670 default file names and dot suffixes:");
671 for (lang = lang_names; lang->name != NULL; lang++)
672 {
673 printf (" %-*s", 10, lang->name);
674 if (lang->filenames != NULL)
675 for (name = lang->filenames; *name != NULL; name++)
676 printf (" %s", *name);
677 if (lang->suffixes != NULL)
678 for (ext = lang->suffixes; *ext != NULL; ext++)
679 printf (" .%s", *ext);
680 puts ("");
681 }
682 puts ("Where `auto' means use default language for files based on file\n\
683 name suffix, and `none' means only do regexp processing on files.\n\
684 If no language is specified and no matching suffix is found,\n\
685 the first line of the file is read for a sharp-bang (#!) sequence\n\
686 followed by the name of an interpreter. If no such sequence is found,\n\
687 Fortran is tried first; if no tags are found, C is tried next.\n\
688 When parsing any C file, a \"class\" keyword switches to C++.\n\
689 Compressed files are supported using gzip and bzip2.");
690 }
691
692 #ifndef EMACS_NAME
693 # define EMACS_NAME "standalone"
694 #endif
695 #ifndef VERSION
696 # define VERSION "version"
697 #endif
698 static void
699 print_version ()
700 {
701 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
702 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
703 puts ("This program is distributed under the same terms as Emacs");
704
705 exit (GOOD);
706 }
707
708 static void
709 print_help ()
710 {
711 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
712 \n\
713 These are the options accepted by %s.\n", progname, progname);
714 #ifdef LONG_OPTIONS
715 puts ("You may use unambiguous abbreviations for the long option names.");
716 #else
717 puts ("Long option names do not work with this executable, as it is not\n\
718 linked with GNU getopt.");
719 #endif /* LONG_OPTIONS */
720 puts (" A - as file name means read names from stdin (one per line).\n\
721 Absolute names are stored in the output file as they are.\n\
722 Relative ones are stored relative to the output file's directory.\n");
723
724 if (!CTAGS)
725 puts ("-a, --append\n\
726 Append tag entries to existing tags file.");
727
728 puts ("--packages-only\n\
729 For Ada files, only generate tags for packages.");
730
731 if (CTAGS)
732 puts ("-B, --backward-search\n\
733 Write the search commands for the tag entries using '?', the\n\
734 backward-search command instead of '/', the forward-search command.");
735
736 /* This option is mostly obsolete, because etags can now automatically
737 detect C++. Retained for backward compatibility and for debugging and
738 experimentation. In principle, we could want to tag as C++ even
739 before any "class" keyword.
740 puts ("-C, --c++\n\
741 Treat files whose name suffix defaults to C language as C++ files.");
742 */
743
744 puts ("--declarations\n\
745 In C and derived languages, create tags for function declarations,");
746 if (CTAGS)
747 puts ("\tand create tags for extern variables if --globals is used.");
748 else
749 puts
750 ("\tand create tags for extern variables unless --no-globals is used.");
751
752 if (CTAGS)
753 puts ("-d, --defines\n\
754 Create tag entries for C #define constants and enum constants, too.");
755 else
756 puts ("-D, --no-defines\n\
757 Don't create tag entries for C #define constants and enum constants.\n\
758 This makes the tags file smaller.");
759
760 if (!CTAGS)
761 puts ("-i FILE, --include=FILE\n\
762 Include a note in tag file indicating that, when searching for\n\
763 a tag, one should also consult the tags file FILE after\n\
764 checking the current file.");
765
766 puts ("-l LANG, --language=LANG\n\
767 Force the following files to be considered as written in the\n\
768 named language up to the next --language=LANG option.");
769
770 if (CTAGS)
771 puts ("--globals\n\
772 Create tag entries for global variables in some languages.");
773 else
774 puts ("--no-globals\n\
775 Do not create tag entries for global variables in some\n\
776 languages. This makes the tags file smaller.");
777 puts ("--members\n\
778 Create tag entries for member variables in some languages.");
779
780 #ifdef ETAGS_REGEXPS
781 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
782 Make a tag for each line matching a regular expression pattern\n\
783 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
784 files only. REGEXFILE is a file containing one REGEXP per line.\n\
785 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
786 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
787 puts (" If TAGNAME/ is present, the tags created are named.\n\
788 For example Tcl named tags can be created with:\n\
789 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
790 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
791 `m' means to allow multi-line matches, `s' implies `m' and\n\
792 causes dot to match any character, including newline.");
793 puts ("-R, --no-regex\n\
794 Don't create tags from regexps for the following files.");
795 #endif /* ETAGS_REGEXPS */
796 puts ("-I, --ignore-indentation\n\
797 In C and C++ do not assume that a closing brace in the first\n\
798 column is the final brace of a function or structure definition.");
799 puts ("-o FILE, --output=FILE\n\
800 Write the tags to FILE.");
801 puts ("--parse-stdin=NAME\n\
802 Read from standard input and record tags as belonging to file NAME.");
803
804 if (CTAGS)
805 {
806 puts ("-t, --typedefs\n\
807 Generate tag entries for C and Ada typedefs.");
808 puts ("-T, --typedefs-and-c++\n\
809 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
810 and C++ member functions.");
811 }
812
813 if (CTAGS)
814 puts ("-u, --update\n\
815 Update the tag entries for the given files, leaving tag\n\
816 entries for other files in place. Currently, this is\n\
817 implemented by deleting the existing entries for the given\n\
818 files and then rewriting the new entries at the end of the\n\
819 tags file. It is often faster to simply rebuild the entire\n\
820 tag file than to use this.");
821
822 if (CTAGS)
823 {
824 puts ("-v, --vgrind\n\
825 Generates an index of items intended for human consumption,\n\
826 similar to the output of vgrind. The index is sorted, and\n\
827 gives the page number of each item.");
828 puts ("-w, --no-warn\n\
829 Suppress warning messages about entries defined in multiple\n\
830 files.");
831 puts ("-x, --cxref\n\
832 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
833 The output uses line numbers instead of page numbers, but\n\
834 beyond that the differences are cosmetic; try both to see\n\
835 which you like.");
836 }
837
838 puts ("-V, --version\n\
839 Print the version of the program.\n\
840 -h, --help\n\
841 Print this help message.");
842
843 print_language_names ();
844
845 puts ("");
846 puts ("Report bugs to bug-gnu-emacs@gnu.org");
847
848 exit (GOOD);
849 }
850
851 \f
852 #ifdef VMS /* VMS specific functions */
853
854 #define EOS '\0'
855
856 /* This is a BUG! ANY arbitrary limit is a BUG!
857 Won't someone please fix this? */
858 #define MAX_FILE_SPEC_LEN 255
859 typedef struct {
860 short curlen;
861 char body[MAX_FILE_SPEC_LEN + 1];
862 } vspec;
863
864 /*
865 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
866 returning in each successive call the next file name matching the input
867 spec. The function expects that each in_spec passed
868 to it will be processed to completion; in particular, up to and
869 including the call following that in which the last matching name
870 is returned, the function ignores the value of in_spec, and will
871 only start processing a new spec with the following call.
872 If an error occurs, on return out_spec contains the value
873 of in_spec when the error occurred.
874
875 With each successive file name returned in out_spec, the
876 function's return value is one. When there are no more matching
877 names the function returns zero. If on the first call no file
878 matches in_spec, or there is any other error, -1 is returned.
879 */
880
881 #include <rmsdef.h>
882 #include <descrip.h>
883 #define OUTSIZE MAX_FILE_SPEC_LEN
884 static short
885 fn_exp (out, in)
886 vspec *out;
887 char *in;
888 {
889 static long context = 0;
890 static struct dsc$descriptor_s o;
891 static struct dsc$descriptor_s i;
892 static bool pass1 = TRUE;
893 long status;
894 short retval;
895
896 if (pass1)
897 {
898 pass1 = FALSE;
899 o.dsc$a_pointer = (char *) out;
900 o.dsc$w_length = (short)OUTSIZE;
901 i.dsc$a_pointer = in;
902 i.dsc$w_length = (short)strlen(in);
903 i.dsc$b_dtype = DSC$K_DTYPE_T;
904 i.dsc$b_class = DSC$K_CLASS_S;
905 o.dsc$b_dtype = DSC$K_DTYPE_VT;
906 o.dsc$b_class = DSC$K_CLASS_VS;
907 }
908 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
909 {
910 out->body[out->curlen] = EOS;
911 return 1;
912 }
913 else if (status == RMS$_NMF)
914 retval = 0;
915 else
916 {
917 strcpy(out->body, in);
918 retval = -1;
919 }
920 lib$find_file_end(&context);
921 pass1 = TRUE;
922 return retval;
923 }
924
925 /*
926 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
927 name of each file specified by the provided arg expanding wildcards.
928 */
929 static char *
930 gfnames (arg, p_error)
931 char *arg;
932 bool *p_error;
933 {
934 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
935
936 switch (fn_exp (&filename, arg))
937 {
938 case 1:
939 *p_error = FALSE;
940 return filename.body;
941 case 0:
942 *p_error = FALSE;
943 return NULL;
944 default:
945 *p_error = TRUE;
946 return filename.body;
947 }
948 }
949
950 #ifndef OLD /* Newer versions of VMS do provide `system'. */
951 system (cmd)
952 char *cmd;
953 {
954 error ("%s", "system() function not implemented under VMS");
955 }
956 #endif
957
958 #define VERSION_DELIM ';'
959 char *massage_name (s)
960 char *s;
961 {
962 char *start = s;
963
964 for ( ; *s; s++)
965 if (*s == VERSION_DELIM)
966 {
967 *s = EOS;
968 break;
969 }
970 else
971 *s = lowcase (*s);
972 return start;
973 }
974 #endif /* VMS */
975
976 \f
977 int
978 main (argc, argv)
979 int argc;
980 char *argv[];
981 {
982 int i;
983 unsigned int nincluded_files;
984 char **included_files;
985 argument *argbuffer;
986 int current_arg, file_count;
987 linebuffer filename_lb;
988 #ifdef VMS
989 bool got_err;
990 #endif
991 char *optstring;
992 int opt;
993
994
995 #ifdef DOS_NT
996 _fmode = O_BINARY; /* all of files are treated as binary files */
997 #endif /* DOS_NT */
998
999 progname = argv[0];
1000 nincluded_files = 0;
1001 included_files = xnew (argc, char *);
1002 current_arg = 0;
1003 file_count = 0;
1004
1005 /* Allocate enough no matter what happens. Overkill, but each one
1006 is small. */
1007 argbuffer = xnew (argc, argument);
1008
1009 /*
1010 * If etags, always find typedefs and structure tags. Why not?
1011 * Also default to find macro constants, enum constants and
1012 * global variables.
1013 */
1014 if (!CTAGS)
1015 {
1016 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1017 globals = TRUE;
1018 }
1019
1020 optstring = "-";
1021 #ifdef ETAGS_REGEXPS
1022 optstring = "-r:Rc:";
1023 #endif /* ETAGS_REGEXPS */
1024 #ifndef LONG_OPTIONS
1025 optstring = optstring + 1;
1026 #endif /* LONG_OPTIONS */
1027 optstring = concat (optstring,
1028 "Cf:Il:o:SVhH",
1029 (CTAGS) ? "BxdtTuvw" : "aDi:");
1030
1031 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1032 switch (opt)
1033 {
1034 case 0:
1035 /* If getopt returns 0, then it has already processed a
1036 long-named option. We should do nothing. */
1037 break;
1038
1039 case 1:
1040 /* This means that a file name has been seen. Record it. */
1041 argbuffer[current_arg].arg_type = at_filename;
1042 argbuffer[current_arg].what = optarg;
1043 ++current_arg;
1044 ++file_count;
1045 break;
1046
1047 case STDIN:
1048 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1049 argbuffer[current_arg].arg_type = at_stdin;
1050 argbuffer[current_arg].what = optarg;
1051 ++current_arg;
1052 ++file_count;
1053 if (parsing_stdin)
1054 fatal ("cannot parse standard input more than once", (char *)NULL);
1055 parsing_stdin = TRUE;
1056 break;
1057
1058 /* Common options. */
1059 case 'C': cplusplus = TRUE; break;
1060 case 'f': /* for compatibility with old makefiles */
1061 case 'o':
1062 if (tagfile)
1063 {
1064 error ("-o option may only be given once.", (char *)NULL);
1065 suggest_asking_for_help ();
1066 }
1067 tagfile = optarg;
1068 break;
1069 case 'I':
1070 case 'S': /* for backward compatibility */
1071 ignoreindent = TRUE;
1072 break;
1073 case 'l':
1074 {
1075 language *lang = get_language_from_langname (optarg);
1076 if (lang != NULL)
1077 {
1078 argbuffer[current_arg].lang = lang;
1079 argbuffer[current_arg].arg_type = at_language;
1080 ++current_arg;
1081 }
1082 }
1083 break;
1084 case 'c':
1085 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1086 optarg = concat (optarg, "i", ""); /* memory leak here */
1087 /* FALLTHRU */
1088 case 'r':
1089 argbuffer[current_arg].arg_type = at_regexp;
1090 argbuffer[current_arg].what = optarg;
1091 ++current_arg;
1092 break;
1093 case 'R':
1094 argbuffer[current_arg].arg_type = at_regexp;
1095 argbuffer[current_arg].what = NULL;
1096 ++current_arg;
1097 break;
1098 case 'V':
1099 print_version ();
1100 break;
1101 case 'h':
1102 case 'H':
1103 print_help ();
1104 break;
1105
1106 /* Etags options */
1107 case 'a': append_to_tagfile = TRUE; break;
1108 case 'D': constantypedefs = FALSE; break;
1109 case 'i': included_files[nincluded_files++] = optarg; break;
1110
1111 /* Ctags options. */
1112 case 'B': searchar = '?'; break;
1113 case 'd': constantypedefs = TRUE; break;
1114 case 't': typedefs = TRUE; break;
1115 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1116 case 'u': update = TRUE; break;
1117 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1118 case 'x': cxref_style = TRUE; break;
1119 case 'w': no_warnings = TRUE; break;
1120 default:
1121 suggest_asking_for_help ();
1122 }
1123
1124 for (; optind < argc; ++optind)
1125 {
1126 argbuffer[current_arg].arg_type = at_filename;
1127 argbuffer[current_arg].what = argv[optind];
1128 ++current_arg;
1129 ++file_count;
1130 }
1131
1132 if (nincluded_files == 0 && file_count == 0)
1133 {
1134 error ("no input files specified.", (char *)NULL);
1135 suggest_asking_for_help ();
1136 }
1137
1138 if (tagfile == NULL)
1139 tagfile = CTAGS ? "tags" : "TAGS";
1140 cwd = etags_getcwd (); /* the current working directory */
1141 if (cwd[strlen (cwd) - 1] != '/')
1142 {
1143 char *oldcwd = cwd;
1144 cwd = concat (oldcwd, "/", "");
1145 free (oldcwd);
1146 }
1147 if (streq (tagfile, "-"))
1148 tagfiledir = cwd;
1149 else
1150 tagfiledir = absolute_dirname (tagfile, cwd);
1151
1152 init (); /* set up boolean "functions" */
1153
1154 linebuffer_init (&lb);
1155 linebuffer_init (&filename_lb);
1156 linebuffer_init (&filebuf);
1157 linebuffer_init (&token_name);
1158
1159 if (!CTAGS)
1160 {
1161 if (streq (tagfile, "-"))
1162 {
1163 tagf = stdout;
1164 #ifdef DOS_NT
1165 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1166 doesn't take effect until after `stdout' is already open). */
1167 if (!isatty (fileno (stdout)))
1168 setmode (fileno (stdout), O_BINARY);
1169 #endif /* DOS_NT */
1170 }
1171 else
1172 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1173 if (tagf == NULL)
1174 pfatal (tagfile);
1175 }
1176
1177 /*
1178 * Loop through files finding functions.
1179 */
1180 for (i = 0; i < current_arg; ++i)
1181 {
1182 static language *lang; /* non-NULL if language is forced */
1183 char *this_file;
1184
1185 switch (argbuffer[i].arg_type)
1186 {
1187 case at_language:
1188 lang = argbuffer[i].lang;
1189 break;
1190 #ifdef ETAGS_REGEXPS
1191 case at_regexp:
1192 analyse_regex (argbuffer[i].what);
1193 break;
1194 #endif
1195 case at_filename:
1196 #ifdef VMS
1197 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1198 {
1199 if (got_err)
1200 {
1201 error ("can't find file %s\n", this_file);
1202 argc--, argv++;
1203 }
1204 else
1205 {
1206 this_file = massage_name (this_file);
1207 }
1208 #else
1209 this_file = argbuffer[i].what;
1210 #endif
1211 /* Input file named "-" means read file names from stdin
1212 (one per line) and use them. */
1213 if (streq (this_file, "-"))
1214 {
1215 if (parsing_stdin)
1216 fatal ("cannot parse standard input AND read file names from it",
1217 (char *)NULL);
1218 while (readline_internal (&filename_lb, stdin) > 0)
1219 process_file_name (filename_lb.buffer, lang);
1220 }
1221 else
1222 process_file_name (this_file, lang);
1223 #ifdef VMS
1224 }
1225 #endif
1226 break;
1227 case at_stdin:
1228 this_file = argbuffer[i].what;
1229 process_file (stdin, this_file, lang);
1230 break;
1231 }
1232 }
1233
1234 #ifdef ETAGS_REGEXPS
1235 free_regexps ();
1236 #endif /* ETAGS_REGEXPS */
1237 free (lb.buffer);
1238 free (filebuf.buffer);
1239 free (token_name.buffer);
1240
1241 if (!CTAGS || cxref_style)
1242 {
1243 put_entries (nodehead); /* write the remainig tags (ETAGS) */
1244 free_tree (nodehead);
1245 nodehead = NULL;
1246 if (!CTAGS)
1247 {
1248 fdesc *fdp;
1249
1250 /* Output file entries that have no tags. */
1251 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1252 if (!fdp->written)
1253 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1254
1255 while (nincluded_files-- > 0)
1256 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1257 }
1258
1259 if (fclose (tagf) == EOF)
1260 pfatal (tagfile);
1261 exit (GOOD);
1262 }
1263
1264 if (update)
1265 {
1266 char cmd[BUFSIZ];
1267 for (i = 0; i < current_arg; ++i)
1268 {
1269 switch (argbuffer[i].arg_type)
1270 {
1271 case at_filename:
1272 case at_stdin:
1273 break;
1274 default:
1275 continue; /* the for loop */
1276 }
1277 sprintf (cmd,
1278 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1279 tagfile, argbuffer[i].what, tagfile);
1280 if (system (cmd) != GOOD)
1281 fatal ("failed to execute shell command", (char *)NULL);
1282 }
1283 append_to_tagfile = TRUE;
1284 }
1285
1286 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1287 if (tagf == NULL)
1288 pfatal (tagfile);
1289 put_entries (nodehead); /* write all the tags (CTAGS) */
1290 free_tree (nodehead);
1291 nodehead = NULL;
1292 if (fclose (tagf) == EOF)
1293 pfatal (tagfile);
1294
1295 if (update)
1296 {
1297 char cmd[2*BUFSIZ+10];
1298 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1299 exit (system (cmd));
1300 }
1301 return GOOD;
1302 }
1303
1304
1305 /*
1306 * Return a compressor given the file name. If EXTPTR is non-zero,
1307 * return a pointer into FILE where the compressor-specific
1308 * extension begins. If no compressor is found, NULL is returned
1309 * and EXTPTR is not significant.
1310 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1311 */
1312 static compressor *
1313 get_compressor_from_suffix (file, extptr)
1314 char *file;
1315 char **extptr;
1316 {
1317 compressor *compr;
1318 char *slash, *suffix;
1319
1320 /* This relies on FN to be after canonicalize_filename,
1321 so we don't need to consider backslashes on DOS_NT. */
1322 slash = etags_strrchr (file, '/');
1323 suffix = etags_strrchr (file, '.');
1324 if (suffix == NULL || suffix < slash)
1325 return NULL;
1326 if (extptr != NULL)
1327 *extptr = suffix;
1328 suffix += 1;
1329 /* Let those poor souls who live with DOS 8+3 file name limits get
1330 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1331 Only the first do loop is run if not MSDOS */
1332 do
1333 {
1334 for (compr = compressors; compr->suffix != NULL; compr++)
1335 if (streq (compr->suffix, suffix))
1336 return compr;
1337 if (!MSDOS)
1338 break; /* do it only once: not really a loop */
1339 if (extptr != NULL)
1340 *extptr = ++suffix;
1341 } while (*suffix != '\0');
1342 return NULL;
1343 }
1344
1345
1346
1347 /*
1348 * Return a language given the name.
1349 */
1350 static language *
1351 get_language_from_langname (name)
1352 const char *name;
1353 {
1354 language *lang;
1355
1356 if (name == NULL)
1357 error ("empty language name", (char *)NULL);
1358 else
1359 {
1360 for (lang = lang_names; lang->name != NULL; lang++)
1361 if (streq (name, lang->name))
1362 return lang;
1363 error ("unknown language \"%s\"", name);
1364 }
1365
1366 return NULL;
1367 }
1368
1369
1370 /*
1371 * Return a language given the interpreter name.
1372 */
1373 static language *
1374 get_language_from_interpreter (interpreter)
1375 char *interpreter;
1376 {
1377 language *lang;
1378 char **iname;
1379
1380 if (interpreter == NULL)
1381 return NULL;
1382 for (lang = lang_names; lang->name != NULL; lang++)
1383 if (lang->interpreters != NULL)
1384 for (iname = lang->interpreters; *iname != NULL; iname++)
1385 if (streq (*iname, interpreter))
1386 return lang;
1387
1388 return NULL;
1389 }
1390
1391
1392
1393 /*
1394 * Return a language given the file name.
1395 */
1396 static language *
1397 get_language_from_filename (file, case_sensitive)
1398 char *file;
1399 bool case_sensitive;
1400 {
1401 language *lang;
1402 char **name, **ext, *suffix;
1403
1404 /* Try whole file name first. */
1405 for (lang = lang_names; lang->name != NULL; lang++)
1406 if (lang->filenames != NULL)
1407 for (name = lang->filenames; *name != NULL; name++)
1408 if ((case_sensitive)
1409 ? streq (*name, file)
1410 : strcaseeq (*name, file))
1411 return lang;
1412
1413 /* If not found, try suffix after last dot. */
1414 suffix = etags_strrchr (file, '.');
1415 if (suffix == NULL)
1416 return NULL;
1417 suffix += 1;
1418 for (lang = lang_names; lang->name != NULL; lang++)
1419 if (lang->suffixes != NULL)
1420 for (ext = lang->suffixes; *ext != NULL; ext++)
1421 if ((case_sensitive)
1422 ? streq (*ext, suffix)
1423 : strcaseeq (*ext, suffix))
1424 return lang;
1425 return NULL;
1426 }
1427
1428 \f
1429 /*
1430 * This routine is called on each file argument.
1431 */
1432 static void
1433 process_file_name (file, lang)
1434 char *file;
1435 language *lang;
1436 {
1437 struct stat stat_buf;
1438 FILE *inf;
1439 fdesc *fdp;
1440 compressor *compr;
1441 char *compressed_name, *uncompressed_name;
1442 char *ext, *real_name;
1443 int retval;
1444
1445 canonicalize_filename (file);
1446 if (streq (file, tagfile) && !streq (tagfile, "-"))
1447 {
1448 error ("skipping inclusion of %s in self.", file);
1449 return;
1450 }
1451 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1452 {
1453 compressed_name = NULL;
1454 real_name = uncompressed_name = savestr (file);
1455 }
1456 else
1457 {
1458 real_name = compressed_name = savestr (file);
1459 uncompressed_name = savenstr (file, ext - file);
1460 }
1461
1462 /* If the canonicalized uncompressed name
1463 has already been dealt with, skip it silently. */
1464 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1465 {
1466 assert (fdp->infname != NULL);
1467 if (streq (uncompressed_name, fdp->infname))
1468 goto cleanup;
1469 }
1470
1471 if (stat (real_name, &stat_buf) != 0)
1472 {
1473 /* Reset real_name and try with a different name. */
1474 real_name = NULL;
1475 if (compressed_name != NULL) /* try with the given suffix */
1476 {
1477 if (stat (uncompressed_name, &stat_buf) == 0)
1478 real_name = uncompressed_name;
1479 }
1480 else /* try all possible suffixes */
1481 {
1482 for (compr = compressors; compr->suffix != NULL; compr++)
1483 {
1484 compressed_name = concat (file, ".", compr->suffix);
1485 if (stat (compressed_name, &stat_buf) != 0)
1486 {
1487 if (MSDOS)
1488 {
1489 char *suf = compressed_name + strlen (file);
1490 size_t suflen = strlen (compr->suffix) + 1;
1491 for ( ; suf[1]; suf++, suflen--)
1492 {
1493 memmove (suf, suf + 1, suflen);
1494 if (stat (compressed_name, &stat_buf) == 0)
1495 {
1496 real_name = compressed_name;
1497 break;
1498 }
1499 }
1500 if (real_name != NULL)
1501 break;
1502 } /* MSDOS */
1503 free (compressed_name);
1504 compressed_name = NULL;
1505 }
1506 else
1507 {
1508 real_name = compressed_name;
1509 break;
1510 }
1511 }
1512 }
1513 if (real_name == NULL)
1514 {
1515 perror (file);
1516 goto cleanup;
1517 }
1518 } /* try with a different name */
1519
1520 if (!S_ISREG (stat_buf.st_mode))
1521 {
1522 error ("skipping %s: it is not a regular file.", real_name);
1523 goto cleanup;
1524 }
1525 if (real_name == compressed_name)
1526 {
1527 char *cmd = concat (compr->command, " ", real_name);
1528 inf = (FILE *) popen (cmd, "r");
1529 free (cmd);
1530 }
1531 else
1532 inf = fopen (real_name, "r");
1533 if (inf == NULL)
1534 {
1535 perror (real_name);
1536 goto cleanup;
1537 }
1538
1539 process_file (inf, uncompressed_name, lang);
1540
1541 if (real_name == compressed_name)
1542 retval = pclose (inf);
1543 else
1544 retval = fclose (inf);
1545 if (retval < 0)
1546 pfatal (file);
1547
1548 cleanup:
1549 if (compressed_name) free (compressed_name);
1550 if (uncompressed_name) free (uncompressed_name);
1551 last_node = NULL;
1552 curfdp = NULL;
1553 return;
1554 }
1555
1556 static void
1557 process_file (fh, fn, lang)
1558 FILE *fh;
1559 char *fn;
1560 language *lang;
1561 {
1562 static const fdesc emptyfdesc;
1563 fdesc *fdp;
1564
1565 /* Create a new input file description entry. */
1566 fdp = xnew (1, fdesc);
1567 *fdp = emptyfdesc;
1568 fdp->next = fdhead;
1569 fdp->infname = savestr (fn);
1570 fdp->lang = lang;
1571 fdp->infabsname = absolute_filename (fn, cwd);
1572 fdp->infabsdir = absolute_dirname (fn, cwd);
1573 if (filename_is_absolute (fn))
1574 {
1575 /* An absolute file name. Canonicalize it. */
1576 fdp->taggedfname = absolute_filename (fn, NULL);
1577 }
1578 else
1579 {
1580 /* A file name relative to cwd. Make it relative
1581 to the directory of the tags file. */
1582 fdp->taggedfname = relative_filename (fn, tagfiledir);
1583 }
1584 fdp->usecharno = TRUE; /* use char position when making tags */
1585 fdp->prop = NULL;
1586 fdp->written = FALSE; /* not written on tags file yet */
1587
1588 fdhead = fdp;
1589 curfdp = fdhead; /* the current file description */
1590
1591 find_entries (fh);
1592
1593 /* If not Ctags, and if this is not metasource and if it contained no #line
1594 directives, we can write the tags and free all nodes pointing to
1595 curfdp. */
1596 if (!CTAGS
1597 && curfdp->usecharno /* no #line directives in this file */
1598 && !curfdp->lang->metasource)
1599 {
1600 node *np, *prev;
1601
1602 /* Look for the head of the sublist relative to this file. See add_node
1603 for the structure of the node tree. */
1604 prev = NULL;
1605 for (np = nodehead; np != NULL; prev = np, np = np->left)
1606 if (np->fdp == curfdp)
1607 break;
1608
1609 /* If we generated tags for this file, write and delete them. */
1610 if (np != NULL)
1611 {
1612 /* This is the head of the last sublist, if any. The following
1613 instructions depend on this being true. */
1614 assert (np->left == NULL);
1615
1616 assert (fdhead == curfdp);
1617 assert (last_node->fdp == curfdp);
1618 put_entries (np); /* write tags for file curfdp->taggedfname */
1619 free_tree (np); /* remove the written nodes */
1620 if (prev == NULL)
1621 nodehead = NULL; /* no nodes left */
1622 else
1623 prev->left = NULL; /* delete the pointer to the sublist */
1624 }
1625 }
1626 }
1627
1628 /*
1629 * This routine sets up the boolean pseudo-functions which work
1630 * by setting boolean flags dependent upon the corresponding character.
1631 * Every char which is NOT in that string is not a white char. Therefore,
1632 * all of the array "_wht" is set to FALSE, and then the elements
1633 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1634 * of a char is TRUE if it is the string "white", else FALSE.
1635 */
1636 static void
1637 init ()
1638 {
1639 register char *sp;
1640 register int i;
1641
1642 for (i = 0; i < CHARS; i++)
1643 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1644 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1645 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1646 notinname('\0') = notinname('\n');
1647 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1648 begtoken('\0') = begtoken('\n');
1649 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1650 intoken('\0') = intoken('\n');
1651 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1652 endtoken('\0') = endtoken('\n');
1653 }
1654
1655 /*
1656 * This routine opens the specified file and calls the function
1657 * which finds the function and type definitions.
1658 */
1659 static void
1660 find_entries (inf)
1661 FILE *inf;
1662 {
1663 char *cp;
1664 language *lang = curfdp->lang;
1665 Lang_function *parser = NULL;
1666
1667 /* If user specified a language, use it. */
1668 if (lang != NULL && lang->function != NULL)
1669 {
1670 parser = lang->function;
1671 }
1672
1673 /* Else try to guess the language given the file name. */
1674 if (parser == NULL)
1675 {
1676 lang = get_language_from_filename (curfdp->infname, TRUE);
1677 if (lang != NULL && lang->function != NULL)
1678 {
1679 curfdp->lang = lang;
1680 parser = lang->function;
1681 }
1682 }
1683
1684 /* Else look for sharp-bang as the first two characters. */
1685 if (parser == NULL
1686 && readline_internal (&lb, inf) > 0
1687 && lb.len >= 2
1688 && lb.buffer[0] == '#'
1689 && lb.buffer[1] == '!')
1690 {
1691 char *lp;
1692
1693 /* Set lp to point at the first char after the last slash in the
1694 line or, if no slashes, at the first nonblank. Then set cp to
1695 the first successive blank and terminate the string. */
1696 lp = etags_strrchr (lb.buffer+2, '/');
1697 if (lp != NULL)
1698 lp += 1;
1699 else
1700 lp = skip_spaces (lb.buffer + 2);
1701 cp = skip_non_spaces (lp);
1702 *cp = '\0';
1703
1704 if (strlen (lp) > 0)
1705 {
1706 lang = get_language_from_interpreter (lp);
1707 if (lang != NULL && lang->function != NULL)
1708 {
1709 curfdp->lang = lang;
1710 parser = lang->function;
1711 }
1712 }
1713 }
1714
1715 /* We rewind here, even if inf may be a pipe. We fail if the
1716 length of the first line is longer than the pipe block size,
1717 which is unlikely. */
1718 rewind (inf);
1719
1720 /* Else try to guess the language given the case insensitive file name. */
1721 if (parser == NULL)
1722 {
1723 lang = get_language_from_filename (curfdp->infname, FALSE);
1724 if (lang != NULL && lang->function != NULL)
1725 {
1726 curfdp->lang = lang;
1727 parser = lang->function;
1728 }
1729 }
1730
1731 /* Else try Fortran or C. */
1732 if (parser == NULL)
1733 {
1734 node *old_last_node = last_node;
1735
1736 curfdp->lang = get_language_from_langname ("fortran");
1737 find_entries (inf);
1738
1739 if (old_last_node == last_node)
1740 /* No Fortran entries found. Try C. */
1741 {
1742 /* We do not tag if rewind fails.
1743 Only the file name will be recorded in the tags file. */
1744 rewind (inf);
1745 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1746 find_entries (inf);
1747 }
1748 return;
1749 }
1750
1751 if (!no_line_directive
1752 && curfdp->lang != NULL && curfdp->lang->metasource)
1753 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1754 file, or anyway we parsed a file that is automatically generated from
1755 this one. If this is the case, the bingo.c file contained #line
1756 directives that generated tags pointing to this file. Let's delete
1757 them all before parsing this file, which is the real source. */
1758 {
1759 fdesc **fdpp = &fdhead;
1760 while (*fdpp != NULL)
1761 if (*fdpp != curfdp
1762 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1763 /* We found one of those! We must delete both the file description
1764 and all tags referring to it. */
1765 {
1766 fdesc *badfdp = *fdpp;
1767
1768 if (DEBUG)
1769 fprintf (stderr,
1770 "Removing references to \"%s\" obtained from \"%s\"\n",
1771 badfdp->taggedfname, badfdp->infname);
1772
1773 /* Delete the tags referring to badfdp. */
1774 invalidate_nodes (badfdp, &nodehead);
1775
1776 *fdpp = badfdp->next; /* remove the bad description from the list */
1777 free_fdesc (badfdp);
1778 }
1779 else
1780 fdpp = &(*fdpp)->next; /* advance the list pointer */
1781 }
1782
1783 assert (parser != NULL);
1784
1785 /* Generic initialisations before reading from file. */
1786 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1787
1788 /* Generic initialisations before parsing file with readline. */
1789 lineno = 0; /* reset global line number */
1790 charno = 0; /* reset global char number */
1791 linecharno = 0; /* reset global char number of line start */
1792
1793 parser (inf);
1794
1795 #ifdef ETAGS_REGEXPS
1796 regex_tag_multiline ();
1797 #endif /* ETAGS_REGEXPS */
1798 }
1799
1800 \f
1801 /*
1802 * Check whether an implicitly named tag should be created,
1803 * then call `pfnote'.
1804 * NAME is a string that is internally copied by this function.
1805 *
1806 * TAGS format specification
1807 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1808 * The following is explained in some more detail in etc/ETAGS.EBNF.
1809 *
1810 * make_tag creates tags with "implicit tag names" (unnamed tags)
1811 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1812 * 1. NAME does not contain any of the characters in NONAM;
1813 * 2. LINESTART contains name as either a rightmost, or rightmost but
1814 * one character, substring;
1815 * 3. the character, if any, immediately before NAME in LINESTART must
1816 * be a character in NONAM;
1817 * 4. the character, if any, immediately after NAME in LINESTART must
1818 * also be a character in NONAM.
1819 *
1820 * The implementation uses the notinname() macro, which recognises the
1821 * characters stored in the string `nonam'.
1822 * etags.el needs to use the same characters that are in NONAM.
1823 */
1824 static void
1825 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1826 char *name; /* tag name, or NULL if unnamed */
1827 int namelen; /* tag length */
1828 bool is_func; /* tag is a function */
1829 char *linestart; /* start of the line where tag is */
1830 int linelen; /* length of the line where tag is */
1831 int lno; /* line number */
1832 long cno; /* character number */
1833 {
1834 bool named = (name != NULL && namelen > 0);
1835
1836 if (!CTAGS && named) /* maybe set named to false */
1837 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1838 such that etags.el can guess a name from it. */
1839 {
1840 int i;
1841 register char *cp = name;
1842
1843 for (i = 0; i < namelen; i++)
1844 if (notinname (*cp++))
1845 break;
1846 if (i == namelen) /* rule #1 */
1847 {
1848 cp = linestart + linelen - namelen;
1849 if (notinname (linestart[linelen-1]))
1850 cp -= 1; /* rule #4 */
1851 if (cp >= linestart /* rule #2 */
1852 && (cp == linestart
1853 || notinname (cp[-1])) /* rule #3 */
1854 && strneq (name, cp, namelen)) /* rule #2 */
1855 named = FALSE; /* use implicit tag name */
1856 }
1857 }
1858
1859 if (named)
1860 name = savenstr (name, namelen);
1861 else
1862 name = NULL;
1863 pfnote (name, is_func, linestart, linelen, lno, cno);
1864 }
1865
1866 /* Record a tag. */
1867 static void
1868 pfnote (name, is_func, linestart, linelen, lno, cno)
1869 char *name; /* tag name, or NULL if unnamed */
1870 bool is_func; /* tag is a function */
1871 char *linestart; /* start of the line where tag is */
1872 int linelen; /* length of the line where tag is */
1873 int lno; /* line number */
1874 long cno; /* character number */
1875 {
1876 register node *np;
1877
1878 if (CTAGS && name == NULL)
1879 return;
1880
1881 np = xnew (1, node);
1882
1883 /* If ctags mode, change name "main" to M<thisfilename>. */
1884 if (CTAGS && !cxref_style && streq (name, "main"))
1885 {
1886 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1887 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1888 fp = etags_strrchr (np->name, '.');
1889 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1890 fp[0] = '\0';
1891 }
1892 else
1893 np->name = name;
1894 np->valid = TRUE;
1895 np->been_warned = FALSE;
1896 np->fdp = curfdp;
1897 np->is_func = is_func;
1898 np->lno = lno;
1899 if (np->fdp->usecharno)
1900 /* Our char numbers are 0-base, because of C language tradition?
1901 ctags compatibility? old versions compatibility? I don't know.
1902 Anyway, since emacs's are 1-base we expect etags.el to take care
1903 of the difference. If we wanted to have 1-based numbers, we would
1904 uncomment the +1 below. */
1905 np->cno = cno /* + 1 */ ;
1906 else
1907 np->cno = invalidcharno;
1908 np->left = np->right = NULL;
1909 if (CTAGS && !cxref_style)
1910 {
1911 if (strlen (linestart) < 50)
1912 np->regex = concat (linestart, "$", "");
1913 else
1914 np->regex = savenstr (linestart, 50);
1915 }
1916 else
1917 np->regex = savenstr (linestart, linelen);
1918
1919 add_node (np, &nodehead);
1920 }
1921
1922 /*
1923 * free_tree ()
1924 * recurse on left children, iterate on right children.
1925 */
1926 static void
1927 free_tree (np)
1928 register node *np;
1929 {
1930 while (np)
1931 {
1932 register node *node_right = np->right;
1933 free_tree (np->left);
1934 if (np->name != NULL)
1935 free (np->name);
1936 free (np->regex);
1937 free (np);
1938 np = node_right;
1939 }
1940 }
1941
1942 /*
1943 * free_fdesc ()
1944 * delete a file description
1945 */
1946 static void
1947 free_fdesc (fdp)
1948 register fdesc *fdp;
1949 {
1950 if (fdp->infname != NULL) free (fdp->infname);
1951 if (fdp->infabsname != NULL) free (fdp->infabsname);
1952 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1953 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1954 if (fdp->prop != NULL) free (fdp->prop);
1955 free (fdp);
1956 }
1957
1958 /*
1959 * add_node ()
1960 * Adds a node to the tree of nodes. In etags mode, sort by file
1961 * name. In ctags mode, sort by tag name. Make no attempt at
1962 * balancing.
1963 *
1964 * add_node is the only function allowed to add nodes, so it can
1965 * maintain state.
1966 */
1967 static void
1968 add_node (np, cur_node_p)
1969 node *np, **cur_node_p;
1970 {
1971 register int dif;
1972 register node *cur_node = *cur_node_p;
1973
1974 if (cur_node == NULL)
1975 {
1976 *cur_node_p = np;
1977 last_node = np;
1978 return;
1979 }
1980
1981 if (!CTAGS)
1982 /* Etags Mode */
1983 {
1984 /* For each file name, tags are in a linked sublist on the right
1985 pointer. The first tags of different files are a linked list
1986 on the left pointer. last_node points to the end of the last
1987 used sublist. */
1988 if (last_node != NULL && last_node->fdp == np->fdp)
1989 {
1990 /* Let's use the same sublist as the last added node. */
1991 assert (last_node->right == NULL);
1992 last_node->right = np;
1993 last_node = np;
1994 }
1995 else if (cur_node->fdp == np->fdp)
1996 {
1997 /* Scanning the list we found the head of a sublist which is
1998 good for us. Let's scan this sublist. */
1999 add_node (np, &cur_node->right);
2000 }
2001 else
2002 /* The head of this sublist is not good for us. Let's try the
2003 next one. */
2004 add_node (np, &cur_node->left);
2005 } /* if ETAGS mode */
2006
2007 else
2008 {
2009 /* Ctags Mode */
2010 dif = strcmp (np->name, cur_node->name);
2011
2012 /*
2013 * If this tag name matches an existing one, then
2014 * do not add the node, but maybe print a warning.
2015 */
2016 if (!dif)
2017 {
2018 if (np->fdp == cur_node->fdp)
2019 {
2020 if (!no_warnings)
2021 {
2022 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2023 np->fdp->infname, lineno, np->name);
2024 fprintf (stderr, "Second entry ignored\n");
2025 }
2026 }
2027 else if (!cur_node->been_warned && !no_warnings)
2028 {
2029 fprintf
2030 (stderr,
2031 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2032 np->fdp->infname, cur_node->fdp->infname, np->name);
2033 cur_node->been_warned = TRUE;
2034 }
2035 return;
2036 }
2037
2038 /* Actually add the node */
2039 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2040 } /* if CTAGS mode */
2041 }
2042
2043 /*
2044 * invalidate_nodes ()
2045 * Scan the node tree and invalidate all nodes pointing to the
2046 * given file description (CTAGS case) or free them (ETAGS case).
2047 */
2048 static void
2049 invalidate_nodes (badfdp, npp)
2050 fdesc *badfdp;
2051 node **npp;
2052 {
2053 node *np = *npp;
2054
2055 if (np == NULL)
2056 return;
2057
2058 if (CTAGS)
2059 {
2060 if (np->left != NULL)
2061 invalidate_nodes (badfdp, &np->left);
2062 if (np->fdp == badfdp)
2063 np->valid = FALSE;
2064 if (np->right != NULL)
2065 invalidate_nodes (badfdp, &np->right);
2066 }
2067 else
2068 {
2069 assert (np->fdp != NULL);
2070 if (np->fdp == badfdp)
2071 {
2072 *npp = np->left; /* detach the sublist from the list */
2073 np->left = NULL; /* isolate it */
2074 free_tree (np); /* free it */
2075 invalidate_nodes (badfdp, npp);
2076 }
2077 else
2078 invalidate_nodes (badfdp, &np->left);
2079 }
2080 }
2081
2082 \f
2083 static int total_size_of_entries __P((node *));
2084 static int number_len __P((long));
2085
2086 /* Length of a non-negative number's decimal representation. */
2087 static int
2088 number_len (num)
2089 long num;
2090 {
2091 int len = 1;
2092 while ((num /= 10) > 0)
2093 len += 1;
2094 return len;
2095 }
2096
2097 /*
2098 * Return total number of characters that put_entries will output for
2099 * the nodes in the linked list at the right of the specified node.
2100 * This count is irrelevant with etags.el since emacs 19.34 at least,
2101 * but is still supplied for backward compatibility.
2102 */
2103 static int
2104 total_size_of_entries (np)
2105 register node *np;
2106 {
2107 register int total = 0;
2108
2109 for (; np != NULL; np = np->right)
2110 if (np->valid)
2111 {
2112 total += strlen (np->regex) + 1; /* pat\177 */
2113 if (np->name != NULL)
2114 total += strlen (np->name) + 1; /* name\001 */
2115 total += number_len ((long) np->lno) + 1; /* lno, */
2116 if (np->cno != invalidcharno) /* cno */
2117 total += number_len (np->cno);
2118 total += 1; /* newline */
2119 }
2120
2121 return total;
2122 }
2123
2124 static void
2125 put_entries (np)
2126 register node *np;
2127 {
2128 register char *sp;
2129 static fdesc *fdp = NULL;
2130
2131 if (np == NULL)
2132 return;
2133
2134 /* Output subentries that precede this one */
2135 if (CTAGS)
2136 put_entries (np->left);
2137
2138 /* Output this entry */
2139 if (np->valid)
2140 {
2141 if (!CTAGS)
2142 {
2143 /* Etags mode */
2144 if (fdp != np->fdp)
2145 {
2146 fdp = np->fdp;
2147 fprintf (tagf, "\f\n%s,%d\n",
2148 fdp->taggedfname, total_size_of_entries (np));
2149 fdp->written = TRUE;
2150 }
2151 fputs (np->regex, tagf);
2152 fputc ('\177', tagf);
2153 if (np->name != NULL)
2154 {
2155 fputs (np->name, tagf);
2156 fputc ('\001', tagf);
2157 }
2158 fprintf (tagf, "%d,", np->lno);
2159 if (np->cno != invalidcharno)
2160 fprintf (tagf, "%ld", np->cno);
2161 fputs ("\n", tagf);
2162 }
2163 else
2164 {
2165 /* Ctags mode */
2166 if (np->name == NULL)
2167 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2168
2169 if (cxref_style)
2170 {
2171 if (vgrind_style)
2172 fprintf (stdout, "%s %s %d\n",
2173 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2174 else
2175 fprintf (stdout, "%-16s %3d %-16s %s\n",
2176 np->name, np->lno, np->fdp->taggedfname, np->regex);
2177 }
2178 else
2179 {
2180 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2181
2182 if (np->is_func)
2183 { /* function or #define macro with args */
2184 putc (searchar, tagf);
2185 putc ('^', tagf);
2186
2187 for (sp = np->regex; *sp; sp++)
2188 {
2189 if (*sp == '\\' || *sp == searchar)
2190 putc ('\\', tagf);
2191 putc (*sp, tagf);
2192 }
2193 putc (searchar, tagf);
2194 }
2195 else
2196 { /* anything else; text pattern inadequate */
2197 fprintf (tagf, "%d", np->lno);
2198 }
2199 putc ('\n', tagf);
2200 }
2201 }
2202 } /* if this node contains a valid tag */
2203
2204 /* Output subentries that follow this one */
2205 put_entries (np->right);
2206 if (!CTAGS)
2207 put_entries (np->left);
2208 }
2209
2210 \f
2211 /* C extensions. */
2212 #define C_EXT 0x00fff /* C extensions */
2213 #define C_PLAIN 0x00000 /* C */
2214 #define C_PLPL 0x00001 /* C++ */
2215 #define C_STAR 0x00003 /* C* */
2216 #define C_JAVA 0x00005 /* JAVA */
2217 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2218 #define YACC 0x10000 /* yacc file */
2219
2220 /*
2221 * The C symbol tables.
2222 */
2223 enum sym_type
2224 {
2225 st_none,
2226 st_C_objprot, st_C_objimpl, st_C_objend,
2227 st_C_gnumacro,
2228 st_C_ignore,
2229 st_C_javastruct,
2230 st_C_operator,
2231 st_C_class, st_C_template,
2232 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
2233 };
2234
2235 static unsigned int hash __P((const char *, unsigned int));
2236 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2237 static enum sym_type C_symtype __P((char *, int, int));
2238
2239 /* Feed stuff between (but not including) %[ and %] lines to:
2240 gperf -c -k 1,3 -o -p -r -t
2241 %[
2242 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2243 %%
2244 if, 0, st_C_ignore
2245 for, 0, st_C_ignore
2246 while, 0, st_C_ignore
2247 switch, 0, st_C_ignore
2248 return, 0, st_C_ignore
2249 @interface, 0, st_C_objprot
2250 @protocol, 0, st_C_objprot
2251 @implementation,0, st_C_objimpl
2252 @end, 0, st_C_objend
2253 import, C_JAVA, st_C_ignore
2254 package, C_JAVA, st_C_ignore
2255 friend, C_PLPL, st_C_ignore
2256 extends, C_JAVA, st_C_javastruct
2257 implements, C_JAVA, st_C_javastruct
2258 interface, C_JAVA, st_C_struct
2259 class, 0, st_C_class
2260 namespace, C_PLPL, st_C_struct
2261 domain, C_STAR, st_C_struct
2262 union, 0, st_C_struct
2263 struct, 0, st_C_struct
2264 extern, 0, st_C_extern
2265 enum, 0, st_C_enum
2266 typedef, 0, st_C_typedef
2267 define, 0, st_C_define
2268 operator, C_PLPL, st_C_operator
2269 template, 0, st_C_template
2270 bool, C_PLPL, st_C_typespec
2271 long, 0, st_C_typespec
2272 short, 0, st_C_typespec
2273 int, 0, st_C_typespec
2274 char, 0, st_C_typespec
2275 float, 0, st_C_typespec
2276 double, 0, st_C_typespec
2277 signed, 0, st_C_typespec
2278 unsigned, 0, st_C_typespec
2279 auto, 0, st_C_typespec
2280 void, 0, st_C_typespec
2281 static, 0, st_C_typespec
2282 const, 0, st_C_typespec
2283 volatile, 0, st_C_typespec
2284 explicit, C_PLPL, st_C_typespec
2285 mutable, C_PLPL, st_C_typespec
2286 typename, C_PLPL, st_C_typespec
2287 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2288 DEFUN, 0, st_C_gnumacro
2289 SYSCALL, 0, st_C_gnumacro
2290 ENTRY, 0, st_C_gnumacro
2291 PSEUDO, 0, st_C_gnumacro
2292 # These are defined inside C functions, so currently they are not met.
2293 # EXFUN used in glibc, DEFVAR_* in emacs.
2294 #EXFUN, 0, st_C_gnumacro
2295 #DEFVAR_, 0, st_C_gnumacro
2296 %]
2297 and replace lines between %< and %> with its output,
2298 then make in_word_set and C_stab_entry static. */
2299 /*%<*/
2300 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2301 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2302 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2303
2304 #define TOTAL_KEYWORDS 47
2305 #define MIN_WORD_LENGTH 2
2306 #define MAX_WORD_LENGTH 15
2307 #define MIN_HASH_VALUE 18
2308 #define MAX_HASH_VALUE 138
2309 /* maximum key range = 121, duplicates = 0 */
2310
2311 #ifdef __GNUC__
2312 __inline
2313 #endif
2314 static unsigned int
2315 hash (str, len)
2316 register const char *str;
2317 register unsigned int len;
2318 {
2319 static unsigned char asso_values[] =
2320 {
2321 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2322 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2323 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2324 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2325 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2326 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2327 139, 139, 139, 139, 63, 139, 139, 139, 33, 44,
2328 62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2329 42, 139, 139, 12, 32, 139, 139, 139, 139, 139,
2330 139, 139, 139, 139, 139, 139, 139, 34, 59, 37,
2331 24, 58, 33, 3, 139, 16, 139, 139, 42, 60,
2332 18, 11, 39, 139, 23, 57, 4, 63, 6, 20,
2333 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2334 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2335 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2336 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2337 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2338 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2339 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2340 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2341 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2342 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2343 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2344 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2345 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2346 139, 139, 139, 139, 139, 139
2347 };
2348 register int hval = len;
2349
2350 switch (hval)
2351 {
2352 default:
2353 case 3:
2354 hval += asso_values[(unsigned char)str[2]];
2355 case 2:
2356 case 1:
2357 hval += asso_values[(unsigned char)str[0]];
2358 break;
2359 }
2360 return hval;
2361 }
2362
2363 #ifdef __GNUC__
2364 __inline
2365 #endif
2366 static struct C_stab_entry *
2367 in_word_set (str, len)
2368 register const char *str;
2369 register unsigned int len;
2370 {
2371 static struct C_stab_entry wordlist[] =
2372 {
2373 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2374 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2375 {"if", 0, st_C_ignore},
2376 {""}, {""}, {""}, {""},
2377 {"int", 0, st_C_typespec},
2378 {""}, {""},
2379 {"void", 0, st_C_typespec},
2380 {""}, {""},
2381 {"interface", C_JAVA, st_C_struct},
2382 {""},
2383 {"SYSCALL", 0, st_C_gnumacro},
2384 {""},
2385 {"return", 0, st_C_ignore},
2386 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2387 {"while", 0, st_C_ignore},
2388 {"auto", 0, st_C_typespec},
2389 {""}, {""}, {""}, {""}, {""}, {""},
2390 {"float", 0, st_C_typespec},
2391 {"typedef", 0, st_C_typedef},
2392 {"typename", C_PLPL, st_C_typespec},
2393 {""}, {""}, {""},
2394 {"friend", C_PLPL, st_C_ignore},
2395 {"volatile", 0, st_C_typespec},
2396 {""}, {""},
2397 {"for", 0, st_C_ignore},
2398 {"const", 0, st_C_typespec},
2399 {"import", C_JAVA, st_C_ignore},
2400 {""},
2401 {"define", 0, st_C_define},
2402 {"long", 0, st_C_typespec},
2403 {"implements", C_JAVA, st_C_javastruct},
2404 {"signed", 0, st_C_typespec},
2405 {""},
2406 {"extern", 0, st_C_extern},
2407 {"extends", C_JAVA, st_C_javastruct},
2408 {""},
2409 {"mutable", C_PLPL, st_C_typespec},
2410 {"template", 0, st_C_template},
2411 {"short", 0, st_C_typespec},
2412 {"bool", C_PLPL, st_C_typespec},
2413 {"char", 0, st_C_typespec},
2414 {"class", 0, st_C_class},
2415 {"operator", C_PLPL, st_C_operator},
2416 {""},
2417 {"switch", 0, st_C_ignore},
2418 {""},
2419 {"ENTRY", 0, st_C_gnumacro},
2420 {""},
2421 {"package", C_JAVA, st_C_ignore},
2422 {"union", 0, st_C_struct},
2423 {"@end", 0, st_C_objend},
2424 {"struct", 0, st_C_struct},
2425 {"namespace", C_PLPL, st_C_struct},
2426 {""}, {""},
2427 {"domain", C_STAR, st_C_struct},
2428 {"@interface", 0, st_C_objprot},
2429 {"PSEUDO", 0, st_C_gnumacro},
2430 {"double", 0, st_C_typespec},
2431 {""},
2432 {"@protocol", 0, st_C_objprot},
2433 {""},
2434 {"static", 0, st_C_typespec},
2435 {""}, {""},
2436 {"DEFUN", 0, st_C_gnumacro},
2437 {""}, {""}, {""}, {""},
2438 {"explicit", C_PLPL, st_C_typespec},
2439 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2440 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2441 {""},
2442 {"enum", 0, st_C_enum},
2443 {""}, {""},
2444 {"unsigned", 0, st_C_typespec},
2445 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2446 {"@implementation",0, st_C_objimpl}
2447 };
2448
2449 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2450 {
2451 register int key = hash (str, len);
2452
2453 if (key <= MAX_HASH_VALUE && key >= 0)
2454 {
2455 register const char *s = wordlist[key].name;
2456
2457 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2458 return &wordlist[key];
2459 }
2460 }
2461 return 0;
2462 }
2463 /*%>*/
2464
2465 static enum sym_type
2466 C_symtype (str, len, c_ext)
2467 char *str;
2468 int len;
2469 int c_ext;
2470 {
2471 register struct C_stab_entry *se = in_word_set (str, len);
2472
2473 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2474 return st_none;
2475 return se->type;
2476 }
2477
2478 \f
2479 /*
2480 * C functions and variables are recognized using a simple
2481 * finite automaton. fvdef is its state variable.
2482 */
2483 static enum
2484 {
2485 fvnone, /* nothing seen */
2486 fdefunkey, /* Emacs DEFUN keyword seen */
2487 fdefunname, /* Emacs DEFUN name seen */
2488 foperator, /* func: operator keyword seen (cplpl) */
2489 fvnameseen, /* function or variable name seen */
2490 fstartlist, /* func: just after open parenthesis */
2491 finlist, /* func: in parameter list */
2492 flistseen, /* func: after parameter list */
2493 fignore, /* func: before open brace */
2494 vignore /* var-like: ignore until ';' */
2495 } fvdef;
2496
2497 static bool fvextern; /* func or var: extern keyword seen; */
2498
2499 /*
2500 * typedefs are recognized using a simple finite automaton.
2501 * typdef is its state variable.
2502 */
2503 static enum
2504 {
2505 tnone, /* nothing seen */
2506 tkeyseen, /* typedef keyword seen */
2507 ttypeseen, /* defined type seen */
2508 tinbody, /* inside typedef body */
2509 tend, /* just before typedef tag */
2510 tignore /* junk after typedef tag */
2511 } typdef;
2512
2513 /*
2514 * struct-like structures (enum, struct and union) are recognized
2515 * using another simple finite automaton. `structdef' is its state
2516 * variable.
2517 */
2518 static enum
2519 {
2520 snone, /* nothing seen yet,
2521 or in struct body if cblev > 0 */
2522 skeyseen, /* struct-like keyword seen */
2523 stagseen, /* struct-like tag seen */
2524 sintemplate, /* inside template (ignore) */
2525 scolonseen /* colon seen after struct-like tag */
2526 } structdef;
2527
2528 /*
2529 * When objdef is different from onone, objtag is the name of the class.
2530 */
2531 static char *objtag = "<uninited>";
2532
2533 /*
2534 * Yet another little state machine to deal with preprocessor lines.
2535 */
2536 static enum
2537 {
2538 dnone, /* nothing seen */
2539 dsharpseen, /* '#' seen as first char on line */
2540 ddefineseen, /* '#' and 'define' seen */
2541 dignorerest /* ignore rest of line */
2542 } definedef;
2543
2544 /*
2545 * State machine for Objective C protocols and implementations.
2546 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2547 */
2548 static enum
2549 {
2550 onone, /* nothing seen */
2551 oprotocol, /* @interface or @protocol seen */
2552 oimplementation, /* @implementations seen */
2553 otagseen, /* class name seen */
2554 oparenseen, /* parenthesis before category seen */
2555 ocatseen, /* category name seen */
2556 oinbody, /* in @implementation body */
2557 omethodsign, /* in @implementation body, after +/- */
2558 omethodtag, /* after method name */
2559 omethodcolon, /* after method colon */
2560 omethodparm, /* after method parameter */
2561 oignore /* wait for @end */
2562 } objdef;
2563
2564
2565 /*
2566 * Use this structure to keep info about the token read, and how it
2567 * should be tagged. Used by the make_C_tag function to build a tag.
2568 */
2569 static struct tok
2570 {
2571 bool valid;
2572 bool named;
2573 int offset;
2574 int length;
2575 int lineno;
2576 long linepos;
2577 char *line;
2578 } token; /* latest token read */
2579
2580 /*
2581 * Variables and functions for dealing with nested structures.
2582 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2583 */
2584 static void pushclass_above __P((int, char *, int));
2585 static void popclass_above __P((int));
2586 static void write_classname __P((linebuffer *, char *qualifier));
2587
2588 static struct {
2589 char **cname; /* nested class names */
2590 int *cblev; /* nested class curly brace level */
2591 int nl; /* class nesting level (elements used) */
2592 int size; /* length of the array */
2593 } cstack; /* stack for nested declaration tags */
2594 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2595 #define nestlev (cstack.nl)
2596 /* After struct keyword or in struct body, not inside a nested function. */
2597 #define instruct (structdef == snone && nestlev > 0 \
2598 && cblev == cstack.cblev[nestlev-1] + 1)
2599
2600 static void
2601 pushclass_above (cblev, str, len)
2602 int cblev;
2603 char *str;
2604 int len;
2605 {
2606 int nl;
2607
2608 popclass_above (cblev);
2609 nl = cstack.nl;
2610 if (nl >= cstack.size)
2611 {
2612 int size = cstack.size *= 2;
2613 xrnew (cstack.cname, size, char *);
2614 xrnew (cstack.cblev, size, int);
2615 }
2616 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2617 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2618 cstack.cblev[nl] = cblev;
2619 cstack.nl = nl + 1;
2620 }
2621
2622 static void
2623 popclass_above (cblev)
2624 int cblev;
2625 {
2626 int nl;
2627
2628 for (nl = cstack.nl - 1;
2629 nl >= 0 && cstack.cblev[nl] >= cblev;
2630 nl--)
2631 {
2632 if (cstack.cname[nl] != NULL)
2633 free (cstack.cname[nl]);
2634 cstack.nl = nl;
2635 }
2636 }
2637
2638 static void
2639 write_classname (cn, qualifier)
2640 linebuffer *cn;
2641 char *qualifier;
2642 {
2643 int i, len;
2644 int qlen = strlen (qualifier);
2645
2646 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2647 {
2648 len = 0;
2649 cn->len = 0;
2650 cn->buffer[0] = '\0';
2651 }
2652 else
2653 {
2654 len = strlen (cstack.cname[0]);
2655 linebuffer_setlen (cn, len);
2656 strcpy (cn->buffer, cstack.cname[0]);
2657 }
2658 for (i = 1; i < cstack.nl; i++)
2659 {
2660 char *s;
2661 int slen;
2662
2663 s = cstack.cname[i];
2664 if (s == NULL)
2665 continue;
2666 slen = strlen (s);
2667 len += slen + qlen;
2668 linebuffer_setlen (cn, len);
2669 strncat (cn->buffer, qualifier, qlen);
2670 strncat (cn->buffer, s, slen);
2671 }
2672 }
2673
2674 \f
2675 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2676 static void make_C_tag __P((bool));
2677
2678 /*
2679 * consider_token ()
2680 * checks to see if the current token is at the start of a
2681 * function or variable, or corresponds to a typedef, or
2682 * is a struct/union/enum tag, or #define, or an enum constant.
2683 *
2684 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2685 * with args. C_EXTP points to which language we are looking at.
2686 *
2687 * Globals
2688 * fvdef IN OUT
2689 * structdef IN OUT
2690 * definedef IN OUT
2691 * typdef IN OUT
2692 * objdef IN OUT
2693 */
2694
2695 static bool
2696 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2697 register char *str; /* IN: token pointer */
2698 register int len; /* IN: token length */
2699 register int c; /* IN: first char after the token */
2700 int *c_extp; /* IN, OUT: C extensions mask */
2701 int cblev; /* IN: curly brace level */
2702 int parlev; /* IN: parenthesis level */
2703 bool *is_func_or_var; /* OUT: function or variable found */
2704 {
2705 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2706 structtype is the type of the preceding struct-like keyword, and
2707 structcblev is the curly brace level where it has been seen. */
2708 static enum sym_type structtype;
2709 static int structcblev;
2710 static enum sym_type toktype;
2711
2712
2713 toktype = C_symtype (str, len, *c_extp);
2714
2715 /*
2716 * Advance the definedef state machine.
2717 */
2718 switch (definedef)
2719 {
2720 case dnone:
2721 /* We're not on a preprocessor line. */
2722 if (toktype == st_C_gnumacro)
2723 {
2724 fvdef = fdefunkey;
2725 return FALSE;
2726 }
2727 break;
2728 case dsharpseen:
2729 if (toktype == st_C_define)
2730 {
2731 definedef = ddefineseen;
2732 }
2733 else
2734 {
2735 definedef = dignorerest;
2736 }
2737 return FALSE;
2738 case ddefineseen:
2739 /*
2740 * Make a tag for any macro, unless it is a constant
2741 * and constantypedefs is FALSE.
2742 */
2743 definedef = dignorerest;
2744 *is_func_or_var = (c == '(');
2745 if (!*is_func_or_var && !constantypedefs)
2746 return FALSE;
2747 else
2748 return TRUE;
2749 case dignorerest:
2750 return FALSE;
2751 default:
2752 error ("internal error: definedef value.", (char *)NULL);
2753 }
2754
2755 /*
2756 * Now typedefs
2757 */
2758 switch (typdef)
2759 {
2760 case tnone:
2761 if (toktype == st_C_typedef)
2762 {
2763 if (typedefs)
2764 typdef = tkeyseen;
2765 fvextern = FALSE;
2766 fvdef = fvnone;
2767 return FALSE;
2768 }
2769 break;
2770 case tkeyseen:
2771 switch (toktype)
2772 {
2773 case st_none:
2774 case st_C_typespec:
2775 case st_C_class:
2776 case st_C_struct:
2777 case st_C_enum:
2778 typdef = ttypeseen;
2779 break;
2780 }
2781 break;
2782 case ttypeseen:
2783 if (structdef == snone && fvdef == fvnone)
2784 {
2785 fvdef = fvnameseen;
2786 return TRUE;
2787 }
2788 break;
2789 case tend:
2790 switch (toktype)
2791 {
2792 case st_C_typespec:
2793 case st_C_class:
2794 case st_C_struct:
2795 case st_C_enum:
2796 return FALSE;
2797 }
2798 return TRUE;
2799 }
2800
2801 /*
2802 * This structdef business is NOT invoked when we are ctags and the
2803 * file is plain C. This is because a struct tag may have the same
2804 * name as another tag, and this loses with ctags.
2805 */
2806 switch (toktype)
2807 {
2808 case st_C_javastruct:
2809 if (structdef == stagseen)
2810 structdef = scolonseen;
2811 return FALSE;
2812 case st_C_template:
2813 case st_C_class:
2814 if (cblev == 0
2815 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2816 && definedef == dnone && structdef == snone
2817 && typdef == tnone && fvdef == fvnone)
2818 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2819 if (toktype == st_C_template)
2820 break;
2821 /* FALLTHRU */
2822 case st_C_struct:
2823 case st_C_enum:
2824 if (parlev == 0
2825 && fvdef != vignore
2826 && (typdef == tkeyseen
2827 || (typedefs_or_cplusplus && structdef == snone)))
2828 {
2829 structdef = skeyseen;
2830 structtype = toktype;
2831 structcblev = cblev;
2832 }
2833 return FALSE;
2834 }
2835
2836 if (structdef == skeyseen)
2837 {
2838 structdef = stagseen;
2839 return TRUE;
2840 }
2841
2842 if (typdef != tnone)
2843 definedef = dnone;
2844
2845 /* Detect Objective C constructs. */
2846 switch (objdef)
2847 {
2848 case onone:
2849 switch (toktype)
2850 {
2851 case st_C_objprot:
2852 objdef = oprotocol;
2853 return FALSE;
2854 case st_C_objimpl:
2855 objdef = oimplementation;
2856 return FALSE;
2857 }
2858 break;
2859 case oimplementation:
2860 /* Save the class tag for functions or variables defined inside. */
2861 objtag = savenstr (str, len);
2862 objdef = oinbody;
2863 return FALSE;
2864 case oprotocol:
2865 /* Save the class tag for categories. */
2866 objtag = savenstr (str, len);
2867 objdef = otagseen;
2868 *is_func_or_var = TRUE;
2869 return TRUE;
2870 case oparenseen:
2871 objdef = ocatseen;
2872 *is_func_or_var = TRUE;
2873 return TRUE;
2874 case oinbody:
2875 break;
2876 case omethodsign:
2877 if (parlev == 0)
2878 {
2879 objdef = omethodtag;
2880 linebuffer_setlen (&token_name, len);
2881 strncpy (token_name.buffer, str, len);
2882 token_name.buffer[len] = '\0';
2883 return TRUE;
2884 }
2885 return FALSE;
2886 case omethodcolon:
2887 if (parlev == 0)
2888 objdef = omethodparm;
2889 return FALSE;
2890 case omethodparm:
2891 if (parlev == 0)
2892 {
2893 objdef = omethodtag;
2894 linebuffer_setlen (&token_name, token_name.len + len);
2895 strncat (token_name.buffer, str, len);
2896 return TRUE;
2897 }
2898 return FALSE;
2899 case oignore:
2900 if (toktype == st_C_objend)
2901 {
2902 /* Memory leakage here: the string pointed by objtag is
2903 never released, because many tests would be needed to
2904 avoid breaking on incorrect input code. The amount of
2905 memory leaked here is the sum of the lengths of the
2906 class tags.
2907 free (objtag); */
2908 objdef = onone;
2909 }
2910 return FALSE;
2911 }
2912
2913 /* A function, variable or enum constant? */
2914 switch (toktype)
2915 {
2916 case st_C_extern:
2917 fvextern = TRUE;
2918 /* FALLTHRU */
2919 case st_C_typespec:
2920 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2921 fvdef = fvnone; /* should be useless */
2922 return FALSE;
2923 case st_C_ignore:
2924 fvextern = FALSE;
2925 fvdef = vignore;
2926 return FALSE;
2927 case st_C_operator:
2928 fvdef = foperator;
2929 *is_func_or_var = TRUE;
2930 return TRUE;
2931 case st_none:
2932 if (constantypedefs
2933 && structdef == snone
2934 && structtype == st_C_enum && cblev > structcblev)
2935 return TRUE; /* enum constant */
2936 switch (fvdef)
2937 {
2938 case fdefunkey:
2939 if (cblev > 0)
2940 break;
2941 fvdef = fdefunname; /* GNU macro */
2942 *is_func_or_var = TRUE;
2943 return TRUE;
2944 case fvnone:
2945 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2946 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2947 {
2948 fvdef = vignore;
2949 return FALSE;
2950 }
2951 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2952 {
2953 fvdef = foperator;
2954 *is_func_or_var = TRUE;
2955 return TRUE;
2956 }
2957 if (cblev > 0 && !instruct)
2958 break;
2959 fvdef = fvnameseen; /* function or variable */
2960 *is_func_or_var = TRUE;
2961 return TRUE;
2962 }
2963 break;
2964 }
2965
2966 return FALSE;
2967 }
2968
2969 \f
2970 /*
2971 * C_entries often keeps pointers to tokens or lines which are older than
2972 * the line currently read. By keeping two line buffers, and switching
2973 * them at end of line, it is possible to use those pointers.
2974 */
2975 static struct
2976 {
2977 long linepos;
2978 linebuffer lb;
2979 } lbs[2];
2980
2981 #define current_lb_is_new (newndx == curndx)
2982 #define switch_line_buffers() (curndx = 1 - curndx)
2983
2984 #define curlb (lbs[curndx].lb)
2985 #define newlb (lbs[newndx].lb)
2986 #define curlinepos (lbs[curndx].linepos)
2987 #define newlinepos (lbs[newndx].linepos)
2988
2989 #define cplpl ((c_ext & C_PLPL) == C_PLPL)
2990 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2991
2992 #define CNL_SAVE_DEFINEDEF() \
2993 do { \
2994 curlinepos = charno; \
2995 readline (&curlb, inf); \
2996 lp = curlb.buffer; \
2997 quotednl = FALSE; \
2998 newndx = curndx; \
2999 } while (0)
3000
3001 #define CNL() \
3002 do { \
3003 CNL_SAVE_DEFINEDEF(); \
3004 if (savetoken.valid) \
3005 { \
3006 token = savetoken; \
3007 savetoken.valid = FALSE; \
3008 } \
3009 definedef = dnone; \
3010 } while (0)
3011
3012
3013 static void
3014 make_C_tag (isfun)
3015 bool isfun;
3016 {
3017 /* This function should never be called when token.valid is FALSE, but
3018 we must protect against invalid input or internal errors. */
3019 if (!DEBUG && !token.valid)
3020 return;
3021
3022 if (!token.valid) /* this case is optimised away if !DEBUG */
3023 make_tag (concat (token_name.buffer, "##invalid token##", ""),
3024 token_name.len + 17, isfun, token.line,
3025 token.offset+token.length+1, token.lineno, token.linepos);
3026 else
3027 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3028 token.offset+token.length+1, token.lineno, token.linepos);
3029
3030 token.valid = FALSE;
3031 }
3032
3033
3034 /*
3035 * C_entries ()
3036 * This routine finds functions, variables, typedefs,
3037 * #define's, enum constants and struct/union/enum definitions in
3038 * C syntax and adds them to the list.
3039 */
3040 static void
3041 C_entries (c_ext, inf)
3042 int c_ext; /* extension of C */
3043 FILE *inf; /* input file */
3044 {
3045 register char c; /* latest char read; '\0' for end of line */
3046 register char *lp; /* pointer one beyond the character `c' */
3047 int curndx, newndx; /* indices for current and new lb */
3048 register int tokoff; /* offset in line of start of current token */
3049 register int toklen; /* length of current token */
3050 char *qualifier; /* string used to qualify names */
3051 int qlen; /* length of qualifier */
3052 int cblev; /* current curly brace level */
3053 int parlev; /* current parenthesis level */
3054 int typdefcblev; /* cblev where a typedef struct body begun */
3055 bool incomm, inquote, inchar, quotednl, midtoken;
3056 bool yacc_rules; /* in the rules part of a yacc file */
3057 struct tok savetoken; /* token saved during preprocessor handling */
3058
3059
3060 linebuffer_init (&lbs[0].lb);
3061 linebuffer_init (&lbs[1].lb);
3062 if (cstack.size == 0)
3063 {
3064 cstack.size = (DEBUG) ? 1 : 4;
3065 cstack.nl = 0;
3066 cstack.cname = xnew (cstack.size, char *);
3067 cstack.cblev = xnew (cstack.size, int);
3068 }
3069
3070 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3071 curndx = newndx = 0;
3072 lp = curlb.buffer;
3073 *lp = 0;
3074
3075 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3076 structdef = snone; definedef = dnone; objdef = onone;
3077 yacc_rules = FALSE;
3078 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3079 token.valid = savetoken.valid = FALSE;
3080 cblev = 0;
3081 parlev = 0;
3082 if (cjava)
3083 { qualifier = "."; qlen = 1; }
3084 else
3085 { qualifier = "::"; qlen = 2; }
3086
3087
3088 while (!feof (inf))
3089 {
3090 c = *lp++;
3091 if (c == '\\')
3092 {
3093 /* If we're at the end of the line, the next character is a
3094 '\0'; don't skip it, because it's the thing that tells us
3095 to read the next line. */
3096 if (*lp == '\0')
3097 {
3098 quotednl = TRUE;
3099 continue;
3100 }
3101 lp++;
3102 c = ' ';
3103 }
3104 else if (incomm)
3105 {
3106 switch (c)
3107 {
3108 case '*':
3109 if (*lp == '/')
3110 {
3111 c = *lp++;
3112 incomm = FALSE;
3113 }
3114 break;
3115 case '\0':
3116 /* Newlines inside comments do not end macro definitions in
3117 traditional cpp. */
3118 CNL_SAVE_DEFINEDEF ();
3119 break;
3120 }
3121 continue;
3122 }
3123 else if (inquote)
3124 {
3125 switch (c)
3126 {
3127 case '"':
3128 inquote = FALSE;
3129 break;
3130 case '\0':
3131 /* Newlines inside strings do not end macro definitions
3132 in traditional cpp, even though compilers don't
3133 usually accept them. */
3134 CNL_SAVE_DEFINEDEF ();
3135 break;
3136 }
3137 continue;
3138 }
3139 else if (inchar)
3140 {
3141 switch (c)
3142 {
3143 case '\0':
3144 /* Hmmm, something went wrong. */
3145 CNL ();
3146 /* FALLTHRU */
3147 case '\'':
3148 inchar = FALSE;
3149 break;
3150 }
3151 continue;
3152 }
3153 else
3154 switch (c)
3155 {
3156 case '"':
3157 inquote = TRUE;
3158 switch (fvdef)
3159 {
3160 case fdefunkey:
3161 case fstartlist:
3162 case finlist:
3163 case fignore:
3164 case vignore:
3165 break;
3166 default:
3167 fvextern = FALSE;
3168 fvdef = fvnone;
3169 }
3170 continue;
3171 case '\'':
3172 inchar = TRUE;
3173 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3174 {
3175 fvextern = FALSE;
3176 fvdef = fvnone;
3177 }
3178 continue;
3179 case '/':
3180 if (*lp == '*')
3181 {
3182 lp++;
3183 incomm = TRUE;
3184 continue;
3185 }
3186 else if (/* cplpl && */ *lp == '/')
3187 {
3188 c = '\0';
3189 break;
3190 }
3191 else
3192 break;
3193 case '%':
3194 if ((c_ext & YACC) && *lp == '%')
3195 {
3196 /* Entering or exiting rules section in yacc file. */
3197 lp++;
3198 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3199 typdef = tnone; structdef = snone;
3200 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3201 cblev = 0;
3202 yacc_rules = !yacc_rules;
3203 continue;
3204 }
3205 else
3206 break;
3207 case '#':
3208 if (definedef == dnone)
3209 {
3210 char *cp;
3211 bool cpptoken = TRUE;
3212
3213 /* Look back on this line. If all blanks, or nonblanks
3214 followed by an end of comment, this is a preprocessor
3215 token. */
3216 for (cp = newlb.buffer; cp < lp-1; cp++)
3217 if (!iswhite (*cp))
3218 {
3219 if (*cp == '*' && *(cp+1) == '/')
3220 {
3221 cp++;
3222 cpptoken = TRUE;
3223 }
3224 else
3225 cpptoken = FALSE;
3226 }
3227 if (cpptoken)
3228 definedef = dsharpseen;
3229 } /* if (definedef == dnone) */
3230
3231 continue;
3232 } /* switch (c) */
3233
3234
3235 /* Consider token only if some involved conditions are satisfied. */
3236 if (typdef != tignore
3237 && definedef != dignorerest
3238 && fvdef != finlist
3239 && structdef != sintemplate
3240 && (definedef != dnone
3241 || structdef != scolonseen))
3242 {
3243 if (midtoken)
3244 {
3245 if (endtoken (c))
3246 {
3247 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3248 {
3249 /*
3250 * This handles :: in the middle, but not at the
3251 * beginning of an identifier. Also, space-separated
3252 * :: is not recognised.
3253 */
3254 lp += 2;
3255 toklen += 2;
3256 c = lp[-1];
3257 goto still_in_token;
3258 }
3259 else
3260 {
3261 bool funorvar = FALSE;
3262
3263 if (yacc_rules
3264 || consider_token (newlb.buffer + tokoff, toklen, c,
3265 &c_ext, cblev, parlev, &funorvar))
3266 {
3267 if (fvdef == foperator)
3268 {
3269 char *oldlp = lp;
3270 lp = skip_spaces (lp-1);
3271 if (*lp != '\0')
3272 lp += 1;
3273 while (*lp != '\0'
3274 && !iswhite (*lp) && *lp != '(')
3275 lp += 1;
3276 c = *lp++;
3277 toklen += lp - oldlp;
3278 }
3279 token.named = FALSE;
3280 if ((c_ext & C_EXT) /* not pure C */
3281 && nestlev > 0 && definedef == dnone)
3282 /* in struct body */
3283 {
3284 write_classname (&token_name, qualifier);
3285 linebuffer_setlen (&token_name,
3286 token_name.len+qlen+toklen);
3287 strcat (token_name.buffer, qualifier);
3288 strncat (token_name.buffer,
3289 newlb.buffer + tokoff, toklen);
3290 token.named = TRUE;
3291 }
3292 else if (objdef == ocatseen)
3293 /* Objective C category */
3294 {
3295 int len = strlen (objtag) + 2 + toklen;
3296 linebuffer_setlen (&token_name, len);
3297 strcpy (token_name.buffer, objtag);
3298 strcat (token_name.buffer, "(");
3299 strncat (token_name.buffer,
3300 newlb.buffer + tokoff, toklen);
3301 strcat (token_name.buffer, ")");
3302 token.named = TRUE;
3303 }
3304 else if (objdef == omethodtag
3305 || objdef == omethodparm)
3306 /* Objective C method */
3307 {
3308 token.named = TRUE;
3309 }
3310 else if (fvdef == fdefunname)
3311 /* GNU DEFUN and similar macros */
3312 {
3313 bool defun = (newlb.buffer[tokoff] == 'F');
3314 int off = tokoff;
3315 int len = toklen;
3316
3317 /* Rewrite the tag so that emacs lisp DEFUNs
3318 can be found by their elisp name */
3319 if (defun)
3320 {
3321 off += 1;
3322 len -= 1;
3323 }
3324 len = toklen;
3325 linebuffer_setlen (&token_name, len);
3326 strncpy (token_name.buffer,
3327 newlb.buffer + off, len);
3328 token_name.buffer[len] = '\0';
3329 if (defun)
3330 while (--len >= 0)
3331 if (token_name.buffer[len] == '_')
3332 token_name.buffer[len] = '-';
3333 token.named = defun;
3334 }
3335 else
3336 {
3337 linebuffer_setlen (&token_name, toklen);
3338 strncpy (token_name.buffer,
3339 newlb.buffer + tokoff, toklen);
3340 token_name.buffer[toklen] = '\0';
3341 /* Name macros and members. */
3342 token.named = (structdef == stagseen
3343 || typdef == ttypeseen
3344 || typdef == tend
3345 || (funorvar
3346 && definedef == dignorerest)
3347 || (funorvar
3348 && definedef == dnone
3349 && structdef == snone
3350 && cblev > 0));
3351 }
3352 token.lineno = lineno;
3353 token.offset = tokoff;
3354 token.length = toklen;
3355 token.line = newlb.buffer;
3356 token.linepos = newlinepos;
3357 token.valid = TRUE;
3358
3359 if (definedef == dnone
3360 && (fvdef == fvnameseen
3361 || fvdef == foperator
3362 || structdef == stagseen
3363 || typdef == tend
3364 || typdef == ttypeseen
3365 || objdef != onone))
3366 {
3367 if (current_lb_is_new)
3368 switch_line_buffers ();
3369 }
3370 else if (definedef != dnone
3371 || fvdef == fdefunname
3372 || instruct)
3373 make_C_tag (funorvar);
3374 }
3375 midtoken = FALSE;
3376 }
3377 } /* if (endtoken (c)) */
3378 else if (intoken (c))
3379 still_in_token:
3380 {
3381 toklen++;
3382 continue;
3383 }
3384 } /* if (midtoken) */
3385 else if (begtoken (c))
3386 {
3387 switch (definedef)
3388 {
3389 case dnone:
3390 switch (fvdef)
3391 {
3392 case fstartlist:
3393 fvdef = finlist;
3394 continue;
3395 case flistseen:
3396 #if 0
3397 if (!instruct || members)
3398 #endif
3399 make_C_tag (TRUE); /* a function */
3400 fvdef = fignore;
3401 break;
3402 case fvnameseen:
3403 fvdef = fvnone;
3404 break;
3405 }
3406 if (structdef == stagseen && !cjava)
3407 {
3408 popclass_above (cblev);
3409 structdef = snone;
3410 }
3411 break;
3412 case dsharpseen:
3413 savetoken = token;
3414 break;
3415 }
3416 if (!yacc_rules || lp == newlb.buffer + 1)
3417 {
3418 tokoff = lp - 1 - newlb.buffer;
3419 toklen = 1;
3420 midtoken = TRUE;
3421 }
3422 continue;
3423 } /* if (begtoken) */
3424 } /* if must look at token */
3425
3426
3427 /* Detect end of line, colon, comma, semicolon and various braces
3428 after having handled a token.*/
3429 switch (c)
3430 {
3431 case ':':
3432 if (yacc_rules && token.offset == 0 && token.valid)
3433 {
3434 make_C_tag (FALSE); /* a yacc function */
3435 break;
3436 }
3437 if (definedef != dnone)
3438 break;
3439 switch (objdef)
3440 {
3441 case otagseen:
3442 objdef = oignore;
3443 make_C_tag (TRUE); /* an Objective C class */
3444 break;
3445 case omethodtag:
3446 case omethodparm:
3447 objdef = omethodcolon;
3448 linebuffer_setlen (&token_name, token_name.len + 1);
3449 strcat (token_name.buffer, ":");
3450 break;
3451 }
3452 if (structdef == stagseen)
3453 {
3454 structdef = scolonseen;
3455 break;
3456 }
3457 #if 0
3458 if (cplpl && fvdef == flistseen)
3459 {
3460 make_C_tag (TRUE); /* a function */
3461 fvdef = fignore;
3462 break;
3463 }
3464 #endif
3465 break;
3466 case ';':
3467 if (definedef != dnone)
3468 break;
3469 switch (typdef)
3470 {
3471 case tend:
3472 case ttypeseen:
3473 make_C_tag (FALSE); /* a typedef */
3474 typdef = tnone;
3475 fvdef = fvnone;
3476 break;
3477 case tnone:
3478 case tinbody:
3479 case tignore:
3480 switch (fvdef)
3481 {
3482 case fignore:
3483 if (typdef == tignore || cplpl)
3484 fvdef = fvnone;
3485 break;
3486 case fvnameseen:
3487 if ((globals && cblev == 0 && (!fvextern || declarations))
3488 || (members && instruct))
3489 make_C_tag (FALSE); /* a variable */
3490 fvextern = FALSE;
3491 fvdef = fvnone;
3492 token.valid = FALSE;
3493 break;
3494 case flistseen:
3495 if ((declarations && typdef == tnone && !instruct)
3496 || (members && typdef != tignore && instruct))
3497 make_C_tag (TRUE); /* a function declaration */
3498 /* FALLTHRU */
3499 default:
3500 fvextern = FALSE;
3501 fvdef = fvnone;
3502 if (declarations
3503 && structdef == stagseen && (c_ext & C_PLPL))
3504 make_C_tag (FALSE); /* forward declaration */
3505 else
3506 /* The following instruction invalidates the token.
3507 Probably the token should be invalidated in all other
3508 cases where some state machine is reset prematurely. */
3509 token.valid = FALSE;
3510 } /* switch (fvdef) */
3511 /* FALLTHRU */
3512 default:
3513 if (!instruct)
3514 typdef = tnone;
3515 }
3516 if (structdef == stagseen)
3517 structdef = snone;
3518 break;
3519 case ',':
3520 if (definedef != dnone)
3521 break;
3522 switch (objdef)
3523 {
3524 case omethodtag:
3525 case omethodparm:
3526 make_C_tag (TRUE); /* an Objective C method */
3527 objdef = oinbody;
3528 break;
3529 }
3530 switch (fvdef)
3531 {
3532 case fdefunkey:
3533 case foperator:
3534 case fstartlist:
3535 case finlist:
3536 case fignore:
3537 case vignore:
3538 break;
3539 case fdefunname:
3540 fvdef = fignore;
3541 break;
3542 case fvnameseen: /* a variable */
3543 if ((globals && cblev == 0 && (!fvextern || declarations))
3544 || (members && instruct))
3545 make_C_tag (FALSE);
3546 break;
3547 case flistseen: /* a function */
3548 if ((declarations && typdef == tnone && !instruct)
3549 || (members && typdef != tignore && instruct))
3550 {
3551 make_C_tag (TRUE); /* a function declaration */
3552 fvdef = fvnameseen;
3553 }
3554 else if (!declarations)
3555 fvdef = fvnone;
3556 token.valid = FALSE;
3557 break;
3558 default:
3559 fvdef = fvnone;
3560 }
3561 if (structdef == stagseen)
3562 structdef = snone;
3563 break;
3564 case '[':
3565 if (definedef != dnone)
3566 break;
3567 if (structdef == stagseen)
3568 structdef = snone;
3569 switch (typdef)
3570 {
3571 case ttypeseen:
3572 case tend:
3573 typdef = tignore;
3574 make_C_tag (FALSE); /* a typedef */
3575 break;
3576 case tnone:
3577 case tinbody:
3578 switch (fvdef)
3579 {
3580 case foperator:
3581 case finlist:
3582 case fignore:
3583 case vignore:
3584 break;
3585 case fvnameseen:
3586 if ((members && cblev == 1)
3587 || (globals && cblev == 0
3588 && (!fvextern || declarations)))
3589 make_C_tag (FALSE); /* a variable */
3590 /* FALLTHRU */
3591 default:
3592 fvdef = fvnone;
3593 }
3594 break;
3595 }
3596 break;
3597 case '(':
3598 if (definedef != dnone)
3599 break;
3600 if (objdef == otagseen && parlev == 0)
3601 objdef = oparenseen;
3602 switch (fvdef)
3603 {
3604 case fvnameseen:
3605 if (typdef == ttypeseen
3606 && *lp != '*'
3607 && !instruct)
3608 {
3609 /* This handles constructs like:
3610 typedef void OperatorFun (int fun); */
3611 make_C_tag (FALSE);
3612 typdef = tignore;
3613 fvdef = fignore;
3614 break;
3615 }
3616 /* FALLTHRU */
3617 case foperator:
3618 fvdef = fstartlist;
3619 break;
3620 case flistseen:
3621 fvdef = finlist;
3622 break;
3623 }
3624 parlev++;
3625 break;
3626 case ')':
3627 if (definedef != dnone)
3628 break;
3629 if (objdef == ocatseen && parlev == 1)
3630 {
3631 make_C_tag (TRUE); /* an Objective C category */
3632 objdef = oignore;
3633 }
3634 if (--parlev == 0)
3635 {
3636 switch (fvdef)
3637 {
3638 case fstartlist:
3639 case finlist:
3640 fvdef = flistseen;
3641 break;
3642 }
3643 if (!instruct
3644 && (typdef == tend
3645 || typdef == ttypeseen))
3646 {
3647 typdef = tignore;
3648 make_C_tag (FALSE); /* a typedef */
3649 }
3650 }
3651 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3652 parlev = 0;
3653 break;
3654 case '{':
3655 if (definedef != dnone)
3656 break;
3657 if (typdef == ttypeseen)
3658 {
3659 /* Whenever typdef is set to tinbody (currently only
3660 here), typdefcblev should be set to cblev. */
3661 typdef = tinbody;
3662 typdefcblev = cblev;
3663 }
3664 switch (fvdef)
3665 {
3666 case flistseen:
3667 make_C_tag (TRUE); /* a function */
3668 /* FALLTHRU */
3669 case fignore:
3670 fvdef = fvnone;
3671 break;
3672 case fvnone:
3673 switch (objdef)
3674 {
3675 case otagseen:
3676 make_C_tag (TRUE); /* an Objective C class */
3677 objdef = oignore;
3678 break;
3679 case omethodtag:
3680 case omethodparm:
3681 make_C_tag (TRUE); /* an Objective C method */
3682 objdef = oinbody;
3683 break;
3684 default:
3685 /* Neutralize `extern "C" {' grot. */
3686 if (cblev == 0 && structdef == snone && nestlev == 0
3687 && typdef == tnone)
3688 cblev = -1;
3689 }
3690 break;
3691 }
3692 switch (structdef)
3693 {
3694 case skeyseen: /* unnamed struct */
3695 pushclass_above (cblev, NULL, 0);
3696 structdef = snone;
3697 break;
3698 case stagseen: /* named struct or enum */
3699 case scolonseen: /* a class */
3700 pushclass_above (cblev, token.line+token.offset, token.length);
3701 structdef = snone;
3702 make_C_tag (FALSE); /* a struct or enum */
3703 break;
3704 }
3705 cblev++;
3706 break;
3707 case '*':
3708 if (definedef != dnone)
3709 break;
3710 if (fvdef == fstartlist)
3711 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3712 break;
3713 case '}':
3714 if (definedef != dnone)
3715 break;
3716 if (!ignoreindent && lp == newlb.buffer + 1)
3717 {
3718 cblev = 0; /* reset curly brace level if first column */
3719 parlev = 0; /* also reset paren level, just in case... */
3720 }
3721 else if (cblev > 0)
3722 cblev--;
3723 popclass_above (cblev);
3724 structdef = snone;
3725 /* Only if typdef == tinbody is typdefcblev significant. */
3726 if (typdef == tinbody && cblev <= typdefcblev)
3727 {
3728 assert (cblev == typdefcblev);
3729 typdef = tend;
3730 }
3731 break;
3732 case '=':
3733 if (definedef != dnone)
3734 break;
3735 switch (fvdef)
3736 {
3737 case foperator:
3738 case finlist:
3739 case fignore:
3740 case vignore:
3741 break;
3742 case fvnameseen:
3743 if ((members && cblev == 1)
3744 || (globals && cblev == 0 && (!fvextern || declarations)))
3745 make_C_tag (FALSE); /* a variable */
3746 /* FALLTHRU */
3747 default:
3748 fvdef = vignore;
3749 }
3750 break;
3751 case '<':
3752 if (cplpl && structdef == stagseen)
3753 {
3754 structdef = sintemplate;
3755 break;
3756 }
3757 goto resetfvdef;
3758 case '>':
3759 if (structdef == sintemplate)
3760 {
3761 structdef = stagseen;
3762 break;
3763 }
3764 goto resetfvdef;
3765 case '+':
3766 case '-':
3767 if (objdef == oinbody && cblev == 0)
3768 {
3769 objdef = omethodsign;
3770 break;
3771 }
3772 /* FALLTHRU */
3773 resetfvdef:
3774 case '#': case '~': case '&': case '%': case '/': case '|':
3775 case '^': case '!': case '.': case '?': case ']':
3776 if (definedef != dnone)
3777 break;
3778 /* These surely cannot follow a function tag in C. */
3779 switch (fvdef)
3780 {
3781 case foperator:
3782 case finlist:
3783 case fignore:
3784 case vignore:
3785 break;
3786 default:
3787 fvdef = fvnone;
3788 }
3789 break;
3790 case '\0':
3791 if (objdef == otagseen)
3792 {
3793 make_C_tag (TRUE); /* an Objective C class */
3794 objdef = oignore;
3795 }
3796 /* If a macro spans multiple lines don't reset its state. */
3797 if (quotednl)
3798 CNL_SAVE_DEFINEDEF ();
3799 else
3800 CNL ();
3801 break;
3802 } /* switch (c) */
3803
3804 } /* while not eof */
3805
3806 free (lbs[0].lb.buffer);
3807 free (lbs[1].lb.buffer);
3808 }
3809
3810 /*
3811 * Process either a C++ file or a C file depending on the setting
3812 * of a global flag.
3813 */
3814 static void
3815 default_C_entries (inf)
3816 FILE *inf;
3817 {
3818 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3819 }
3820
3821 /* Always do plain C. */
3822 static void
3823 plain_C_entries (inf)
3824 FILE *inf;
3825 {
3826 C_entries (0, inf);
3827 }
3828
3829 /* Always do C++. */
3830 static void
3831 Cplusplus_entries (inf)
3832 FILE *inf;
3833 {
3834 C_entries (C_PLPL, inf);
3835 }
3836
3837 /* Always do Java. */
3838 static void
3839 Cjava_entries (inf)
3840 FILE *inf;
3841 {
3842 C_entries (C_JAVA, inf);
3843 }
3844
3845 /* Always do C*. */
3846 static void
3847 Cstar_entries (inf)
3848 FILE *inf;
3849 {
3850 C_entries (C_STAR, inf);
3851 }
3852
3853 /* Always do Yacc. */
3854 static void
3855 Yacc_entries (inf)
3856 FILE *inf;
3857 {
3858 C_entries (YACC, inf);
3859 }
3860
3861 \f
3862 /* Useful macros. */
3863 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
3864 for (; /* loop initialization */ \
3865 !feof (file_pointer) /* loop test */ \
3866 && /* instructions at start of loop */ \
3867 (readline (&line_buffer, file_pointer), \
3868 char_pointer = line_buffer.buffer, \
3869 TRUE); \
3870 )
3871 #define LOOKING_AT(cp, keyword) /* keyword is a constant string */ \
3872 (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
3873 && notinname ((cp)[sizeof(keyword)-1]) /* end of keyword */ \
3874 && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* skip spaces */
3875
3876 /*
3877 * Read a file, but do no processing. This is used to do regexp
3878 * matching on files that have no language defined.
3879 */
3880 static void
3881 just_read_file (inf)
3882 FILE *inf;
3883 {
3884 register char *dummy;
3885
3886 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3887 continue;
3888 }
3889
3890 \f
3891 /* Fortran parsing */
3892
3893 static void F_takeprec __P((void));
3894 static void F_getit __P((FILE *));
3895
3896 static void
3897 F_takeprec ()
3898 {
3899 dbp = skip_spaces (dbp);
3900 if (*dbp != '*')
3901 return;
3902 dbp++;
3903 dbp = skip_spaces (dbp);
3904 if (strneq (dbp, "(*)", 3))
3905 {
3906 dbp += 3;
3907 return;
3908 }
3909 if (!ISDIGIT (*dbp))
3910 {
3911 --dbp; /* force failure */
3912 return;
3913 }
3914 do
3915 dbp++;
3916 while (ISDIGIT (*dbp));
3917 }
3918
3919 static void
3920 F_getit (inf)
3921 FILE *inf;
3922 {
3923 register char *cp;
3924
3925 dbp = skip_spaces (dbp);
3926 if (*dbp == '\0')
3927 {
3928 readline (&lb, inf);
3929 dbp = lb.buffer;
3930 if (dbp[5] != '&')
3931 return;
3932 dbp += 6;
3933 dbp = skip_spaces (dbp);
3934 }
3935 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3936 return;
3937 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3938 continue;
3939 make_tag (dbp, cp-dbp, TRUE,
3940 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3941 }
3942
3943
3944 static void
3945 Fortran_functions (inf)
3946 FILE *inf;
3947 {
3948 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3949 {
3950 if (*dbp == '%')
3951 dbp++; /* Ratfor escape to fortran */
3952 dbp = skip_spaces (dbp);
3953 if (*dbp == '\0')
3954 continue;
3955 switch (lowcase (*dbp))
3956 {
3957 case 'i':
3958 if (nocase_tail ("integer"))
3959 F_takeprec ();
3960 break;
3961 case 'r':
3962 if (nocase_tail ("real"))
3963 F_takeprec ();
3964 break;
3965 case 'l':
3966 if (nocase_tail ("logical"))
3967 F_takeprec ();
3968 break;
3969 case 'c':
3970 if (nocase_tail ("complex") || nocase_tail ("character"))
3971 F_takeprec ();
3972 break;
3973 case 'd':
3974 if (nocase_tail ("double"))
3975 {
3976 dbp = skip_spaces (dbp);
3977 if (*dbp == '\0')
3978 continue;
3979 if (nocase_tail ("precision"))
3980 break;
3981 continue;
3982 }
3983 break;
3984 }
3985 dbp = skip_spaces (dbp);
3986 if (*dbp == '\0')
3987 continue;
3988 switch (lowcase (*dbp))
3989 {
3990 case 'f':
3991 if (nocase_tail ("function"))
3992 F_getit (inf);
3993 continue;
3994 case 's':
3995 if (nocase_tail ("subroutine"))
3996 F_getit (inf);
3997 continue;
3998 case 'e':
3999 if (nocase_tail ("entry"))
4000 F_getit (inf);
4001 continue;
4002 case 'b':
4003 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4004 {
4005 dbp = skip_spaces (dbp);
4006 if (*dbp == '\0') /* assume un-named */
4007 make_tag ("blockdata", 9, TRUE,
4008 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4009 else
4010 F_getit (inf); /* look for name */
4011 }
4012 continue;
4013 }
4014 }
4015 }
4016
4017 \f
4018 /*
4019 * Ada parsing
4020 * Original code by
4021 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.int> (1998)
4022 */
4023
4024 static void Ada_getit __P((FILE *, char *));
4025
4026 /* Once we are positioned after an "interesting" keyword, let's get
4027 the real tag value necessary. */
4028 static void
4029 Ada_getit (inf, name_qualifier)
4030 FILE *inf;
4031 char *name_qualifier;
4032 {
4033 register char *cp;
4034 char *name;
4035 char c;
4036
4037 while (!feof (inf))
4038 {
4039 dbp = skip_spaces (dbp);
4040 if (*dbp == '\0'
4041 || (dbp[0] == '-' && dbp[1] == '-'))
4042 {
4043 readline (&lb, inf);
4044 dbp = lb.buffer;
4045 }
4046 switch (lowcase(*dbp))
4047 {
4048 case 'b':
4049 if (nocase_tail ("body"))
4050 {
4051 /* Skipping body of procedure body or package body or ....
4052 resetting qualifier to body instead of spec. */
4053 name_qualifier = "/b";
4054 continue;
4055 }
4056 break;
4057 case 't':
4058 /* Skipping type of task type or protected type ... */
4059 if (nocase_tail ("type"))
4060 continue;
4061 break;
4062 }
4063 if (*dbp == '"')
4064 {
4065 dbp += 1;
4066 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4067 continue;
4068 }
4069 else
4070 {
4071 dbp = skip_spaces (dbp);
4072 for (cp = dbp;
4073 (*cp != '\0'
4074 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4075 cp++)
4076 continue;
4077 if (cp == dbp)
4078 return;
4079 }
4080 c = *cp;
4081 *cp = '\0';
4082 name = concat (dbp, name_qualifier, "");
4083 *cp = c;
4084 make_tag (name, strlen (name), TRUE,
4085 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4086 free (name);
4087 if (c == '"')
4088 dbp = cp + 1;
4089 return;
4090 }
4091 }
4092
4093 static void
4094 Ada_funcs (inf)
4095 FILE *inf;
4096 {
4097 bool inquote = FALSE;
4098 bool skip_till_semicolumn = FALSE;
4099
4100 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4101 {
4102 while (*dbp != '\0')
4103 {
4104 /* Skip a string i.e. "abcd". */
4105 if (inquote || (*dbp == '"'))
4106 {
4107 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4108 if (dbp != NULL)
4109 {
4110 inquote = FALSE;
4111 dbp += 1;
4112 continue; /* advance char */
4113 }
4114 else
4115 {
4116 inquote = TRUE;
4117 break; /* advance line */
4118 }
4119 }
4120
4121 /* Skip comments. */
4122 if (dbp[0] == '-' && dbp[1] == '-')
4123 break; /* advance line */
4124
4125 /* Skip character enclosed in single quote i.e. 'a'
4126 and skip single quote starting an attribute i.e. 'Image. */
4127 if (*dbp == '\'')
4128 {
4129 dbp++ ;
4130 if (*dbp != '\0')
4131 dbp++;
4132 continue;
4133 }
4134
4135 if (skip_till_semicolumn)
4136 {
4137 if (*dbp == ';')
4138 skip_till_semicolumn = FALSE;
4139 dbp++;
4140 continue; /* advance char */
4141 }
4142
4143 /* Search for beginning of a token. */
4144 if (!begtoken (*dbp))
4145 {
4146 dbp++;
4147 continue; /* advance char */
4148 }
4149
4150 /* We are at the beginning of a token. */
4151 switch (lowcase(*dbp))
4152 {
4153 case 'f':
4154 if (!packages_only && nocase_tail ("function"))
4155 Ada_getit (inf, "/f");
4156 else
4157 break; /* from switch */
4158 continue; /* advance char */
4159 case 'p':
4160 if (!packages_only && nocase_tail ("procedure"))
4161 Ada_getit (inf, "/p");
4162 else if (nocase_tail ("package"))
4163 Ada_getit (inf, "/s");
4164 else if (nocase_tail ("protected")) /* protected type */
4165 Ada_getit (inf, "/t");
4166 else
4167 break; /* from switch */
4168 continue; /* advance char */
4169
4170 case 'u':
4171 if (typedefs && !packages_only && nocase_tail ("use"))
4172 {
4173 /* when tagging types, avoid tagging use type Pack.Typename;
4174 for this, we will skip everything till a ; */
4175 skip_till_semicolumn = TRUE;
4176 continue; /* advance char */
4177 }
4178
4179 case 't':
4180 if (!packages_only && nocase_tail ("task"))
4181 Ada_getit (inf, "/k");
4182 else if (typedefs && !packages_only && nocase_tail ("type"))
4183 {
4184 Ada_getit (inf, "/t");
4185 while (*dbp != '\0')
4186 dbp += 1;
4187 }
4188 else
4189 break; /* from switch */
4190 continue; /* advance char */
4191 }
4192
4193 /* Look for the end of the token. */
4194 while (!endtoken (*dbp))
4195 dbp++;
4196
4197 } /* advance char */
4198 } /* advance line */
4199 }
4200
4201 \f
4202 /*
4203 * Unix and microcontroller assembly tag handling
4204 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4205 * Idea by Bob Weiner, Motorola Inc. (1994)
4206 */
4207 static void
4208 Asm_labels (inf)
4209 FILE *inf;
4210 {
4211 register char *cp;
4212
4213 LOOP_ON_INPUT_LINES (inf, lb, cp)
4214 {
4215 /* If first char is alphabetic or one of [_.$], test for colon
4216 following identifier. */
4217 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4218 {
4219 /* Read past label. */
4220 cp++;
4221 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4222 cp++;
4223 if (*cp == ':' || iswhite (*cp))
4224 /* Found end of label, so copy it and add it to the table. */
4225 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4226 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4227 }
4228 }
4229 }
4230
4231 \f
4232 /*
4233 * Perl support
4234 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4235 * Perl variable names: /^(my|local).../
4236 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4237 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4238 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4239 */
4240 static void
4241 Perl_functions (inf)
4242 FILE *inf;
4243 {
4244 char *package = savestr ("main"); /* current package name */
4245 register char *cp;
4246
4247 LOOP_ON_INPUT_LINES (inf, lb, cp)
4248 {
4249 skip_spaces(cp);
4250
4251 if (LOOKING_AT (cp, "package"))
4252 {
4253 free (package);
4254 get_tag (cp, &package);
4255 }
4256 else if (LOOKING_AT (cp, "sub"))
4257 {
4258 char *pos;
4259 char *sp = cp;
4260
4261 while (!notinname (*cp))
4262 cp++;
4263 if (cp == sp)
4264 continue; /* nothing found */
4265 if ((pos = etags_strchr (sp, ':')) != NULL
4266 && pos < cp && pos[1] == ':')
4267 /* The name is already qualified. */
4268 make_tag (sp, cp - sp, TRUE,
4269 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4270 else
4271 /* Qualify it. */
4272 {
4273 char savechar, *name;
4274
4275 savechar = *cp;
4276 *cp = '\0';
4277 name = concat (package, "::", sp);
4278 *cp = savechar;
4279 make_tag (name, strlen(name), TRUE,
4280 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4281 free (name);
4282 }
4283 }
4284 else if (globals) /* only if we are tagging global vars */
4285 {
4286 /* Skip a qualifier, if any. */
4287 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4288 /* After "my" or "local", but before any following paren or space. */
4289 char *varstart = cp;
4290
4291 if (qual /* should this be removed? If yes, how? */
4292 && (*cp == '$' || *cp == '@' || *cp == '%'))
4293 {
4294 varstart += 1;
4295 do
4296 cp++;
4297 while (ISALNUM (*cp) || *cp == '_');
4298 }
4299 else if (qual)
4300 {
4301 /* Should be examining a variable list at this point;
4302 could insist on seeing an open parenthesis. */
4303 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4304 cp++;
4305 }
4306 else
4307 continue;
4308
4309 make_tag (varstart, cp - varstart, FALSE,
4310 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4311 }
4312 }
4313 }
4314
4315
4316 /*
4317 * Python support
4318 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4319 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4320 * More ideas by seb bacon <seb@jamkit.com> (2002)
4321 */
4322 static void
4323 Python_functions (inf)
4324 FILE *inf;
4325 {
4326 register char *cp;
4327
4328 LOOP_ON_INPUT_LINES (inf, lb, cp)
4329 {
4330 cp = skip_spaces (cp);
4331 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4332 {
4333 char *name = cp;
4334 while (!notinname (*cp) && *cp != ':')
4335 cp++;
4336 make_tag (name, cp - name, TRUE,
4337 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4338 }
4339 }
4340 }
4341
4342 \f
4343 /*
4344 * PHP support
4345 * Look for:
4346 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4347 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4348 * - /^[ \t]*define\(\"[^\"]+/
4349 * Only with --members:
4350 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4351 * Idea by Diez B. Roggisch (2001)
4352 */
4353 static void
4354 PHP_functions (inf)
4355 FILE *inf;
4356 {
4357 register char *cp, *name;
4358 bool search_identifier = FALSE;
4359
4360 LOOP_ON_INPUT_LINES (inf, lb, cp)
4361 {
4362 cp = skip_spaces (cp);
4363 name = cp;
4364 if (search_identifier
4365 && *cp != '\0')
4366 {
4367 while (!notinname (*cp))
4368 cp++;
4369 make_tag (name, cp - name, TRUE,
4370 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4371 search_identifier = FALSE;
4372 }
4373 else if (LOOKING_AT (cp, "function"))
4374 {
4375 if(*cp == '&')
4376 cp = skip_spaces (cp+1);
4377 if(*cp != '\0')
4378 {
4379 name = cp;
4380 while (!notinname (*cp))
4381 cp++;
4382 make_tag (name, cp - name, TRUE,
4383 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4384 }
4385 else
4386 search_identifier = TRUE;
4387 }
4388 else if (LOOKING_AT (cp, "class"))
4389 {
4390 if (*cp != '\0')
4391 {
4392 name = cp;
4393 while (*cp != '\0' && !iswhite (*cp))
4394 cp++;
4395 make_tag (name, cp - name, FALSE,
4396 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4397 }
4398 else
4399 search_identifier = TRUE;
4400 }
4401 else if (strneq (cp, "define", 6)
4402 && (cp = skip_spaces (cp+6))
4403 && *cp++ == '('
4404 && (*cp == '"' || *cp == '\''))
4405 {
4406 char quote = *cp++;
4407 name = cp;
4408 while (*cp != quote && *cp != '\0')
4409 cp++;
4410 make_tag (name, cp - name, FALSE,
4411 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4412 }
4413 else if (members
4414 && LOOKING_AT (cp, "var")
4415 && *cp == '$')
4416 {
4417 name = cp;
4418 while (!notinname(*cp))
4419 cp++;
4420 make_tag (name, cp - name, FALSE,
4421 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4422 }
4423 }
4424 }
4425
4426 \f
4427 /*
4428 * Cobol tag functions
4429 * We could look for anything that could be a paragraph name.
4430 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4431 * Idea by Corny de Souza (1993)
4432 */
4433 static void
4434 Cobol_paragraphs (inf)
4435 FILE *inf;
4436 {
4437 register char *bp, *ep;
4438
4439 LOOP_ON_INPUT_LINES (inf, lb, bp)
4440 {
4441 if (lb.len < 9)
4442 continue;
4443 bp += 8;
4444
4445 /* If eoln, compiler option or comment ignore whole line. */
4446 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4447 continue;
4448
4449 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4450 continue;
4451 if (*ep++ == '.')
4452 make_tag (bp, ep - bp, TRUE,
4453 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4454 }
4455 }
4456
4457 \f
4458 /*
4459 * Makefile support
4460 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4461 */
4462 static void
4463 Makefile_targets (inf)
4464 FILE *inf;
4465 {
4466 register char *bp;
4467
4468 LOOP_ON_INPUT_LINES (inf, lb, bp)
4469 {
4470 if (*bp == '\t' || *bp == '#')
4471 continue;
4472 while (*bp != '\0' && *bp != '=' && *bp != ':')
4473 bp++;
4474 if (*bp == ':' || (globals && *bp == '='))
4475 make_tag (lb.buffer, bp - lb.buffer, TRUE,
4476 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4477 }
4478 }
4479
4480 \f
4481 /*
4482 * Pascal parsing
4483 * Original code by Mosur K. Mohan (1989)
4484 *
4485 * Locates tags for procedures & functions. Doesn't do any type- or
4486 * var-definitions. It does look for the keyword "extern" or
4487 * "forward" immediately following the procedure statement; if found,
4488 * the tag is skipped.
4489 */
4490 static void
4491 Pascal_functions (inf)
4492 FILE *inf;
4493 {
4494 linebuffer tline; /* mostly copied from C_entries */
4495 long save_lcno;
4496 int save_lineno, namelen, taglen;
4497 char c, *name;
4498
4499 bool /* each of these flags is TRUE iff: */
4500 incomment, /* point is inside a comment */
4501 inquote, /* point is inside '..' string */
4502 get_tagname, /* point is after PROCEDURE/FUNCTION
4503 keyword, so next item = potential tag */
4504 found_tag, /* point is after a potential tag */
4505 inparms, /* point is within parameter-list */
4506 verify_tag; /* point has passed the parm-list, so the
4507 next token will determine whether this
4508 is a FORWARD/EXTERN to be ignored, or
4509 whether it is a real tag */
4510
4511 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4512 name = NULL; /* keep compiler quiet */
4513 dbp = lb.buffer;
4514 *dbp = '\0';
4515 linebuffer_init (&tline);
4516
4517 incomment = inquote = FALSE;
4518 found_tag = FALSE; /* have a proc name; check if extern */
4519 get_tagname = FALSE; /* found "procedure" keyword */
4520 inparms = FALSE; /* found '(' after "proc" */
4521 verify_tag = FALSE; /* check if "extern" is ahead */
4522
4523
4524 while (!feof (inf)) /* long main loop to get next char */
4525 {
4526 c = *dbp++;
4527 if (c == '\0') /* if end of line */
4528 {
4529 readline (&lb, inf);
4530 dbp = lb.buffer;
4531 if (*dbp == '\0')
4532 continue;
4533 if (!((found_tag && verify_tag)
4534 || get_tagname))
4535 c = *dbp++; /* only if don't need *dbp pointing
4536 to the beginning of the name of
4537 the procedure or function */
4538 }
4539 if (incomment)
4540 {
4541 if (c == '}') /* within { } comments */
4542 incomment = FALSE;
4543 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4544 {
4545 dbp++;
4546 incomment = FALSE;
4547 }
4548 continue;
4549 }
4550 else if (inquote)
4551 {
4552 if (c == '\'')
4553 inquote = FALSE;
4554 continue;
4555 }
4556 else
4557 switch (c)
4558 {
4559 case '\'':
4560 inquote = TRUE; /* found first quote */
4561 continue;
4562 case '{': /* found open { comment */
4563 incomment = TRUE;
4564 continue;
4565 case '(':
4566 if (*dbp == '*') /* found open (* comment */
4567 {
4568 incomment = TRUE;
4569 dbp++;
4570 }
4571 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4572 inparms = TRUE;
4573 continue;
4574 case ')': /* end of parms list */
4575 if (inparms)
4576 inparms = FALSE;
4577 continue;
4578 case ';':
4579 if (found_tag && !inparms) /* end of proc or fn stmt */
4580 {
4581 verify_tag = TRUE;
4582 break;
4583 }
4584 continue;
4585 }
4586 if (found_tag && verify_tag && (*dbp != ' '))
4587 {
4588 /* Check if this is an "extern" declaration. */
4589 if (*dbp == '\0')
4590 continue;
4591 if (lowcase (*dbp == 'e'))
4592 {
4593 if (nocase_tail ("extern")) /* superfluous, really! */
4594 {
4595 found_tag = FALSE;
4596 verify_tag = FALSE;
4597 }
4598 }
4599 else if (lowcase (*dbp) == 'f')
4600 {
4601 if (nocase_tail ("forward")) /* check for forward reference */
4602 {
4603 found_tag = FALSE;
4604 verify_tag = FALSE;
4605 }
4606 }
4607 if (found_tag && verify_tag) /* not external proc, so make tag */
4608 {
4609 found_tag = FALSE;
4610 verify_tag = FALSE;
4611 make_tag (name, namelen, TRUE,
4612 tline.buffer, taglen, save_lineno, save_lcno);
4613 continue;
4614 }
4615 }
4616 if (get_tagname) /* grab name of proc or fn */
4617 {
4618 char *cp;
4619
4620 if (*dbp == '\0')
4621 continue;
4622
4623 /* Find block name. */
4624 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4625 continue;
4626
4627 /* Save all values for later tagging. */
4628 linebuffer_setlen (&tline, lb.len);
4629 strcpy (tline.buffer, lb.buffer);
4630 save_lineno = lineno;
4631 save_lcno = linecharno;
4632 name = tline.buffer + (dbp - lb.buffer);
4633 namelen = cp - dbp;
4634 taglen = cp - lb.buffer + 1;
4635
4636 dbp = cp; /* set dbp to e-o-token */
4637 get_tagname = FALSE;
4638 found_tag = TRUE;
4639 continue;
4640
4641 /* And proceed to check for "extern". */
4642 }
4643 else if (!incomment && !inquote && !found_tag)
4644 {
4645 /* Check for proc/fn keywords. */
4646 switch (lowcase (c))
4647 {
4648 case 'p':
4649 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4650 get_tagname = TRUE;
4651 continue;
4652 case 'f':
4653 if (nocase_tail ("unction"))
4654 get_tagname = TRUE;
4655 continue;
4656 }
4657 }
4658 } /* while not eof */
4659
4660 free (tline.buffer);
4661 }
4662
4663 \f
4664 /*
4665 * Lisp tag functions
4666 * look for (def or (DEF, quote or QUOTE
4667 */
4668
4669 static void L_getit __P((void));
4670
4671 static void
4672 L_getit ()
4673 {
4674 if (*dbp == '\'') /* Skip prefix quote */
4675 dbp++;
4676 else if (*dbp == '(')
4677 {
4678 dbp++;
4679 /* Try to skip "(quote " */
4680 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4681 /* Ok, then skip "(" before name in (defstruct (foo)) */
4682 dbp = skip_spaces (dbp);
4683 }
4684 get_tag (dbp, NULL);
4685 }
4686
4687 static void
4688 Lisp_functions (inf)
4689 FILE *inf;
4690 {
4691 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4692 {
4693 if (dbp[0] != '(')
4694 continue;
4695
4696 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4697 {
4698 dbp = skip_non_spaces (dbp);
4699 dbp = skip_spaces (dbp);
4700 L_getit ();
4701 }
4702 else
4703 {
4704 /* Check for (foo::defmumble name-defined ... */
4705 do
4706 dbp++;
4707 while (!notinname (*dbp) && *dbp != ':');
4708 if (*dbp == ':')
4709 {
4710 do
4711 dbp++;
4712 while (*dbp == ':');
4713
4714 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4715 {
4716 dbp = skip_non_spaces (dbp);
4717 dbp = skip_spaces (dbp);
4718 L_getit ();
4719 }
4720 }
4721 }
4722 }
4723 }
4724
4725 \f
4726 /*
4727 * Postscript tag functions
4728 * Just look for lines where the first character is '/'
4729 * Also look at "defineps" for PSWrap
4730 * Ideas by:
4731 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4732 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4733 */
4734 static void
4735 Postscript_functions (inf)
4736 FILE *inf;
4737 {
4738 register char *bp, *ep;
4739
4740 LOOP_ON_INPUT_LINES (inf, lb, bp)
4741 {
4742 if (bp[0] == '/')
4743 {
4744 for (ep = bp+1;
4745 *ep != '\0' && *ep != ' ' && *ep != '{';
4746 ep++)
4747 continue;
4748 make_tag (bp, ep - bp, TRUE,
4749 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4750 }
4751 else if (LOOKING_AT (bp, "defineps"))
4752 get_tag (bp, NULL);
4753 }
4754 }
4755
4756 \f
4757 /*
4758 * Scheme tag functions
4759 * look for (def... xyzzy
4760 * (def... (xyzzy
4761 * (def ... ((...(xyzzy ....
4762 * (set! xyzzy
4763 * Original code by Ken Haase (1985?)
4764 */
4765
4766 static void
4767 Scheme_functions (inf)
4768 FILE *inf;
4769 {
4770 register char *bp;
4771
4772 LOOP_ON_INPUT_LINES (inf, lb, bp)
4773 {
4774 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4775 {
4776 bp = skip_non_spaces (bp+4);
4777 /* Skip over open parens and white space */
4778 while (notinname (*bp))
4779 bp++;
4780 get_tag (bp, NULL);
4781 }
4782 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4783 get_tag (bp, NULL);
4784 }
4785 }
4786
4787 \f
4788 /* Find tags in TeX and LaTeX input files. */
4789
4790 /* TEX_toktab is a table of TeX control sequences that define tags.
4791 * Each entry records one such control sequence.
4792 *
4793 * Original code from who knows whom.
4794 * Ideas by:
4795 * Stefan Monnier (2002)
4796 */
4797
4798 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4799
4800 /* Default set of control sequences to put into TEX_toktab.
4801 The value of environment var TEXTAGS is prepended to this. */
4802 static char *TEX_defenv = "\
4803 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4804 :part:appendix:entry:index:def\
4805 :newcommand:renewcommand:newenvironment:renewenvironment";
4806
4807 static void TEX_mode __P((FILE *));
4808 static void TEX_decode_env __P((char *, char *));
4809
4810 static char TEX_esc = '\\';
4811 static char TEX_opgrp = '{';
4812 static char TEX_clgrp = '}';
4813
4814 /*
4815 * TeX/LaTeX scanning loop.
4816 */
4817 static void
4818 TeX_commands (inf)
4819 FILE *inf;
4820 {
4821 char *cp;
4822 linebuffer *key;
4823
4824 /* Select either \ or ! as escape character. */
4825 TEX_mode (inf);
4826
4827 /* Initialize token table once from environment. */
4828 if (TEX_toktab == NULL)
4829 TEX_decode_env ("TEXTAGS", TEX_defenv);
4830
4831 LOOP_ON_INPUT_LINES (inf, lb, cp)
4832 {
4833 /* Look at each TEX keyword in line. */
4834 for (;;)
4835 {
4836 /* Look for a TEX escape. */
4837 while (*cp++ != TEX_esc)
4838 if (cp[-1] == '\0' || cp[-1] == '%')
4839 goto tex_next_line;
4840
4841 for (key = TEX_toktab; key->buffer != NULL; key++)
4842 if (strneq (cp, key->buffer, key->len))
4843 {
4844 register char *p;
4845 int namelen, linelen;
4846 bool opgrp = FALSE;
4847
4848 cp = skip_spaces (cp + key->len);
4849 if (*cp == TEX_opgrp)
4850 {
4851 opgrp = TRUE;
4852 cp++;
4853 }
4854 for (p = cp;
4855 (!iswhite (*p) && *p != '#' &&
4856 *p != TEX_opgrp && *p != TEX_clgrp);
4857 p++)
4858 continue;
4859 namelen = p - cp;
4860 linelen = lb.len;
4861 if (!opgrp || *p == TEX_clgrp)
4862 {
4863 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4864 *p++;
4865 linelen = p - lb.buffer + 1;
4866 }
4867 make_tag (cp, namelen, TRUE,
4868 lb.buffer, linelen, lineno, linecharno);
4869 goto tex_next_line; /* We only tag a line once */
4870 }
4871 }
4872 tex_next_line:
4873 ;
4874 }
4875 }
4876
4877 #define TEX_LESC '\\'
4878 #define TEX_SESC '!'
4879
4880 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4881 chars accordingly. */
4882 static void
4883 TEX_mode (inf)
4884 FILE *inf;
4885 {
4886 int c;
4887
4888 while ((c = getc (inf)) != EOF)
4889 {
4890 /* Skip to next line if we hit the TeX comment char. */
4891 if (c == '%')
4892 while (c != '\n')
4893 c = getc (inf);
4894 else if (c == TEX_LESC || c == TEX_SESC )
4895 break;
4896 }
4897
4898 if (c == TEX_LESC)
4899 {
4900 TEX_esc = TEX_LESC;
4901 TEX_opgrp = '{';
4902 TEX_clgrp = '}';
4903 }
4904 else
4905 {
4906 TEX_esc = TEX_SESC;
4907 TEX_opgrp = '<';
4908 TEX_clgrp = '>';
4909 }
4910 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4911 No attempt is made to correct the situation. */
4912 rewind (inf);
4913 }
4914
4915 /* Read environment and prepend it to the default string.
4916 Build token table. */
4917 static void
4918 TEX_decode_env (evarname, defenv)
4919 char *evarname;
4920 char *defenv;
4921 {
4922 register char *env, *p;
4923 int i, len;
4924
4925 /* Append default string to environment. */
4926 env = getenv (evarname);
4927 if (!env)
4928 env = defenv;
4929 else
4930 {
4931 char *oldenv = env;
4932 env = concat (oldenv, defenv, "");
4933 }
4934
4935 /* Allocate a token table */
4936 for (len = 1, p = env; p;)
4937 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4938 len++;
4939 TEX_toktab = xnew (len, linebuffer);
4940
4941 /* Unpack environment string into token table. Be careful about */
4942 /* zero-length strings (leading ':', "::" and trailing ':') */
4943 for (i = 0; *env != '\0';)
4944 {
4945 p = etags_strchr (env, ':');
4946 if (!p) /* End of environment string. */
4947 p = env + strlen (env);
4948 if (p - env > 0)
4949 { /* Only non-zero strings. */
4950 TEX_toktab[i].buffer = savenstr (env, p - env);
4951 TEX_toktab[i].len = p - env;
4952 i++;
4953 }
4954 if (*p)
4955 env = p + 1;
4956 else
4957 {
4958 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
4959 TEX_toktab[i].len = 0;
4960 break;
4961 }
4962 }
4963 }
4964
4965 \f
4966 /* Texinfo support. Dave Love, Mar. 2000. */
4967 static void
4968 Texinfo_nodes (inf)
4969 FILE * inf;
4970 {
4971 char *cp, *start;
4972 LOOP_ON_INPUT_LINES (inf, lb, cp)
4973 if (LOOKING_AT (cp, "@node"))
4974 {
4975 start = cp;
4976 while (*cp != '\0' && *cp != ',')
4977 cp++;
4978 make_tag (start, cp - start, TRUE,
4979 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4980 }
4981 }
4982
4983 \f
4984 /* Similar to LOOKING_AT but does not use notinname, does not skip */
4985 #define LOOKING_AT_NOCASE(cp, kw) /* kw is a constant string */ \
4986 (strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4987 && ((cp) += sizeof(kw)-1)) /* skip spaces */
4988
4989 /*
4990 * HTML support.
4991 * Contents of <title>, <h1>, <h2>, <h3> are tags.
4992 * Contents of <a name=xxx> are tags with name xxx.
4993 *
4994 * Francesco Potortì, 2002.
4995 */
4996 static void
4997 HTML_labels (inf)
4998 FILE * inf;
4999 {
5000 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5001 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5002 bool intag = FALSE; /* inside an html tag, looking for ID= */
5003 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5004 char *end;
5005
5006
5007 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5008
5009 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5010 for (;;) /* loop on the same line */
5011 {
5012 if (skiptag) /* skip HTML tag */
5013 {
5014 while (*dbp != '\0' && *dbp != '>')
5015 dbp++;
5016 if (*dbp == '>')
5017 {
5018 dbp += 1;
5019 skiptag = FALSE;
5020 continue; /* look on the same line */
5021 }
5022 break; /* go to next line */
5023 }
5024
5025 else if (intag) /* look for "name=" or "id=" */
5026 {
5027 while (*dbp != '\0' && *dbp != '>'
5028 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5029 dbp++;
5030 if (*dbp == '\0')
5031 break; /* go to next line */
5032 if (*dbp == '>')
5033 {
5034 dbp += 1;
5035 intag = FALSE;
5036 continue; /* look on the same line */
5037 }
5038 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5039 || LOOKING_AT_NOCASE (dbp, "id="))
5040 {
5041 bool quoted = (dbp[0] == '"');
5042
5043 if (quoted)
5044 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5045 continue;
5046 else
5047 for (end = dbp; *end != '\0' && intoken (*end); end++)
5048 continue;
5049 linebuffer_setlen (&token_name, end - dbp);
5050 strncpy (token_name.buffer, dbp, end - dbp);
5051 token_name.buffer[end - dbp] = '\0';
5052
5053 dbp = end;
5054 intag = FALSE; /* we found what we looked for */
5055 skiptag = TRUE; /* skip to the end of the tag */
5056 getnext = TRUE; /* then grab the text */
5057 continue; /* look on the same line */
5058 }
5059 dbp += 1;
5060 }
5061
5062 else if (getnext) /* grab next tokens and tag them */
5063 {
5064 dbp = skip_spaces (dbp);
5065 if (*dbp == '\0')
5066 break; /* go to next line */
5067 if (*dbp == '<')
5068 {
5069 intag = TRUE;
5070 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5071 continue; /* look on the same line */
5072 }
5073
5074 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5075 continue;
5076 make_tag (token_name.buffer, token_name.len, TRUE,
5077 dbp, end - dbp, lineno, linecharno);
5078 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5079 getnext = FALSE;
5080 break; /* go to next line */
5081 }
5082
5083 else /* look for an interesting HTML tag */
5084 {
5085 while (*dbp != '\0' && *dbp != '<')
5086 dbp++;
5087 if (*dbp == '\0')
5088 break; /* go to next line */
5089 intag = TRUE;
5090 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5091 {
5092 inanchor = TRUE;
5093 continue; /* look on the same line */
5094 }
5095 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5096 || LOOKING_AT_NOCASE (dbp, "<h1>")
5097 || LOOKING_AT_NOCASE (dbp, "<h2>")
5098 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5099 {
5100 intag = FALSE;
5101 getnext = TRUE;
5102 continue; /* look on the same line */
5103 }
5104 dbp += 1;
5105 }
5106 }
5107 }
5108
5109 \f
5110 /*
5111 * Prolog support
5112 *
5113 * Assumes that the predicate or rule starts at column 0.
5114 * Only the first clause of a predicate or rule is added.
5115 * Original code by Sunichirou Sugou (1989)
5116 * Rewritten by Anders Lindgren (1996)
5117 */
5118 static int prolog_pr __P((char *, char *));
5119 static void prolog_skip_comment __P((linebuffer *, FILE *));
5120 static int prolog_atom __P((char *, int));
5121
5122 static void
5123 Prolog_functions (inf)
5124 FILE *inf;
5125 {
5126 char *cp, *last;
5127 int len;
5128 int allocated;
5129
5130 allocated = 0;
5131 len = 0;
5132 last = NULL;
5133
5134 LOOP_ON_INPUT_LINES (inf, lb, cp)
5135 {
5136 if (cp[0] == '\0') /* Empty line */
5137 continue;
5138 else if (iswhite (cp[0])) /* Not a predicate */
5139 continue;
5140 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5141 prolog_skip_comment (&lb, inf);
5142 else if ((len = prolog_pr (cp, last)) > 0)
5143 {
5144 /* Predicate or rule. Store the function name so that we
5145 only generate a tag for the first clause. */
5146 if (last == NULL)
5147 last = xnew(len + 1, char);
5148 else if (len + 1 > allocated)
5149 xrnew (last, len + 1, char);
5150 allocated = len + 1;
5151 strncpy (last, cp, len);
5152 last[len] = '\0';
5153 }
5154 }
5155 }
5156
5157
5158 static void
5159 prolog_skip_comment (plb, inf)
5160 linebuffer *plb;
5161 FILE *inf;
5162 {
5163 char *cp;
5164
5165 do
5166 {
5167 for (cp = plb->buffer; *cp != '\0'; cp++)
5168 if (cp[0] == '*' && cp[1] == '/')
5169 return;
5170 readline (plb, inf);
5171 }
5172 while (!feof(inf));
5173 }
5174
5175 /*
5176 * A predicate or rule definition is added if it matches:
5177 * <beginning of line><Prolog Atom><whitespace>(
5178 * or <beginning of line><Prolog Atom><whitespace>:-
5179 *
5180 * It is added to the tags database if it doesn't match the
5181 * name of the previous clause header.
5182 *
5183 * Return the size of the name of the predicate or rule, or 0 if no
5184 * header was found.
5185 */
5186 static int
5187 prolog_pr (s, last)
5188 char *s;
5189 char *last; /* Name of last clause. */
5190 {
5191 int pos;
5192 int len;
5193
5194 pos = prolog_atom (s, 0);
5195 if (pos < 1)
5196 return 0;
5197
5198 len = pos;
5199 pos = skip_spaces (s + pos) - s;
5200
5201 if ((s[pos] == '.'
5202 || (s[pos] == '(' && (pos += 1))
5203 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5204 && (last == NULL /* save only the first clause */
5205 || len != strlen (last)
5206 || !strneq (s, last, len)))
5207 {
5208 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5209 return len;
5210 }
5211 else
5212 return 0;
5213 }
5214
5215 /*
5216 * Consume a Prolog atom.
5217 * Return the number of bytes consumed, or -1 if there was an error.
5218 *
5219 * A prolog atom, in this context, could be one of:
5220 * - An alphanumeric sequence, starting with a lower case letter.
5221 * - A quoted arbitrary string. Single quotes can escape themselves.
5222 * Backslash quotes everything.
5223 */
5224 static int
5225 prolog_atom (s, pos)
5226 char *s;
5227 int pos;
5228 {
5229 int origpos;
5230
5231 origpos = pos;
5232
5233 if (ISLOWER(s[pos]) || (s[pos] == '_'))
5234 {
5235 /* The atom is unquoted. */
5236 pos++;
5237 while (ISALNUM(s[pos]) || (s[pos] == '_'))
5238 {
5239 pos++;
5240 }
5241 return pos - origpos;
5242 }
5243 else if (s[pos] == '\'')
5244 {
5245 pos++;
5246
5247 for (;;)
5248 {
5249 if (s[pos] == '\'')
5250 {
5251 pos++;
5252 if (s[pos] != '\'')
5253 break;
5254 pos++; /* A double quote */
5255 }
5256 else if (s[pos] == '\0')
5257 /* Multiline quoted atoms are ignored. */
5258 return -1;
5259 else if (s[pos] == '\\')
5260 {
5261 if (s[pos+1] == '\0')
5262 return -1;
5263 pos += 2;
5264 }
5265 else
5266 pos++;
5267 }
5268 return pos - origpos;
5269 }
5270 else
5271 return -1;
5272 }
5273
5274 \f
5275 /*
5276 * Support for Erlang
5277 *
5278 * Generates tags for functions, defines, and records.
5279 * Assumes that Erlang functions start at column 0.
5280 * Original code by Anders Lindgren (1996)
5281 */
5282 static int erlang_func __P((char *, char *));
5283 static void erlang_attribute __P((char *));
5284 static int erlang_atom __P((char *));
5285
5286 static void
5287 Erlang_functions (inf)
5288 FILE *inf;
5289 {
5290 char *cp, *last;
5291 int len;
5292 int allocated;
5293
5294 allocated = 0;
5295 len = 0;
5296 last = NULL;
5297
5298 LOOP_ON_INPUT_LINES (inf, lb, cp)
5299 {
5300 if (cp[0] == '\0') /* Empty line */
5301 continue;
5302 else if (iswhite (cp[0])) /* Not function nor attribute */
5303 continue;
5304 else if (cp[0] == '%') /* comment */
5305 continue;
5306 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5307 continue;
5308 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5309 {
5310 erlang_attribute (cp);
5311 last = NULL;
5312 }
5313 else if ((len = erlang_func (cp, last)) > 0)
5314 {
5315 /*
5316 * Function. Store the function name so that we only
5317 * generates a tag for the first clause.
5318 */
5319 if (last == NULL)
5320 last = xnew (len + 1, char);
5321 else if (len + 1 > allocated)
5322 xrnew (last, len + 1, char);
5323 allocated = len + 1;
5324 strncpy (last, cp, len);
5325 last[len] = '\0';
5326 }
5327 }
5328 }
5329
5330
5331 /*
5332 * A function definition is added if it matches:
5333 * <beginning of line><Erlang Atom><whitespace>(
5334 *
5335 * It is added to the tags database if it doesn't match the
5336 * name of the previous clause header.
5337 *
5338 * Return the size of the name of the function, or 0 if no function
5339 * was found.
5340 */
5341 static int
5342 erlang_func (s, last)
5343 char *s;
5344 char *last; /* Name of last clause. */
5345 {
5346 int pos;
5347 int len;
5348
5349 pos = erlang_atom (s);
5350 if (pos < 1)
5351 return 0;
5352
5353 len = pos;
5354 pos = skip_spaces (s + pos) - s;
5355
5356 /* Save only the first clause. */
5357 if (s[pos++] == '('
5358 && (last == NULL
5359 || len != (int)strlen (last)
5360 || !strneq (s, last, len)))
5361 {
5362 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5363 return len;
5364 }
5365
5366 return 0;
5367 }
5368
5369
5370 /*
5371 * Handle attributes. Currently, tags are generated for defines
5372 * and records.
5373 *
5374 * They are on the form:
5375 * -define(foo, bar).
5376 * -define(Foo(M, N), M+N).
5377 * -record(graph, {vtab = notable, cyclic = true}).
5378 */
5379 static void
5380 erlang_attribute (s)
5381 char *s;
5382 {
5383 char *cp = s;
5384
5385 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5386 && *cp++ == '(')
5387 {
5388 int len = erlang_atom (skip_spaces (cp));
5389 if (len > 0)
5390 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5391 }
5392 return;
5393 }
5394
5395
5396 /*
5397 * Consume an Erlang atom (or variable).
5398 * Return the number of bytes consumed, or -1 if there was an error.
5399 */
5400 static int
5401 erlang_atom (s)
5402 char *s;
5403 {
5404 int pos = 0;
5405
5406 if (ISALPHA (s[pos]) || s[pos] == '_')
5407 {
5408 /* The atom is unquoted. */
5409 do
5410 pos++;
5411 while (ISALNUM (s[pos]) || s[pos] == '_');
5412 }
5413 else if (s[pos] == '\'')
5414 {
5415 for (pos++; s[pos] != '\''; pos++)
5416 if (s[pos] == '\0' /* multiline quoted atoms are ignored */
5417 || (s[pos] == '\\' && s[++pos] == '\0'))
5418 return 0;
5419 pos++;
5420 }
5421
5422 return pos;
5423 }
5424
5425 \f
5426 #ifdef ETAGS_REGEXPS
5427
5428 static char *scan_separators __P((char *));
5429 static void add_regex __P((char *, language *));
5430 static char *substitute __P((char *, char *, struct re_registers *));
5431
5432 /*
5433 * Take a string like "/blah/" and turn it into "blah", verifying
5434 * that the first and last characters are the same, and handling
5435 * quoted separator characters. Actually, stops on the occurrence of
5436 * an unquoted separator. Also process \t, \n, etc. and turn into
5437 * appropriate characters. Works in place. Null terminates name string.
5438 * Returns pointer to terminating separator, or NULL for
5439 * unterminated regexps.
5440 */
5441 static char *
5442 scan_separators (name)
5443 char *name;
5444 {
5445 char sep = name[0];
5446 char *copyto = name;
5447 bool quoted = FALSE;
5448
5449 for (++name; *name != '\0'; ++name)
5450 {
5451 if (quoted)
5452 {
5453 switch (*name)
5454 {
5455 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5456 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5457 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5458 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5459 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5460 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5461 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5462 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5463 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5464 default:
5465 if (*name == sep)
5466 *copyto++ = sep;
5467 else
5468 {
5469 /* Something else is quoted, so preserve the quote. */
5470 *copyto++ = '\\';
5471 *copyto++ = *name;
5472 }
5473 break;
5474 }
5475 quoted = FALSE;
5476 }
5477 else if (*name == '\\')
5478 quoted = TRUE;
5479 else if (*name == sep)
5480 break;
5481 else
5482 *copyto++ = *name;
5483 }
5484 if (*name != sep)
5485 name = NULL; /* signal unterminated regexp */
5486
5487 /* Terminate copied string. */
5488 *copyto = '\0';
5489 return name;
5490 }
5491
5492 /* Look at the argument of --regex or --no-regex and do the right
5493 thing. Same for each line of a regexp file. */
5494 static void
5495 analyse_regex (regex_arg)
5496 char *regex_arg;
5497 {
5498 if (regex_arg == NULL)
5499 {
5500 free_regexps (); /* --no-regex: remove existing regexps */
5501 return;
5502 }
5503
5504 /* A real --regexp option or a line in a regexp file. */
5505 switch (regex_arg[0])
5506 {
5507 /* Comments in regexp file or null arg to --regex. */
5508 case '\0':
5509 case ' ':
5510 case '\t':
5511 break;
5512
5513 /* Read a regex file. This is recursive and may result in a
5514 loop, which will stop when the file descriptors are exhausted. */
5515 case '@':
5516 {
5517 FILE *regexfp;
5518 linebuffer regexbuf;
5519 char *regexfile = regex_arg + 1;
5520
5521 /* regexfile is a file containing regexps, one per line. */
5522 regexfp = fopen (regexfile, "r");
5523 if (regexfp == NULL)
5524 {
5525 pfatal (regexfile);
5526 return;
5527 }
5528 linebuffer_init (&regexbuf);
5529 while (readline_internal (&regexbuf, regexfp) > 0)
5530 analyse_regex (regexbuf.buffer);
5531 free (regexbuf.buffer);
5532 fclose (regexfp);
5533 }
5534 break;
5535
5536 /* Regexp to be used for a specific language only. */
5537 case '{':
5538 {
5539 language *lang;
5540 char *lang_name = regex_arg + 1;
5541 char *cp;
5542
5543 for (cp = lang_name; *cp != '}'; cp++)
5544 if (*cp == '\0')
5545 {
5546 error ("unterminated language name in regex: %s", regex_arg);
5547 return;
5548 }
5549 *cp++ = '\0';
5550 lang = get_language_from_langname (lang_name);
5551 if (lang == NULL)
5552 return;
5553 add_regex (cp, lang);
5554 }
5555 break;
5556
5557 /* Regexp to be used for any language. */
5558 default:
5559 add_regex (regex_arg, NULL);
5560 break;
5561 }
5562 }
5563
5564 /* Separate the regexp pattern, compile it,
5565 and care for optional name and modifiers. */
5566 static void
5567 add_regex (regexp_pattern, lang)
5568 char *regexp_pattern;
5569 language *lang;
5570 {
5571 static struct re_pattern_buffer zeropattern;
5572 char sep, *pat, *name, *modifiers;
5573 const char *err;
5574 struct re_pattern_buffer *patbuf;
5575 regexp *rp;
5576 bool
5577 force_explicit_name = TRUE, /* do not use implicit tag names */
5578 ignore_case = FALSE, /* case is significant */
5579 multi_line = FALSE, /* matches are done one line at a time */
5580 single_line = FALSE; /* dot does not match newline */
5581
5582
5583 if (strlen(regexp_pattern) < 3)
5584 {
5585 error ("null regexp", (char *)NULL);
5586 return;
5587 }
5588 sep = regexp_pattern[0];
5589 name = scan_separators (regexp_pattern);
5590 if (name == NULL)
5591 {
5592 error ("%s: unterminated regexp", regexp_pattern);
5593 return;
5594 }
5595 if (name[1] == sep)
5596 {
5597 error ("null name for regexp \"%s\"", regexp_pattern);
5598 return;
5599 }
5600 modifiers = scan_separators (name);
5601 if (modifiers == NULL) /* no terminating separator --> no name */
5602 {
5603 modifiers = name;
5604 name = "";
5605 }
5606 else
5607 modifiers += 1; /* skip separator */
5608
5609 /* Parse regex modifiers. */
5610 for (; modifiers[0] != '\0'; modifiers++)
5611 switch (modifiers[0])
5612 {
5613 case 'N':
5614 if (modifiers == name)
5615 error ("forcing explicit tag name but no name, ignoring", NULL);
5616 force_explicit_name = TRUE;
5617 break;
5618 case 'i':
5619 ignore_case = TRUE;
5620 break;
5621 case 's':
5622 single_line = TRUE;
5623 /* FALLTHRU */
5624 case 'm':
5625 multi_line = TRUE;
5626 need_filebuf = TRUE;
5627 break;
5628 default:
5629 {
5630 char wrongmod [2];
5631 wrongmod[0] = modifiers[0];
5632 wrongmod[1] = '\0';
5633 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5634 }
5635 break;
5636 }
5637
5638 patbuf = xnew (1, struct re_pattern_buffer);
5639 *patbuf = zeropattern;
5640 if (ignore_case)
5641 {
5642 static char lc_trans[CHARS];
5643 int i;
5644 for (i = 0; i < CHARS; i++)
5645 lc_trans[i] = lowcase (i);
5646 patbuf->translate = lc_trans; /* translation table to fold case */
5647 }
5648
5649 if (multi_line)
5650 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5651 else
5652 pat = regexp_pattern;
5653
5654 if (single_line)
5655 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5656 else
5657 re_set_syntax (RE_SYNTAX_EMACS);
5658
5659 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5660 if (multi_line)
5661 free (pat);
5662 if (err != NULL)
5663 {
5664 error ("%s while compiling pattern", err);
5665 return;
5666 }
5667
5668 rp = p_head;
5669 p_head = xnew (1, regexp);
5670 p_head->pattern = savestr (regexp_pattern);
5671 p_head->p_next = rp;
5672 p_head->lang = lang;
5673 p_head->pat = patbuf;
5674 p_head->name = savestr (name);
5675 p_head->error_signaled = FALSE;
5676 p_head->force_explicit_name = force_explicit_name;
5677 p_head->ignore_case = ignore_case;
5678 p_head->multi_line = multi_line;
5679 }
5680
5681 /*
5682 * Do the substitutions indicated by the regular expression and
5683 * arguments.
5684 */
5685 static char *
5686 substitute (in, out, regs)
5687 char *in, *out;
5688 struct re_registers *regs;
5689 {
5690 char *result, *t;
5691 int size, dig, diglen;
5692
5693 result = NULL;
5694 size = strlen (out);
5695
5696 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5697 if (out[size - 1] == '\\')
5698 fatal ("pattern error in \"%s\"", out);
5699 for (t = etags_strchr (out, '\\');
5700 t != NULL;
5701 t = etags_strchr (t + 2, '\\'))
5702 if (ISDIGIT (t[1]))
5703 {
5704 dig = t[1] - '0';
5705 diglen = regs->end[dig] - regs->start[dig];
5706 size += diglen - 2;
5707 }
5708 else
5709 size -= 1;
5710
5711 /* Allocate space and do the substitutions. */
5712 result = xnew (size + 1, char);
5713
5714 for (t = result; *out != '\0'; out++)
5715 if (*out == '\\' && ISDIGIT (*++out))
5716 {
5717 dig = *out - '0';
5718 diglen = regs->end[dig] - regs->start[dig];
5719 strncpy (t, in + regs->start[dig], diglen);
5720 t += diglen;
5721 }
5722 else
5723 *t++ = *out;
5724 *t = '\0';
5725
5726 assert (t <= result + size && t - result == (int)strlen (result));
5727
5728 return result;
5729 }
5730
5731 /* Deallocate all regexps. */
5732 static void
5733 free_regexps ()
5734 {
5735 regexp *rp;
5736 while (p_head != NULL)
5737 {
5738 rp = p_head->p_next;
5739 free (p_head->pattern);
5740 free (p_head->name);
5741 free (p_head);
5742 p_head = rp;
5743 }
5744 return;
5745 }
5746
5747 /*
5748 * Reads the whole file as a single string from `filebuf' and looks for
5749 * multi-line regular expressions, creating tags on matches.
5750 * readline already dealt with normal regexps.
5751 *
5752 * Idea by Ben Wing <ben@666.com> (2002).
5753 */
5754 static void
5755 regex_tag_multiline ()
5756 {
5757 char *buffer = filebuf.buffer;
5758 regexp *rp;
5759 char *name;
5760
5761 for (rp = p_head; rp != NULL; rp = rp->p_next)
5762 {
5763 int match = 0;
5764
5765 if (!rp->multi_line)
5766 continue; /* skip normal regexps */
5767
5768 /* Generic initialisations before parsing file from memory. */
5769 lineno = 1; /* reset global line number */
5770 charno = 0; /* reset global char number */
5771 linecharno = 0; /* reset global char number of line start */
5772
5773 /* Only use generic regexps or those for the current language. */
5774 if (rp->lang != NULL && rp->lang != curfdp->lang)
5775 continue;
5776
5777 while (match >= 0 && match < filebuf.len)
5778 {
5779 match = re_search (rp->pat, buffer, filebuf.len, charno,
5780 filebuf.len - match, &rp->regs);
5781 switch (match)
5782 {
5783 case -2:
5784 /* Some error. */
5785 if (!rp->error_signaled)
5786 {
5787 error ("regexp stack overflow while matching \"%s\"",
5788 rp->pattern);
5789 rp->error_signaled = TRUE;
5790 }
5791 break;
5792 case -1:
5793 /* No match. */
5794 break;
5795 default:
5796 if (match == rp->regs.end[0])
5797 {
5798 if (!rp->error_signaled)
5799 {
5800 error ("regexp matches the empty string: \"%s\"",
5801 rp->pattern);
5802 rp->error_signaled = TRUE;
5803 }
5804 match = -3; /* exit from while loop */
5805 break;
5806 }
5807
5808 /* Match occurred. Construct a tag. */
5809 while (charno < rp->regs.end[0])
5810 if (buffer[charno++] == '\n')
5811 lineno++, linecharno = charno;
5812 name = rp->name;
5813 if (name[0] != '\0')
5814 /* Make a named tag. */
5815 name = substitute (buffer, rp->name, &rp->regs);
5816 if (rp->force_explicit_name)
5817 /* Force explicit tag name, if a name is there. */
5818 pfnote (name, TRUE, buffer + linecharno,
5819 charno - linecharno + 1, lineno, linecharno);
5820 else
5821 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5822 charno - linecharno + 1, lineno, linecharno);
5823 break;
5824 }
5825 }
5826 }
5827 }
5828
5829 #endif /* ETAGS_REGEXPS */
5830
5831 \f
5832 static bool
5833 nocase_tail (cp)
5834 char *cp;
5835 {
5836 register int len = 0;
5837
5838 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5839 cp++, len++;
5840 if (*cp == '\0' && !intoken (dbp[len]))
5841 {
5842 dbp += len;
5843 return TRUE;
5844 }
5845 return FALSE;
5846 }
5847
5848 static void
5849 get_tag (bp, namepp)
5850 register char *bp;
5851 char **namepp;
5852 {
5853 register char *cp = bp;
5854
5855 if (*bp != '\0')
5856 {
5857 /* Go till you get to white space or a syntactic break */
5858 for (cp = bp + 1; !notinname (*cp); cp++)
5859 continue;
5860 make_tag (bp, cp - bp, TRUE,
5861 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5862 }
5863
5864 if (namepp != NULL)
5865 *namepp = savenstr (bp, cp - bp);
5866 }
5867
5868 /*
5869 * Read a line of text from `stream' into `lbp', excluding the
5870 * newline or CR-NL, if any. Return the number of characters read from
5871 * `stream', which is the length of the line including the newline.
5872 *
5873 * On DOS or Windows we do not count the CR character, if any before the
5874 * NL, in the returned length; this mirrors the behavior of Emacs on those
5875 * platforms (for text files, it translates CR-NL to NL as it reads in the
5876 * file).
5877 *
5878 * If multi-line regular expressions are requested, each line read is
5879 * appended to `filebuf'.
5880 */
5881 static long
5882 readline_internal (lbp, stream)
5883 linebuffer *lbp;
5884 register FILE *stream;
5885 {
5886 char *buffer = lbp->buffer;
5887 register char *p = lbp->buffer;
5888 register char *pend;
5889 int chars_deleted;
5890
5891 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5892
5893 for (;;)
5894 {
5895 register int c = getc (stream);
5896 if (p == pend)
5897 {
5898 /* We're at the end of linebuffer: expand it. */
5899 lbp->size *= 2;
5900 xrnew (buffer, lbp->size, char);
5901 p += buffer - lbp->buffer;
5902 pend = buffer + lbp->size;
5903 lbp->buffer = buffer;
5904 }
5905 if (c == EOF)
5906 {
5907 *p = '\0';
5908 chars_deleted = 0;
5909 break;
5910 }
5911 if (c == '\n')
5912 {
5913 if (p > buffer && p[-1] == '\r')
5914 {
5915 p -= 1;
5916 #ifdef DOS_NT
5917 /* Assume CRLF->LF translation will be performed by Emacs
5918 when loading this file, so CRs won't appear in the buffer.
5919 It would be cleaner to compensate within Emacs;
5920 however, Emacs does not know how many CRs were deleted
5921 before any given point in the file. */
5922 chars_deleted = 1;
5923 #else
5924 chars_deleted = 2;
5925 #endif
5926 }
5927 else
5928 {
5929 chars_deleted = 1;
5930 }
5931 *p = '\0';
5932 break;
5933 }
5934 *p++ = c;
5935 }
5936 lbp->len = p - buffer;
5937
5938 if (need_filebuf /* we need filebuf for multi-line regexps */
5939 && chars_deleted > 0) /* not at EOF */
5940 {
5941 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
5942 {
5943 /* Expand filebuf. */
5944 filebuf.size *= 2;
5945 xrnew (filebuf.buffer, filebuf.size, char);
5946 }
5947 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
5948 filebuf.len += lbp->len;
5949 filebuf.buffer[filebuf.len++] = '\n';
5950 filebuf.buffer[filebuf.len] = '\0';
5951 }
5952
5953 return lbp->len + chars_deleted;
5954 }
5955
5956 /*
5957 * Like readline_internal, above, but in addition try to match the
5958 * input line against relevant regular expressions and manage #line
5959 * directives.
5960 */
5961 static void
5962 readline (lbp, stream)
5963 linebuffer *lbp;
5964 FILE *stream;
5965 {
5966 long result;
5967
5968 linecharno = charno; /* update global char number of line start */
5969 result = readline_internal (lbp, stream); /* read line */
5970 lineno += 1; /* increment global line number */
5971 charno += result; /* increment global char number */
5972
5973 /* Honour #line directives. */
5974 if (!no_line_directive)
5975 {
5976 static bool discard_until_line_directive;
5977
5978 /* Check whether this is a #line directive. */
5979 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
5980 {
5981 int start, lno;
5982
5983 if (DEBUG) start = 0; /* shut up the compiler */
5984 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
5985 {
5986 char *endp = lbp->buffer + start;
5987
5988 assert (start > 0);
5989 while ((endp = etags_strchr (endp, '"')) != NULL
5990 && endp[-1] == '\\')
5991 endp++;
5992 if (endp != NULL)
5993 /* Ok, this is a real #line directive. Let's deal with it. */
5994 {
5995 char *taggedabsname; /* absolute name of original file */
5996 char *taggedfname; /* name of original file as given */
5997 char *name; /* temp var */
5998
5999 discard_until_line_directive = FALSE; /* found it */
6000 name = lbp->buffer + start;
6001 *endp = '\0';
6002 canonicalize_filename (name); /* for DOS */
6003 taggedabsname = absolute_filename (name, curfdp->infabsdir);
6004 if (filename_is_absolute (name)
6005 || filename_is_absolute (curfdp->infname))
6006 taggedfname = savestr (taggedabsname);
6007 else
6008 taggedfname = relative_filename (taggedabsname,tagfiledir);
6009
6010 if (streq (curfdp->taggedfname, taggedfname))
6011 /* The #line directive is only a line number change. We
6012 deal with this afterwards. */
6013 free (taggedfname);
6014 else
6015 /* The tags following this #line directive should be
6016 attributed to taggedfname. In order to do this, set
6017 curfdp accordingly. */
6018 {
6019 fdesc *fdp; /* file description pointer */
6020
6021 /* Go look for a file description already set up for the
6022 file indicated in the #line directive. If there is
6023 one, use it from now until the next #line
6024 directive. */
6025 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6026 if (streq (fdp->infname, curfdp->infname)
6027 && streq (fdp->taggedfname, taggedfname))
6028 /* If we remove the second test above (after the &&)
6029 then all entries pertaining to the same file are
6030 coalesced in the tags file. If we use it, then
6031 entries pertaining to the same file but generated
6032 from different files (via #line directives) will
6033 go into separate sections in the tags file. These
6034 alternatives look equivalent. The first one
6035 destroys some apparently useless information. */
6036 {
6037 curfdp = fdp;
6038 free (taggedfname);
6039 break;
6040 }
6041 /* Else, if we already tagged the real file, skip all
6042 input lines until the next #line directive. */
6043 if (fdp == NULL) /* not found */
6044 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6045 if (streq (fdp->infabsname, taggedabsname))
6046 {
6047 discard_until_line_directive = TRUE;
6048 free (taggedfname);
6049 break;
6050 }
6051 /* Else create a new file description and use that from
6052 now on, until the next #line directive. */
6053 if (fdp == NULL) /* not found */
6054 {
6055 fdp = fdhead;
6056 fdhead = xnew (1, fdesc);
6057 *fdhead = *curfdp; /* copy curr. file description */
6058 fdhead->next = fdp;
6059 fdhead->infname = savestr (curfdp->infname);
6060 fdhead->infabsname = savestr (curfdp->infabsname);
6061 fdhead->infabsdir = savestr (curfdp->infabsdir);
6062 fdhead->taggedfname = taggedfname;
6063 fdhead->usecharno = FALSE;
6064 fdhead->prop = NULL;
6065 fdhead->written = FALSE;
6066 curfdp = fdhead;
6067 }
6068 }
6069 free (taggedabsname);
6070 lineno = lno - 1;
6071 readline (lbp, stream);
6072 return;
6073 } /* if a real #line directive */
6074 } /* if #line is followed by a a number */
6075 } /* if line begins with "#line " */
6076
6077 /* If we are here, no #line directive was found. */
6078 if (discard_until_line_directive)
6079 {
6080 if (result > 0)
6081 {
6082 /* Do a tail recursion on ourselves, thus discarding the contents
6083 of the line buffer. */
6084 readline (lbp, stream);
6085 return;
6086 }
6087 /* End of file. */
6088 discard_until_line_directive = FALSE;
6089 return;
6090 }
6091 } /* if #line directives should be considered */
6092
6093 #ifdef ETAGS_REGEXPS
6094 {
6095 int match;
6096 regexp *rp;
6097 char *name;
6098
6099 /* Match against relevant regexps. */
6100 if (lbp->len > 0)
6101 for (rp = p_head; rp != NULL; rp = rp->p_next)
6102 {
6103 /* Only use generic regexps or those for the current language.
6104 Also do not use multiline regexps, which is the job of
6105 regex_tag_multiline. */
6106 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6107 || rp->multi_line)
6108 continue;
6109
6110 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6111 switch (match)
6112 {
6113 case -2:
6114 /* Some error. */
6115 if (!rp->error_signaled)
6116 {
6117 error ("regexp stack overflow while matching \"%s\"",
6118 rp->pattern);
6119 rp->error_signaled = TRUE;
6120 }
6121 break;
6122 case -1:
6123 /* No match. */
6124 break;
6125 case 0:
6126 /* Empty string matched. */
6127 if (!rp->error_signaled)
6128 {
6129 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6130 rp->error_signaled = TRUE;
6131 }
6132 break;
6133 default:
6134 /* Match occurred. Construct a tag. */
6135 name = rp->name;
6136 if (name[0] != '\0')
6137 /* Make a named tag. */
6138 name = substitute (lbp->buffer, rp->name, &rp->regs);
6139 if (rp->force_explicit_name)
6140 /* Force explicit tag name, if a name is there. */
6141 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6142 else
6143 make_tag (name, strlen (name), TRUE,
6144 lbp->buffer, match, lineno, linecharno);
6145 break;
6146 }
6147 }
6148 }
6149 #endif /* ETAGS_REGEXPS */
6150 }
6151
6152 \f
6153 /*
6154 * Return a pointer to a space of size strlen(cp)+1 allocated
6155 * with xnew where the string CP has been copied.
6156 */
6157 static char *
6158 savestr (cp)
6159 char *cp;
6160 {
6161 return savenstr (cp, strlen (cp));
6162 }
6163
6164 /*
6165 * Return a pointer to a space of size LEN+1 allocated with xnew where
6166 * the string CP has been copied for at most the first LEN characters.
6167 */
6168 static char *
6169 savenstr (cp, len)
6170 char *cp;
6171 int len;
6172 {
6173 register char *dp;
6174
6175 dp = xnew (len + 1, char);
6176 strncpy (dp, cp, len);
6177 dp[len] = '\0';
6178 return dp;
6179 }
6180
6181 /*
6182 * Return the ptr in sp at which the character c last
6183 * appears; NULL if not found
6184 *
6185 * Identical to POSIX strrchr, included for portability.
6186 */
6187 static char *
6188 etags_strrchr (sp, c)
6189 register const char *sp;
6190 register int c;
6191 {
6192 register const char *r;
6193
6194 r = NULL;
6195 do
6196 {
6197 if (*sp == c)
6198 r = sp;
6199 } while (*sp++);
6200 return (char *)r;
6201 }
6202
6203 /*
6204 * Return the ptr in sp at which the character c first
6205 * appears; NULL if not found
6206 *
6207 * Identical to POSIX strchr, included for portability.
6208 */
6209 static char *
6210 etags_strchr (sp, c)
6211 register const char *sp;
6212 register int c;
6213 {
6214 do
6215 {
6216 if (*sp == c)
6217 return (char *)sp;
6218 } while (*sp++);
6219 return NULL;
6220 }
6221
6222 /*
6223 * Compare two strings, ignoring case for alphabetic characters.
6224 *
6225 * Same as BSD's strcasecmp, included for portability.
6226 */
6227 static int
6228 etags_strcasecmp (s1, s2)
6229 register const char *s1;
6230 register const char *s2;
6231 {
6232 while (*s1 != '\0'
6233 && (ISALPHA (*s1) && ISALPHA (*s2)
6234 ? lowcase (*s1) == lowcase (*s2)
6235 : *s1 == *s2))
6236 s1++, s2++;
6237
6238 return (ISALPHA (*s1) && ISALPHA (*s2)
6239 ? lowcase (*s1) - lowcase (*s2)
6240 : *s1 - *s2);
6241 }
6242
6243 /*
6244 * Compare two strings, ignoring case for alphabetic characters.
6245 * Stop after a given number of characters
6246 *
6247 * Same as BSD's strncasecmp, included for portability.
6248 */
6249 static int
6250 etags_strncasecmp (s1, s2, n)
6251 register const char *s1;
6252 register const char *s2;
6253 register int n;
6254 {
6255 while (*s1 != '\0' && n-- > 0
6256 && (ISALPHA (*s1) && ISALPHA (*s2)
6257 ? lowcase (*s1) == lowcase (*s2)
6258 : *s1 == *s2))
6259 s1++, s2++;
6260
6261 if (n < 0)
6262 return 0;
6263 else
6264 return (ISALPHA (*s1) && ISALPHA (*s2)
6265 ? lowcase (*s1) - lowcase (*s2)
6266 : *s1 - *s2);
6267 }
6268
6269 /* Skip spaces, return new pointer. */
6270 static char *
6271 skip_spaces (cp)
6272 char *cp;
6273 {
6274 while (iswhite (*cp))
6275 cp++;
6276 return cp;
6277 }
6278
6279 /* Skip non spaces, return new pointer. */
6280 static char *
6281 skip_non_spaces (cp)
6282 char *cp;
6283 {
6284 while (*cp != '\0' && !iswhite (*cp))
6285 cp++;
6286 return cp;
6287 }
6288
6289 /* Print error message and exit. */
6290 void
6291 fatal (s1, s2)
6292 char *s1, *s2;
6293 {
6294 error (s1, s2);
6295 exit (BAD);
6296 }
6297
6298 static void
6299 pfatal (s1)
6300 char *s1;
6301 {
6302 perror (s1);
6303 exit (BAD);
6304 }
6305
6306 static void
6307 suggest_asking_for_help ()
6308 {
6309 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6310 progname,
6311 #ifdef LONG_OPTIONS
6312 "--help"
6313 #else
6314 "-h"
6315 #endif
6316 );
6317 exit (BAD);
6318 }
6319
6320 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6321 static void
6322 error (s1, s2)
6323 const char *s1, *s2;
6324 {
6325 fprintf (stderr, "%s: ", progname);
6326 fprintf (stderr, s1, s2);
6327 fprintf (stderr, "\n");
6328 }
6329
6330 /* Return a newly-allocated string whose contents
6331 concatenate those of s1, s2, s3. */
6332 static char *
6333 concat (s1, s2, s3)
6334 char *s1, *s2, *s3;
6335 {
6336 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6337 char *result = xnew (len1 + len2 + len3 + 1, char);
6338
6339 strcpy (result, s1);
6340 strcpy (result + len1, s2);
6341 strcpy (result + len1 + len2, s3);
6342 result[len1 + len2 + len3] = '\0';
6343
6344 return result;
6345 }
6346
6347 \f
6348 /* Does the same work as the system V getcwd, but does not need to
6349 guess the buffer size in advance. */
6350 static char *
6351 etags_getcwd ()
6352 {
6353 #ifdef HAVE_GETCWD
6354 int bufsize = 200;
6355 char *path = xnew (bufsize, char);
6356
6357 while (getcwd (path, bufsize) == NULL)
6358 {
6359 if (errno != ERANGE)
6360 pfatal ("getcwd");
6361 bufsize *= 2;
6362 free (path);
6363 path = xnew (bufsize, char);
6364 }
6365
6366 canonicalize_filename (path);
6367 return path;
6368
6369 #else /* not HAVE_GETCWD */
6370 #if MSDOS
6371
6372 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6373
6374 getwd (path);
6375
6376 for (p = path; *p != '\0'; p++)
6377 if (*p == '\\')
6378 *p = '/';
6379 else
6380 *p = lowcase (*p);
6381
6382 return strdup (path);
6383 #else /* not MSDOS */
6384 linebuffer path;
6385 FILE *pipe;
6386
6387 linebuffer_init (&path);
6388 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6389 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6390 pfatal ("pwd");
6391 pclose (pipe);
6392
6393 return path.buffer;
6394 #endif /* not MSDOS */
6395 #endif /* not HAVE_GETCWD */
6396 }
6397
6398 /* Return a newly allocated string containing the file name of FILE
6399 relative to the absolute directory DIR (which should end with a slash). */
6400 static char *
6401 relative_filename (file, dir)
6402 char *file, *dir;
6403 {
6404 char *fp, *dp, *afn, *res;
6405 int i;
6406
6407 /* Find the common root of file and dir (with a trailing slash). */
6408 afn = absolute_filename (file, cwd);
6409 fp = afn;
6410 dp = dir;
6411 while (*fp++ == *dp++)
6412 continue;
6413 fp--, dp--; /* back to the first differing char */
6414 #ifdef DOS_NT
6415 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6416 return afn;
6417 #endif
6418 do /* look at the equal chars until '/' */
6419 fp--, dp--;
6420 while (*fp != '/');
6421
6422 /* Build a sequence of "../" strings for the resulting relative file name. */
6423 i = 0;
6424 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6425 i += 1;
6426 res = xnew (3*i + strlen (fp + 1) + 1, char);
6427 res[0] = '\0';
6428 while (i-- > 0)
6429 strcat (res, "../");
6430
6431 /* Add the file name relative to the common root of file and dir. */
6432 strcat (res, fp + 1);
6433 free (afn);
6434
6435 return res;
6436 }
6437
6438 /* Return a newly allocated string containing the absolute file name
6439 of FILE given DIR (which should end with a slash). */
6440 static char *
6441 absolute_filename (file, dir)
6442 char *file, *dir;
6443 {
6444 char *slashp, *cp, *res;
6445
6446 if (filename_is_absolute (file))
6447 res = savestr (file);
6448 #ifdef DOS_NT
6449 /* We don't support non-absolute file names with a drive
6450 letter, like `d:NAME' (it's too much hassle). */
6451 else if (file[1] == ':')
6452 fatal ("%s: relative file names with drive letters not supported", file);
6453 #endif
6454 else
6455 res = concat (dir, file, "");
6456
6457 /* Delete the "/dirname/.." and "/." substrings. */
6458 slashp = etags_strchr (res, '/');
6459 while (slashp != NULL && slashp[0] != '\0')
6460 {
6461 if (slashp[1] == '.')
6462 {
6463 if (slashp[2] == '.'
6464 && (slashp[3] == '/' || slashp[3] == '\0'))
6465 {
6466 cp = slashp;
6467 do
6468 cp--;
6469 while (cp >= res && !filename_is_absolute (cp));
6470 if (cp < res)
6471 cp = slashp; /* the absolute name begins with "/.." */
6472 #ifdef DOS_NT
6473 /* Under MSDOS and NT we get `d:/NAME' as absolute
6474 file name, so the luser could say `d:/../NAME'.
6475 We silently treat this as `d:/NAME'. */
6476 else if (cp[0] != '/')
6477 cp = slashp;
6478 #endif
6479 strcpy (cp, slashp + 3);
6480 slashp = cp;
6481 continue;
6482 }
6483 else if (slashp[2] == '/' || slashp[2] == '\0')
6484 {
6485 strcpy (slashp, slashp + 2);
6486 continue;
6487 }
6488 }
6489
6490 slashp = etags_strchr (slashp + 1, '/');
6491 }
6492
6493 if (res[0] == '\0')
6494 return savestr ("/");
6495 else
6496 return res;
6497 }
6498
6499 /* Return a newly allocated string containing the absolute
6500 file name of dir where FILE resides given DIR (which should
6501 end with a slash). */
6502 static char *
6503 absolute_dirname (file, dir)
6504 char *file, *dir;
6505 {
6506 char *slashp, *res;
6507 char save;
6508
6509 canonicalize_filename (file);
6510 slashp = etags_strrchr (file, '/');
6511 if (slashp == NULL)
6512 return savestr (dir);
6513 save = slashp[1];
6514 slashp[1] = '\0';
6515 res = absolute_filename (file, dir);
6516 slashp[1] = save;
6517
6518 return res;
6519 }
6520
6521 /* Whether the argument string is an absolute file name. The argument
6522 string must have been canonicalized with canonicalize_filename. */
6523 static bool
6524 filename_is_absolute (fn)
6525 char *fn;
6526 {
6527 return (fn[0] == '/'
6528 #ifdef DOS_NT
6529 || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
6530 #endif
6531 );
6532 }
6533
6534 /* Translate backslashes into slashes. Works in place. */
6535 static void
6536 canonicalize_filename (fn)
6537 register char *fn;
6538 {
6539 #ifdef DOS_NT
6540 /* Canonicalize drive letter case. */
6541 if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
6542 fn[0] = upcase (fn[0]);
6543 /* Convert backslashes to slashes. */
6544 for (; *fn != '\0'; fn++)
6545 if (*fn == '\\')
6546 *fn = '/';
6547 #else
6548 /* No action. */
6549 fn = NULL; /* shut up the compiler */
6550 #endif
6551 }
6552
6553 \f
6554 /* Initialize a linebuffer for use */
6555 static void
6556 linebuffer_init (lbp)
6557 linebuffer *lbp;
6558 {
6559 lbp->size = (DEBUG) ? 3 : 200;
6560 lbp->buffer = xnew (lbp->size, char);
6561 lbp->buffer[0] = '\0';
6562 lbp->len = 0;
6563 }
6564
6565 /* Set the minimum size of a string contained in a linebuffer. */
6566 static void
6567 linebuffer_setlen (lbp, toksize)
6568 linebuffer *lbp;
6569 int toksize;
6570 {
6571 while (lbp->size <= toksize)
6572 {
6573 lbp->size *= 2;
6574 xrnew (lbp->buffer, lbp->size, char);
6575 }
6576 lbp->len = toksize;
6577 }
6578
6579 /* Like malloc but get fatal error if memory is exhausted. */
6580 static PTR
6581 xmalloc (size)
6582 unsigned int size;
6583 {
6584 PTR result = (PTR) malloc (size);
6585 if (result == NULL)
6586 fatal ("virtual memory exhausted", (char *)NULL);
6587 return result;
6588 }
6589
6590 static PTR
6591 xrealloc (ptr, size)
6592 char *ptr;
6593 unsigned int size;
6594 {
6595 PTR result = (PTR) realloc (ptr, size);
6596 if (result == NULL)
6597 fatal ("virtual memory exhausted", (char *)NULL);
6598 return result;
6599 }
6600
6601 /*
6602 * Local Variables:
6603 * c-indentation-style: gnu
6604 * indent-tabs-mode: t
6605 * tab-width: 8
6606 * fill-column: 79
6607 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6608 * End:
6609 */