(scan_separators): Support all character escape
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
33 *
34 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
35 */
36
/* Revision identification string for this version of the source.  */
37 char pot_etags_version[] = "@(#) pot revision number is 16.10";
38
39 #define TRUE 1
40 #define FALSE 0
41
/* Turn DEBUG into a TRUE/FALSE value that can be tested with `#if DEBUG'.
   When debugging was not requested, NDEBUG is defined as well, which
   disables assertions.  */
42 #ifdef DEBUG
43 # undef DEBUG
44 # define DEBUG TRUE
45 #else
46 # define DEBUG FALSE
47 # define NDEBUG /* disable assert */
48 #endif
49
50 #ifdef HAVE_CONFIG_H
51 # include <config.h>
52 /* On some systems, Emacs defines static as nothing for the sake
53 of unexec. We don't want that here since we don't use unexec. */
54 # undef static
55 # define ETAGS_REGEXPS /* use the regexp features */
56 # define LONG_OPTIONS /* accept long options */
57 # ifndef PTR /* for Xemacs */
58 # define PTR void *
59 # endif
60 # ifndef __P /* for Xemacs */
61 # define __P(args) args
62 # endif
63 #else
64 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
65 # define __P(args) args /* use prototypes */
66 # define PTR void * /* for generic pointers */
67 # else
68 # define __P(args) () /* no prototypes */
69 # define const /* remove const for old compilers' sake */
70 # define PTR long * /* don't use void* */
71 # endif
72 #endif /* !HAVE_CONFIG_H */
73
74 #ifndef _GNU_SOURCE
75 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
76 #endif
77
78 /* WIN32_NATIVE is for Xemacs.
79 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
80 #ifdef WIN32_NATIVE
81 # undef MSDOS
82 # undef WINDOWSNT
83 # define WINDOWSNT
84 #endif /* WIN32_NATIVE */
85
86 #ifdef MSDOS
87 # undef MSDOS
88 # define MSDOS TRUE
89 # include <fcntl.h>
90 # include <sys/param.h>
91 # include <io.h>
92 # ifndef HAVE_CONFIG_H
93 # define DOS_NT
94 # include <sys/config.h>
95 # endif
96 #else
97 # define MSDOS FALSE
98 #endif /* MSDOS */
99
100 #ifdef WINDOWSNT
101 # include <stdlib.h>
102 # include <fcntl.h>
103 # include <string.h>
104 # include <direct.h>
105 # include <io.h>
106 # define MAXPATHLEN _MAX_PATH
107 # undef HAVE_NTGUI
108 # undef DOS_NT
109 # define DOS_NT
110 # ifndef HAVE_GETCWD
111 # define HAVE_GETCWD
112 # endif /* undef HAVE_GETCWD */
113 #else /* !WINDOWSNT */
114 # ifdef STDC_HEADERS
115 # include <stdlib.h>
116 # include <string.h>
117 # else
118 extern char *getenv ();
119 # endif
120 #endif /* !WINDOWSNT */
121
122 #ifdef HAVE_UNISTD_H
123 # include <unistd.h>
124 #else
125 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
126 extern char *getcwd (char *buf, size_t size);
127 # endif
128 #endif /* HAVE_UNISTD_H */
129
130 #include <stdio.h>
131 #include <ctype.h>
132 #include <errno.h>
133 #ifndef errno
134 extern int errno;
135 #endif
136 #include <sys/types.h>
137 #include <sys/stat.h>
138
139 #include <assert.h>
140 #ifdef NDEBUG
141 # undef assert /* some systems have a buggy assert.h */
142 # define assert(x) ((void) 0)
143 #endif
144
145 #if !defined (S_ISREG) && defined (S_IFREG)
146 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
147 #endif
148
149 #ifdef LONG_OPTIONS
150 # include <getopt.h>
151 #else
152 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
153 extern char *optarg;
154 extern int optind, opterr;
155 #endif /* LONG_OPTIONS */
156
157 #ifdef ETAGS_REGEXPS
158 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
159 # ifdef __CYGWIN__ /* compiling on Cygwin */
160 !!! NOTICE !!!
161 the regex.h distributed with Cygwin is not compatible with etags, alas!
162 If you want regular expression support, you should delete this notice and
163 arrange to use the GNU regex.h and regex.c.
164 # endif
165 # endif
166 # include <regex.h>
167 #endif /* ETAGS_REGEXPS */
168
169 /* Define CTAGS to make the program "ctags" compatible with the usual one.
170 Leave it undefined to make the program "etags", which makes emacs-style
171 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* CTAGS is normalized to TRUE or FALSE so that it can be tested both by
   the preprocessor (`#if CTAGS') and in C conditions (`if (CTAGS)').  */
172 #ifdef CTAGS
173 # undef CTAGS
174 # define CTAGS TRUE
175 #else
176 # define CTAGS FALSE
177 #endif
178
179 /* Exit codes for success and failure. */
/* The two values are swapped on VMS with respect to other systems.  */
180 #ifdef VMS
181 # define GOOD 1
182 # define BAD 0
183 #else
184 # define GOOD 0
185 # define BAD 1
186 #endif
187
/* String equality tests: streq compares whole strings, strneq compares
   at most N characters.  Both evaluate to non-zero when the strings match.
   Neither argument may be NULL, because strcmp and strncmp must not be
   given a null pointer; the assertion therefore requires BOTH arguments
   to be non-NULL (it used to accept a single NULL argument).  */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
190
/* Character classification helpers.  CHAR(x) reduces x to the range
   0..CHARS-1, so the result is always a valid index into the _wht,
   _nin, _btk, _itk and _etk tables and a non-negative argument for the
   <ctype.h> functions, whatever the signedness of plain char.  */
191 #define CHARS 256 /* number of distinct char values (8-bit chars) */
192 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
193 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
194 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
195 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
196 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
197 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
198
/* Wrappers around the <ctype.h> tests that apply CHAR to the argument.  */
199 #define ISALNUM(c) isalnum (CHAR(c))
200 #define ISALPHA(c) isalpha (CHAR(c))
201 #define ISDIGIT(c) isdigit (CHAR(c))
202 #define ISLOWER(c) islower (CHAR(c))
203
/* Case conversion, again applying CHAR to the argument.  */
204 #define lowcase(c) tolower (CHAR(c))
205 #define upcase(c) toupper (CHAR(c))
206
207
208 /*
209 * xnew, xrnew -- allocate, reallocate storage
210 *
211 * SYNOPSIS: Type *xnew (int n, Type);
212 * void xrnew (OldPointer, int n, Type);
213 */
/* xnew yields storage for N objects of type Type.  xrnew resizes the
   storage OP points to so that it holds N objects of type Type and
   assigns the resulting address back to OP.  In DEBUG builds both go
   through trace_malloc and trace_realloc, which also receive the file
   and line of the call; otherwise they use xmalloc and xrealloc.  */
214 #if DEBUG
215 # include "chkmalloc.h"
216 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
217 (n) * sizeof (Type)))
218 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
219 (char *) (op), (n) * sizeof (Type)))
220 #else
221 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
222 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
223 (char *) (op), (n) * sizeof (Type)))
224 #endif
225
/* Boolean values are kept in plain ints.  */
226 #define bool int
227
/* Type of the per-language parsing functions; each takes a FILE *.  */
228 typedef void Lang_function __P((FILE *));
229
/* Association between a file name suffix and the command that
   decompresses files having that suffix.  */
230 typedef struct
231 {
232 char *suffix; /* file name suffix for this compressor */
233 char *command; /* takes one arg and decompresses to stdout */
234 } compressor;
235
/* Description of one language: its name, its parse function, and the
   file names, suffixes and interpreters associated with it.  */
236 typedef struct
237 {
238 char *name; /* language name */
239 bool metasource; /* source used to generate other sources */
240 Lang_function *function; /* parse function */
241 char **filenames; /* names of this language's files */
242 char **suffixes; /* name suffixes of this language's files */
243 char **interpreters; /* interpreters for this language */
244 } language;
245
/* Description of one input file.  Descriptions are chained through
   the NEXT field into a linked list.  */
246 typedef struct fdesc
247 {
248 struct fdesc *next; /* for the linked list */
249 char *infname; /* uncompressed input file name */
250 char *infabsname; /* absolute uncompressed input file name */
251 char *infabsdir; /* absolute dir of input file */
252 char *taggedfname; /* file name to write in tagfile */
253 language *lang; /* language of file */
254 char *prop; /* file properties to write in tagfile */
255 bool usecharno; /* etags tags shall contain char number */
256 } fdesc;
257
/* One tag.  Tags are kept as nodes of a binary tree (LEFT and RIGHT
   are the sons) and each refers to the description of its file.  */
258 typedef struct node_st
259 { /* sorting structure */
260 struct node_st *left, *right; /* left and right sons */
261 fdesc *fdp; /* description of file to whom tag belongs */
262 char *name; /* tag name */
263 char *pat; /* search pattern */
264 bool valid; /* write this tag on the tag file */
265 bool is_func; /* function tag: use pattern in CTAGS mode */
266 bool been_warned; /* warning already given for duplicated tag */
267 int lno; /* line number tag is on */
268 long cno; /* character number line starts on */
269 } node;
270
271 /*
272 * A `linebuffer' is a structure which holds a line of text.
273 * `readline_internal' reads a line from a stream into a linebuffer
274 * and works regardless of the length of the line.
275 * SIZE is the size of BUFFER, LEN is the length of the string in
276 * BUFFER after readline reads it.
277 */
278 typedef struct
279 {
280 long size; /* size of BUFFER */
281 int len; /* length of the string currently held in BUFFER */
282 char *buffer; /* the text of the line */
283 } linebuffer;
284
285 /* Used to support mixing of --lang and file names. */
/* Each value records the kind of one argument (ARG_TYPE), the text of
   the argument (WHAT) and the language associated with it (LANG).  */
286 typedef struct
287 {
288 enum {
289 at_language, /* a language specification */
290 at_regexp, /* a regular expression */
291 at_icregexp, /* same, but with case ignored */
292 at_filename, /* a file name */
293 at_stdin /* read from stdin here */
294 } arg_type; /* argument type */
295 language *lang; /* language associated with the argument */
296 char *what; /* the argument itself */
297 } argument;
298
299 #ifdef ETAGS_REGEXPS
300 /* Structure defining a regular expression. */
/* Patterns are chained through P_NEXT.  The field notes marked
   "presumably" are inferred from the field names only.  */
301 typedef struct pattern
302 {
303 struct pattern *p_next; /* next pattern in the chain */
304 language *lang; /* language associated with this pattern */
305 char *regex; /* presumably the text of the regexp -- TODO confirm */
306 struct re_pattern_buffer *pat; /* regex library pattern buffer */
307 struct re_registers regs; /* regex library registers */
308 char *name_pattern; /* presumably the NAME of /REGEXP/NAME/ -- TODO confirm */
309 bool error_signaled; /* presumably set once an error was reported -- TODO confirm */
310 bool ignore_case; /* presumably: match ignoring case -- TODO confirm */
311 } pattern;
312 #endif /* ETAGS_REGEXPS */
313
314
/* Forward declarations.  Argument lists are wrapped in __P, which
   expands to the list itself when prototypes are in use and to `()'
   otherwise, so that every declaration suits both kinds of compiler.  */

/* Functions with the Lang_function signature, plus C_entries, which
   takes an additional int argument.  */
315 /* Many compilers barf on this:
316 Lang_function Ada_funcs;
317 so let's write it this way */
318 static void Ada_funcs __P((FILE *));
319 static void Asm_labels __P((FILE *));
320 static void C_entries __P((int c_ext, FILE *));
321 static void default_C_entries __P((FILE *));
322 static void plain_C_entries __P((FILE *));
323 static void Cjava_entries __P((FILE *));
324 static void Cobol_paragraphs __P((FILE *));
325 static void Cplusplus_entries __P((FILE *));
326 static void Cstar_entries __P((FILE *));
327 static void Erlang_functions __P((FILE *));
328 static void Fortran_functions __P((FILE *));
329 static void Yacc_entries __P((FILE *));
330 static void Lisp_functions __P((FILE *));
331 static void Makefile_targets __P((FILE *));
332 static void Pascal_functions __P((FILE *));
333 static void Perl_functions __P((FILE *));
334 static void PHP_functions __P((FILE *));
335 static void Postscript_functions __P((FILE *));
336 static void Prolog_functions __P((FILE *));
337 static void Python_functions __P((FILE *));
338 static void Scheme_functions __P((FILE *));
339 static void TeX_commands __P((FILE *));
340 static void Texinfo_nodes __P((FILE *));
341 static void just_read_file __P((FILE *));
342
/* Informational output and program entry point.  */
343 static void print_language_names __P((void));
344 static void print_version __P((void));
345 static void print_help __P((void));
346 int main __P((int, char **));
347
/* Table lookups and line input.  */
348 static compressor *get_compressor_from_suffix __P((char *, char **));
349 static language *get_language_from_langname __P((const char *));
350 static language *get_language_from_interpreter __P((char *));
351 static language *get_language_from_filename __P((char *, bool));
352 static void readline __P((linebuffer *, FILE *));
353 static long readline_internal __P((linebuffer *, FILE *));
354 static bool nocase_tail __P((char *));
355 static char *get_tag __P((char *));
356
/* Regexp support, error reporting and tag tree insertion.  Note that
   fatal, unlike its neighbours, is not declared static.  */
357 #ifdef ETAGS_REGEXPS
358 static void analyse_regex __P((char *, bool));
359 static void add_regex __P((char *, bool, language *));
360 static void free_patterns __P((void));
361 #endif /* ETAGS_REGEXPS */
362 static void error __P((const char *, const char *));
363 static void suggest_asking_for_help __P((void));
364 void fatal __P((char *, char *));
365 static void pfatal __P((char *));
366 static void add_node __P((node *, node **));
367
/* Initialization, file processing and tag output.  */
368 static void init __P((void));
369 static void initbuffer __P((linebuffer *));
370 static void process_file_name __P((char *, language *));
371 static void process_file __P((FILE *, char *, language *));
372 static void find_entries __P((FILE *));
373 static void free_tree __P((node *));
374 static void free_fdesc __P((fdesc *));
375 static void pfnote __P((char *, bool, char *, int, int, long));
376 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
377 static void invalidate_nodes __P((fdesc *, node **));
378 static void put_entries __P((node *));
379
/* String, file name and memory utilities.  */
380 static char *concat __P((char *, char *, char *));
381 static char *skip_spaces __P((char *));
382 static char *skip_non_spaces __P((char *));
383 static char *savenstr __P((char *, int));
384 static char *savestr __P((char *));
385 static char *etags_strchr __P((const char *, int));
386 static char *etags_strrchr __P((const char *, int));
387 static bool strcaseeq __P((const char *, const char *));
388 static char *etags_getcwd __P((void));
389 static char *relative_filename __P((char *, char *));
390 static char *absolute_filename __P((char *, char *));
391 static char *absolute_dirname __P((char *, char *));
392 static bool filename_is_absolute __P((char *f));
393 static void canonicalize_filename __P((char *));
394 static void linebuffer_setlen __P((linebuffer *, int));
395 static PTR xmalloc __P((unsigned int));
396 static PTR xrealloc __P((char *, unsigned int));
397
398 \f
/* File-scope state shared by the whole program.  */
399 static char searchar = '/'; /* use /.../ searches */
400
401 static char *tagfile; /* output file */
402 static char *progname; /* name this program was invoked with */
403 static char *cwd; /* current working directory */
404 static char *tagfiledir; /* directory of tagfile */
405 static FILE *tagf; /* ioptr for tags file */
406
407 static fdesc *fdhead; /* head of file description list */
408 static fdesc *curfdp; /* current file description */
409 static int lineno; /* line number of current line */
410 static long charno; /* current character number */
411 static long linecharno; /* charno of start of current line */
412 static char *dbp; /* pointer to start of current tag */
413
/* NOTE(review): presumably a marker meaning "no valid character
   number" -- confirm against its uses.  */
414 static const int invalidcharno = -1;
415
416 static node *nodehead; /* the head of the binary tree of tags */
417 static node *last_node; /* the last node created */
418
419 static linebuffer lb; /* the current line */
420
/* Tables indexed by character value and read through the iswhite,
   notinname, intoken, begtoken and endtoken macros, followed by the
   character lists that describe each class.  */
421 /* boolean "functions" (see init) */
422 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
423 static char
424 /* white chars */
425 *white = " \f\t\n\r\v",
426 /* not in a name */
427 *nonam = " \f\t\n\r()=,;",
428 /* token ending chars */
429 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
430 /* token starting chars */
431 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
432 /* valid in-token chars */
433 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
434
/* Settings controlled by command line options.  */
435 static bool append_to_tagfile; /* -a: append to tags */
436 /* The next four default to TRUE for etags, but to FALSE for ctags. */
437 static bool typedefs; /* -t: create tags for C and Ada typedefs */
438 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
439 /* 0 struct/enum/union decls, and C++ */
440 /* member functions. */
441 static bool constantypedefs; /* -d: create tags for C #define, enum */
442 /* constants and variables. */
443 /* -D: opposite of -d. Default under ctags. */
444 static bool globals; /* create tags for global variables */
445 static bool declarations; /* --declarations: tag them and extern in C&Co*/
446 static bool members; /* create tags for C member variables */
447 static bool no_line_directive; /* ignore #line directives (undocumented) */
448 static bool update; /* -u: update tags */
449 static bool vgrind_style; /* -v: create vgrind style index output */
450 static bool no_warnings; /* -w: suppress warnings */
451 static bool cxref_style; /* -x: create cxref style output */
452 static bool cplusplus; /* .[hc] means C++, not C */
453 static bool noindentypedefs; /* -I: ignore indentation in C */
454 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
455
456 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
457 static bool parsing_stdin; /* --parse-stdin used */
458
459 #ifdef ETAGS_REGEXPS
460 /* List of all regexps. */
461 static pattern *p_head;
462
463 /* How many characters in the character set. (From regex.c.) */
464 #define CHAR_SET_SIZE 256
465 /* Translation table for case-insensitive matching. */
466 static char lc_trans[CHAR_SET_SIZE];
467 #endif /* ETAGS_REGEXPS */
468
469 #ifdef LONG_OPTIONS
/* Long option table handed to getopt_long.  Each entry gives the
   option name, whether it takes an argument, an optional flag
   variable, and a value.  Entries whose third field is NULL use the
   same value as a short option letter (or STDIN); the others name one
   of the bool settings together with the value meant for it.  The
   table ends with an entry whose name is NULL.  */
470 static struct option longopts[] =
471 {
472 { "packages-only", no_argument, &packages_only, TRUE },
473 { "c++", no_argument, NULL, 'C' },
474 { "declarations", no_argument, &declarations, TRUE },
475 { "no-line-directive", no_argument, &no_line_directive, TRUE },
476 { "help", no_argument, NULL, 'h' },
/* NOTE(review): this entry repeats the name "help" with a different
   value; presumably only the first one can ever be selected -- confirm
   against the getopt_long matching rules.  */
477 { "help", no_argument, NULL, 'H' },
478 { "ignore-indentation", no_argument, NULL, 'I' },
479 { "language", required_argument, NULL, 'l' },
480 { "members", no_argument, &members, TRUE },
481 { "no-members", no_argument, &members, FALSE },
482 { "output", required_argument, NULL, 'o' },
483 #ifdef ETAGS_REGEXPS
484 { "regex", required_argument, NULL, 'r' },
485 { "no-regex", no_argument, NULL, 'R' },
486 { "ignore-case-regex", required_argument, NULL, 'c' },
487 #endif /* ETAGS_REGEXPS */
488 { "parse-stdin", required_argument, NULL, STDIN },
489 { "version", no_argument, NULL, 'V' },
490
491 #if CTAGS /* Ctags options */
492 { "backward-search", no_argument, NULL, 'B' },
493 { "cxref", no_argument, NULL, 'x' },
494 { "defines", no_argument, NULL, 'd' },
495 { "globals", no_argument, &globals, TRUE },
496 { "typedefs", no_argument, NULL, 't' },
497 { "typedefs-and-c++", no_argument, NULL, 'T' },
498 { "update", no_argument, NULL, 'u' },
499 { "vgrind", no_argument, NULL, 'v' },
500 { "no-warn", no_argument, NULL, 'w' },
501
502 #else /* Etags options */
503 { "append", no_argument, NULL, 'a' },
504 { "no-defines", no_argument, NULL, 'D' },
505 { "no-globals", no_argument, &globals, FALSE },
506 { "include", required_argument, NULL, 'i' },
507 #endif
508 { NULL }
509 };
510 #endif /* LONG_OPTIONS */
511
/* Known compressed file suffixes, each paired with the command that
   decompresses such a file to standard output.  The table ends with
   an entry whose suffix is NULL.  */
512 static compressor compressors[] =
513 {
514 { "z", "gzip -d -c"},
515 { "Z", "gzip -d -c"},
516 { "gz", "gzip -d -c"},
517 { "GZ", "gzip -d -c"},
518 { "bz2", "bzip2 -d -c" },
519 { NULL }
520 };
521
522 /*
523 * Language stuff.
524 */
525
/* Per-language lists of file name suffixes, file names and
   interpreter names.  Every list is an array of strings ending with a
   NULL element; suffixes are written without the leading dot.  */
526 /* Ada code */
527 static char *Ada_suffixes [] =
528 { "ads", "adb", "ada", NULL };
529
530 /* Assembly code */
531 static char *Asm_suffixes [] =
532 { "a", /* Unix assembler */
533 "asm", /* Microcontroller assembly */
534 "def", /* BSO/Tasking definition includes */
535 "inc", /* Microcontroller include files */
536 "ins", /* Microcontroller include files */
537 "s", "sa", /* Unix assembler */
538 "S", /* cpp-processed Unix assembler */
539 "src", /* BSO/Tasking C compiler output */
540 NULL
541 };
542
543 /* Note that .c and .h can be considered C++, if the --c++ flag was
544 given, or if the `class' keyword is met inside the file.
545 That is why default_C_entries is called for these. */
546 static char *default_C_suffixes [] =
547 { "c", "h", NULL };
548
549 static char *Cplusplus_suffixes [] =
550 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
551 "M", /* Objective C++ */
552 "pdb", /* Postscript with C syntax */
553 NULL };
554
555 static char *Cjava_suffixes [] =
556 { "java", NULL };
557
558 static char *Cobol_suffixes [] =
559 { "COB", "cob", NULL };
560
561 static char *Cstar_suffixes [] =
562 { "cs", "hs", NULL };
563
564 static char *Erlang_suffixes [] =
565 { "erl", "hrl", NULL };
566
567 static char *Fortran_suffixes [] =
568 { "F", "f", "f90", "for", NULL };
569
570 static char *Lisp_suffixes [] =
571 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
572
/* Makefiles are recognized by whole file name rather than by suffix.  */
573 static char *Makefile_filenames [] =
574 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
575
576 static char *Pascal_suffixes [] =
577 { "p", "pas", NULL };
578
579 static char *Perl_suffixes [] =
580 { "pl", "pm", NULL };
581
582 static char *Perl_interpreters [] =
583 { "perl", "@PERL@", NULL };
584
585 static char *PHP_suffixes [] =
586 { "php", "php3", "php4", NULL };
587
588 static char *plain_C_suffixes [] =
589 { "lm", /* Objective lex file */
590 "m", /* Objective C file */
591 "pc", /* Pro*C file */
592 NULL };
593
594 static char *Postscript_suffixes [] =
595 { "ps", "psw", NULL }; /* .psw is for PSWrap */
596
597 static char *Prolog_suffixes [] =
598 { "prolog", NULL };
599
600 static char *Python_suffixes [] =
601 { "py", NULL };
602
603 /* Can't do the `SCM' or `scm' prefix with a version number. */
604 static char *Scheme_suffixes [] =
605 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
606
607 static char *TeX_suffixes [] =
608 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
609
610 static char *Texinfo_suffixes [] =
611 { "texi", "texinfo", "txi", NULL };
612
613 static char *Yacc_suffixes [] =
614 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
615
616 /*
617 * Table of languages.
618 *
619 * It is ok for a given function to be listed under more than one
620 * name. I just didn't.
621 */
622
/* The initializers are positional and follow the field order of
   `language': name, metasource, function, filenames, suffixes,
   interpreters.  Fields left out of an initializer are zero (NULL).
   The table ends with an entry whose name is NULL.  */
623 static language lang_names [] =
624 {
625 { "ada", FALSE, Ada_funcs, NULL, Ada_suffixes, NULL },
626 { "asm", FALSE, Asm_labels, NULL, Asm_suffixes, NULL },
627 { "c", FALSE, default_C_entries, NULL, default_C_suffixes, NULL },
628 { "c++", FALSE, Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
629 { "c*", FALSE, Cstar_entries, NULL, Cstar_suffixes, NULL },
630 { "cobol", FALSE, Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
631 { "erlang", FALSE, Erlang_functions, NULL, Erlang_suffixes, NULL },
632 { "fortran", FALSE, Fortran_functions, NULL, Fortran_suffixes, NULL },
633 { "java", FALSE, Cjava_entries, NULL, Cjava_suffixes, NULL },
634 { "lisp", FALSE, Lisp_functions, NULL, Lisp_suffixes, NULL },
635 { "makefile", FALSE, Makefile_targets, Makefile_filenames, NULL, NULL },
636 { "pascal", FALSE, Pascal_functions, NULL, Pascal_suffixes, NULL },
637 { "perl", FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
638 { "php", FALSE, PHP_functions, NULL, PHP_suffixes, NULL },
639 { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
640 { "proc", FALSE, plain_C_entries, NULL, plain_C_suffixes, NULL },
641 { "prolog", FALSE, Prolog_functions, NULL, Prolog_suffixes, NULL },
642 { "python", FALSE, Python_functions, NULL, Python_suffixes, NULL },
643 { "scheme", FALSE, Scheme_functions, NULL, Scheme_suffixes, NULL },
644 { "tex", FALSE, TeX_commands, NULL, TeX_suffixes, NULL },
645 { "texinfo", FALSE, Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
646 { "yacc", TRUE, Yacc_entries, NULL, Yacc_suffixes, NULL },
647 { "auto", FALSE, NULL }, /* default guessing scheme */
648 { "none", FALSE, just_read_file }, /* regexp matching only */
649 { NULL, FALSE, NULL } /* end of list */
650 };
651
652 \f
653 static void
654 print_language_names ()
655 {
656 language *lang;
657 char **name, **ext;
658
659 puts ("\nThese are the currently supported languages, along with the\n\
660 default file names and dot suffixes:");
661 for (lang = lang_names; lang->name != NULL; lang++)
662 {
663 printf (" %-*s", 10, lang->name);
664 if (lang->filenames != NULL)
665 for (name = lang->filenames; *name != NULL; name++)
666 printf (" %s", *name);
667 if (lang->suffixes != NULL)
668 for (ext = lang->suffixes; *ext != NULL; ext++)
669 printf (" .%s", *ext);
670 puts ("");
671 }
672 puts ("Where `auto' means use default language for files based on file\n\
673 name suffix, and `none' means only do regexp processing on files.\n\
674 If no language is specified and no matching suffix is found,\n\
675 the first line of the file is read for a sharp-bang (#!) sequence\n\
676 followed by the name of an interpreter. If no such sequence is found,\n\
677 Fortran is tried first; if no tags are found, C is tried next.\n\
678 When parsing any C file, a \"class\" keyword switches to C++.\n\
679 Compressed files are supported using gzip and bzip2.");
680 }
681
682 #ifndef EMACS_NAME
683 # define EMACS_NAME "GNU Emacs"
684 #endif
685 #ifndef VERSION
686 # define VERSION "21"
687 #endif
688 static void
689 print_version ()
690 {
691 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
692 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
693 puts ("This program is distributed under the same terms as Emacs");
694
695 exit (GOOD);
696 }
697
/* Print the usage message on standard output, followed by the list of
   supported languages and the bug report address, then exit with the
   GOOD code.  Which options are described depends on whether the
   program is built as ctags (CTAGS true) or as etags, and on whether
   long options and regexps are compiled in.  */
698 static void
699 print_help ()
700 {
701 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
702 \n\
703 These are the options accepted by %s.\n", progname, progname);
704 #ifdef LONG_OPTIONS
705 puts ("You may use unambiguous abbreviations for the long option names.");
706 #else
707 puts ("Long option names do not work with this executable, as it is not\n\
708 linked with GNU getopt.");
709 #endif /* LONG_OPTIONS */
710 puts (" A - as file name means read names from stdin (one per line).\n\
711 Absolute names are stored in the output file as they are.\n\
712 Relative ones are stored relative to the output file's directory.\n");
713
/* Described for etags only.  */
714 if (!CTAGS)
715 puts ("-a, --append\n\
716 Append tag entries to existing tags file.");
717
718 puts ("--packages-only\n\
719 For Ada files, only generate tags for packages.");
720
/* Described for ctags only.  */
721 if (CTAGS)
722 puts ("-B, --backward-search\n\
723 Write the search commands for the tag entries using '?', the\n\
724 backward-search command instead of '/', the forward-search command.");
725
726 /* This option is mostly obsolete, because etags can now automatically
727 detect C++. Retained for backward compatibility and for debugging and
728 experimentation. In principle, we could want to tag as C++ even
729 before any "class" keyword.
730 puts ("-C, --c++\n\
731 Treat files whose name suffix defaults to C language as C++ files.");
732 */
733
734 puts ("--declarations\n\
735 In C and derived languages, create tags for function declarations,");
736 if (CTAGS)
737 puts ("\tand create tags for extern variables if --globals is used.");
738 else
739 puts
740 ("\tand create tags for extern variables unless --no-globals is used.");
741
/* ctags documents the option that enables these tags, etags the one
   that disables them.  */
742 if (CTAGS)
743 puts ("-d, --defines\n\
744 Create tag entries for C #define constants and enum constants, too.");
745 else
746 puts ("-D, --no-defines\n\
747 Don't create tag entries for C #define constants and enum constants.\n\
748 This makes the tags file smaller.");
749
750 if (!CTAGS)
751 puts ("-i FILE, --include=FILE\n\
752 Include a note in tag file indicating that, when searching for\n\
753 a tag, one should also consult the tags file FILE after\n\
754 checking the current file.");
755
756 puts ("-l LANG, --language=LANG\n\
757 Force the following files to be considered as written in the\n\
758 named language up to the next --language=LANG option.");
759
760 if (CTAGS)
761 puts ("--globals\n\
762 Create tag entries for global variables in some languages.");
763 else
764 puts ("--no-globals\n\
765 Do not create tag entries for global variables in some\n\
766 languages. This makes the tags file smaller.");
767 puts ("--members\n\
768 Create tag entries for member variables in C and derived languages.");
769
770 #ifdef ETAGS_REGEXPS
771 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
772 Make a tag for each line matching pattern REGEXP in the following\n\
773 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
774 regexfile is a file containing one REGEXP per line.\n\
775 REGEXP is anchored (as if preceded by ^).\n\
776 The form /REGEXP/NAME/ creates a named tag.\n\
777 For example Tcl named tags can be created with:\n\
778 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
779 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
780 Like -r, --regex but ignore case when matching expressions.");
781 puts ("-R, --no-regex\n\
782 Don't create tags from regexps for the following files.");
783 #endif /* ETAGS_REGEXPS */
784 puts ("-I, --ignore-indentation\n\
785 Don't rely on indentation quite as much as normal. Currently,\n\
786 this means not to assume that a closing brace in the first\n\
787 column is the final brace of a function or structure\n\
788 definition in C and C++.");
789 puts ("-o FILE, --output=FILE\n\
790 Write the tags to FILE.");
791 puts ("--parse-stdin=NAME\n\
792 Read from standard input and record tags as belonging to file NAME.");
793
/* The remaining conditional options are described for ctags only.  */
794 if (CTAGS)
795 {
796 puts ("-t, --typedefs\n\
797 Generate tag entries for C and Ada typedefs.");
798 puts ("-T, --typedefs-and-c++\n\
799 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
800 and C++ member functions.");
801 }
802
803 if (CTAGS)
804 puts ("-u, --update\n\
805 Update the tag entries for the given files, leaving tag\n\
806 entries for other files in place. Currently, this is\n\
807 implemented by deleting the existing entries for the given\n\
808 files and then rewriting the new entries at the end of the\n\
809 tags file. It is often faster to simply rebuild the entire\n\
810 tag file than to use this.");
811
812 if (CTAGS)
813 {
814 puts ("-v, --vgrind\n\
815 Generates an index of items intended for human consumption,\n\
816 similar to the output of vgrind. The index is sorted, and\n\
817 gives the page number of each item.");
818 puts ("-w, --no-warn\n\
819 Suppress warning messages about entries defined in multiple\n\
820 files.");
821 puts ("-x, --cxref\n\
822 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
823 The output uses line numbers instead of page numbers, but\n\
824 beyond that the differences are cosmetic; try both to see\n\
825 which you like.");
826 }
827
828 puts ("-V, --version\n\
829 Print the version of the program.\n\
830 -h, --help\n\
831 Print this help message.");
832
833 print_language_names ();
834
835 puts ("");
836 puts ("Report bugs to bug-gnu-emacs@gnu.org");
837
838 exit (GOOD);
839 }
840
841 \f
842 #ifdef VMS /* VMS specific functions */
843
/* String terminator.  */
844 #define EOS '\0'
845
846 /* This is a BUG! ANY arbitrary limit is a BUG!
847 Won't someone please fix this? */
848 #define MAX_FILE_SPEC_LEN 255
/* Buffer for a file specification: a length field followed by the
   characters, with room for MAX_FILE_SPEC_LEN of them plus one more
   for a terminating EOS.  */
849 typedef struct {
850 short curlen;
851 char body[MAX_FILE_SPEC_LEN + 1];
852 } vspec;
853
854 /*
855 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
856 returning in each successive call the next file name matching the input
857 spec. The function expects that each in_spec passed
858 to it will be processed to completion; in particular, up to and
859 including the call following that in which the last matching name
860 is returned, the function ignores the value of in_spec, and will
861 only start processing a new spec with the following call.
862 If an error occurs, on return out_spec contains the value
863 of in_spec when the error occurred.
864
865 With each successive file name returned in out_spec, the
866 function's return value is one. When there are no more matching
867 names the function returns zero. If on the first call no file
868 matches in_spec, or there is any other error, -1 is returned.
869 */
870
871 #include <rmsdef.h>
872 #include <descrip.h>
873 #define OUTSIZE MAX_FILE_SPEC_LEN
/* Return in OUT the next file name matching the specification IN.
   The result is 1 when a name was stored in OUT, 0 when there are no
   more matching names, and -1 on any other status, in which case IN
   is copied to OUT->body.  The search context, the two descriptors
   and the PASS1 flag are static, so the state of an expansion is kept
   from one call to the next.  */
874 static short
875 fn_exp (out, in)
876 vspec *out;
877 char *in;
878 {
879 static long context = 0;
880 static struct dsc$descriptor_s o;
881 static struct dsc$descriptor_s i;
882 static bool pass1 = TRUE;
883 long status;
884 short retval;
885
/* The descriptors are set up only on the first call of an expansion,
   so IN and OUT of later calls are not looked at until the expansion
   is finished and PASS1 is set again.  */
886 if (pass1)
887 {
888 pass1 = FALSE;
889 o.dsc$a_pointer = (char *) out;
890 o.dsc$w_length = (short)OUTSIZE;
891 i.dsc$a_pointer = in;
892 i.dsc$w_length = (short)strlen(in);
893 i.dsc$b_dtype = DSC$K_DTYPE_T;
894 i.dsc$b_class = DSC$K_CLASS_S;
895 o.dsc$b_dtype = DSC$K_DTYPE_VT;
896 o.dsc$b_class = DSC$K_CLASS_VS;
897 }
898 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
899 {
/* A name was found: terminate it at its current length.  */
900 out->body[out->curlen] = EOS;
901 return 1;
902 }
903 else if (status == RMS$_NMF)
904 retval = 0;
905 else
906 {
907 strcpy(out->body, in);
908 retval = -1;
909 }
/* The expansion is over, either normally or with an error: end the
   search and make the next call start a new specification.  */
910 lib$find_file_end(&context);
911 pass1 = TRUE;
912 return retval;
913 }
914
915 /*
916 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
917 name of each file specified by the provided arg expanding wildcards.
918 */
919 static char *
920 gfnames (arg, p_error)
921 char *arg;
922 bool *p_error;
923 {
924 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
925
926 switch (fn_exp (&filename, arg))
927 {
928 case 1:
929 *p_error = FALSE;
930 return filename.body;
931 case 0:
932 *p_error = FALSE;
933 return NULL;
934 default:
935 *p_error = TRUE;
936 return filename.body;
937 }
938 }
939
940 #ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Replacement for the C library `system'.  It cannot run CMD: it only
   reports that through `error' and returns -1, the value `system'
   gives when the command could not be run.  The return value used to
   be missing altogether, while main both tests it and passes it to
   `exit'.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
946 #endif
947
948 #define VERSION_DELIM ';'
949 char *massage_name (s)
950 char *s;
951 {
952 char *start = s;
953
954 for ( ; *s; s++)
955 if (*s == VERSION_DELIM)
956 {
957 *s = EOS;
958 break;
959 }
960 else
961 *s = lowcase (*s);
962 return start;
963 }
964 #endif /* VMS */
965
966 \f
967 int
968 main (argc, argv)
969 int argc;
970 char *argv[];
971 {
972 int i;
973 unsigned int nincluded_files;
974 char **included_files;
975 argument *argbuffer;
976 int current_arg, file_count;
977 linebuffer filename_lb;
978 #ifdef VMS
979 bool got_err;
980 #endif
981 char *optstring;
982 int opt;
983
984
985 #ifdef DOS_NT
986 _fmode = O_BINARY; /* all of files are treated as binary files */
987 #endif /* DOS_NT */
988
989 progname = argv[0];
990 nincluded_files = 0;
991 included_files = xnew (argc, char *);
992 current_arg = 0;
993 file_count = 0;
994
995 /* Allocate enough no matter what happens. Overkill, but each one
996 is small. */
997 argbuffer = xnew (argc, argument);
998
999 #ifdef ETAGS_REGEXPS
1000 /* Set syntax for regular expression routines. */
1001 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
1002 /* Translation table for case-insensitive search. */
1003 for (i = 0; i < CHAR_SET_SIZE; i++)
1004 lc_trans[i] = lowcase (i);
1005 #endif /* ETAGS_REGEXPS */
1006
1007 /*
1008 * If etags, always find typedefs and structure tags. Why not?
1009 * Also default to find macro constants, enum constants and
1010 * global variables.
1011 */
1012 if (!CTAGS)
1013 {
1014 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1015 globals = TRUE;
1016 }
1017
1018 optstring = "-";
1019 #ifdef ETAGS_REGEXPS
1020 optstring = "-r:Rc:";
1021 #endif /* ETAGS_REGEXPS */
1022 #ifndef LONG_OPTIONS
1023 optstring = optstring + 1;
1024 #endif /* LONG_OPTIONS */
1025 optstring = concat (optstring,
1026 "Cf:Il:o:SVhH",
1027 (CTAGS) ? "BxdtTuvw" : "aDi:");
1028
1029 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1030 switch (opt)
1031 {
1032 case 0:
1033 /* If getopt returns 0, then it has already processed a
1034 long-named option. We should do nothing. */
1035 break;
1036
1037 case 1:
1038 /* This means that a file name has been seen. Record it. */
1039 argbuffer[current_arg].arg_type = at_filename;
1040 argbuffer[current_arg].what = optarg;
1041 ++current_arg;
1042 ++file_count;
1043 break;
1044
1045 case STDIN:
1046 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1047 argbuffer[current_arg].arg_type = at_stdin;
1048 argbuffer[current_arg].what = optarg;
1049 ++current_arg;
1050 ++file_count;
1051 if (parsing_stdin)
1052 fatal ("cannot parse standard input more than once", (char *)NULL);
1053 parsing_stdin = TRUE;
1054 break;
1055
1056 /* Common options. */
1057 case 'C': cplusplus = TRUE; break;
1058 case 'f': /* for compatibility with old makefiles */
1059 case 'o':
1060 if (tagfile)
1061 {
1062 error ("-o option may only be given once.", (char *)NULL);
1063 suggest_asking_for_help ();
1064 }
1065 tagfile = optarg;
1066 break;
1067 case 'I':
1068 case 'S': /* for backward compatibility */
1069 noindentypedefs = TRUE;
1070 break;
1071 case 'l':
1072 {
1073 language *lang = get_language_from_langname (optarg);
1074 if (lang != NULL)
1075 {
1076 argbuffer[current_arg].lang = lang;
1077 argbuffer[current_arg].arg_type = at_language;
1078 ++current_arg;
1079 }
1080 }
1081 break;
1082 case 'r':
1083 argbuffer[current_arg].arg_type = at_regexp;
1084 argbuffer[current_arg].what = optarg;
1085 ++current_arg;
1086 break;
1087 case 'R':
1088 argbuffer[current_arg].arg_type = at_regexp;
1089 argbuffer[current_arg].what = NULL;
1090 ++current_arg;
1091 break;
1092 case 'c':
1093 argbuffer[current_arg].arg_type = at_icregexp;
1094 argbuffer[current_arg].what = optarg;
1095 ++current_arg;
1096 break;
1097 case 'V':
1098 print_version ();
1099 break;
1100 case 'h':
1101 case 'H':
1102 print_help ();
1103 break;
1104
1105 /* Etags options */
1106 case 'a': append_to_tagfile = TRUE; break;
1107 case 'D': constantypedefs = FALSE; break;
1108 case 'i': included_files[nincluded_files++] = optarg; break;
1109
1110 /* Ctags options. */
1111 case 'B': searchar = '?'; break;
1112 case 'd': constantypedefs = TRUE; break;
1113 case 't': typedefs = TRUE; break;
1114 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1115 case 'u': update = TRUE; break;
1116 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1117 case 'x': cxref_style = TRUE; break;
1118 case 'w': no_warnings = TRUE; break;
1119 default:
1120 suggest_asking_for_help ();
1121 }
1122
1123 for (; optind < argc; ++optind)
1124 {
1125 argbuffer[current_arg].arg_type = at_filename;
1126 argbuffer[current_arg].what = argv[optind];
1127 ++current_arg;
1128 ++file_count;
1129 }
1130
1131 if (nincluded_files == 0 && file_count == 0)
1132 {
1133 error ("no input files specified.", (char *)NULL);
1134 suggest_asking_for_help ();
1135 }
1136
1137 if (tagfile == NULL)
1138 tagfile = CTAGS ? "tags" : "TAGS";
1139 cwd = etags_getcwd (); /* the current working directory */
1140 if (cwd[strlen (cwd) - 1] != '/')
1141 {
1142 char *oldcwd = cwd;
1143 cwd = concat (oldcwd, "/", "");
1144 free (oldcwd);
1145 }
1146 if (streq (tagfile, "-"))
1147 tagfiledir = cwd;
1148 else
1149 tagfiledir = absolute_dirname (tagfile, cwd);
1150
1151 init (); /* set up boolean "functions" */
1152
1153 initbuffer (&lb);
1154 initbuffer (&filename_lb);
1155
1156 if (!CTAGS)
1157 {
1158 if (streq (tagfile, "-"))
1159 {
1160 tagf = stdout;
1161 #ifdef DOS_NT
1162 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1163 doesn't take effect until after `stdout' is already open). */
1164 if (!isatty (fileno (stdout)))
1165 setmode (fileno (stdout), O_BINARY);
1166 #endif /* DOS_NT */
1167 }
1168 else
1169 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1170 if (tagf == NULL)
1171 pfatal (tagfile);
1172 }
1173
1174 /*
1175 * Loop through files finding functions.
1176 */
1177 for (i = 0; i < current_arg; ++i)
1178 {
1179 static language *lang; /* non-NULL if language is forced */
1180 char *this_file;
1181
1182 switch (argbuffer[i].arg_type)
1183 {
1184 case at_language:
1185 lang = argbuffer[i].lang;
1186 break;
1187 #ifdef ETAGS_REGEXPS
1188 case at_regexp:
1189 analyse_regex (argbuffer[i].what, FALSE);
1190 break;
1191 case at_icregexp:
1192 analyse_regex (argbuffer[i].what, TRUE);
1193 break;
1194 #endif
1195 case at_filename:
1196 #ifdef VMS
1197 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1198 {
1199 if (got_err)
1200 {
1201 error ("can't find file %s\n", this_file);
1202 argc--, argv++;
1203 }
1204 else
1205 {
1206 this_file = massage_name (this_file);
1207 }
1208 #else
1209 this_file = argbuffer[i].what;
1210 #endif
1211 /* Input file named "-" means read file names from stdin
1212 (one per line) and use them. */
1213 if (streq (this_file, "-"))
1214 {
1215 if (parsing_stdin)
1216 fatal ("cannot parse standard input AND read file names from it",
1217 (char *)NULL);
1218 while (readline_internal (&filename_lb, stdin) > 0)
1219 process_file_name (filename_lb.buffer, lang);
1220 }
1221 else
1222 process_file_name (this_file, lang);
1223 #ifdef VMS
1224 }
1225 #endif
1226 break;
1227 case at_stdin:
1228 this_file = argbuffer[i].what;
1229 process_file (stdin, this_file, lang);
1230 break;
1231 }
1232 }
1233
1234 #ifdef ETAGS_REGEXPS
1235 free_patterns ();
1236 #endif /* ETAGS_REGEXPS */
1237
1238 if (!CTAGS || cxref_style)
1239 {
1240 put_entries (nodehead);
1241 free_tree (nodehead);
1242 nodehead = NULL;
1243 if (!CTAGS)
1244 while (nincluded_files-- > 0)
1245 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1246
1247 if (fclose (tagf) == EOF)
1248 pfatal (tagfile);
1249 exit (GOOD);
1250 }
1251
1252 if (update)
1253 {
1254 char cmd[BUFSIZ];
1255 for (i = 0; i < current_arg; ++i)
1256 {
1257 switch (argbuffer[i].arg_type)
1258 {
1259 case at_filename:
1260 case at_stdin:
1261 break;
1262 default:
1263 continue; /* the for loop */
1264 }
1265 sprintf (cmd,
1266 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1267 tagfile, argbuffer[i].what, tagfile);
1268 if (system (cmd) != GOOD)
1269 fatal ("failed to execute shell command", (char *)NULL);
1270 }
1271 append_to_tagfile = TRUE;
1272 }
1273
1274 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1275 if (tagf == NULL)
1276 pfatal (tagfile);
1277 put_entries (nodehead);
1278 free_tree (nodehead);
1279 nodehead = NULL;
1280 if (fclose (tagf) == EOF)
1281 pfatal (tagfile);
1282
1283 if (update)
1284 {
1285 char cmd[2*BUFSIZ+10];
1286 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1287 exit (system (cmd));
1288 }
1289 return GOOD;
1290 }
1291
1292
1293 /*
1294 * Return a compressor given the file name. If EXTPTR is non-zero,
1295 * return a pointer into FILE where the compressor-specific
1296 * extension begins. If no compressor is found, NULL is returned
1297 * and EXTPTR is not significant.
1298 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1299 */
1300 static compressor *
1301 get_compressor_from_suffix (file, extptr)
1302 char *file;
1303 char **extptr;
1304 {
1305 compressor *compr;
1306 char *slash, *suffix;
1307
1308 /* This relies on FN to be after canonicalize_filename,
1309 so we don't need to consider backslashes on DOS_NT. */
1310 slash = etags_strrchr (file, '/');
1311 suffix = etags_strrchr (file, '.');
1312 if (suffix == NULL || suffix < slash)
1313 return NULL;
1314 if (extptr != NULL)
1315 *extptr = suffix;
1316 suffix += 1;
1317 /* Let those poor souls who live with DOS 8+3 file name limits get
1318 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1319 Only the first do loop is run if not MSDOS */
1320 do
1321 {
1322 for (compr = compressors; compr->suffix != NULL; compr++)
1323 if (streq (compr->suffix, suffix))
1324 return compr;
1325 if (!MSDOS)
1326 break; /* do it only once: not really a loop */
1327 if (extptr != NULL)
1328 *extptr = ++suffix;
1329 } while (*suffix != '\0');
1330 return NULL;
1331 }
1332
1333
1334
1335 /*
1336 * Return a language given the name.
1337 */
1338 static language *
1339 get_language_from_langname (name)
1340 const char *name;
1341 {
1342 language *lang;
1343
1344 if (name == NULL)
1345 error ("empty language name", (char *)NULL);
1346 else
1347 {
1348 for (lang = lang_names; lang->name != NULL; lang++)
1349 if (streq (name, lang->name))
1350 return lang;
1351 error ("unknown language \"%s\"", name);
1352 }
1353
1354 return NULL;
1355 }
1356
1357
1358 /*
1359 * Return a language given the interpreter name.
1360 */
1361 static language *
1362 get_language_from_interpreter (interpreter)
1363 char *interpreter;
1364 {
1365 language *lang;
1366 char **iname;
1367
1368 if (interpreter == NULL)
1369 return NULL;
1370 for (lang = lang_names; lang->name != NULL; lang++)
1371 if (lang->interpreters != NULL)
1372 for (iname = lang->interpreters; *iname != NULL; iname++)
1373 if (streq (*iname, interpreter))
1374 return lang;
1375
1376 return NULL;
1377 }
1378
1379
1380
1381 /*
1382 * Return a language given the file name.
1383 */
1384 static language *
1385 get_language_from_filename (file, case_sensitive)
1386 char *file;
1387 bool case_sensitive;
1388 {
1389 language *lang;
1390 char **name, **ext, *suffix;
1391
1392 /* Try whole file name first. */
1393 for (lang = lang_names; lang->name != NULL; lang++)
1394 if (lang->filenames != NULL)
1395 for (name = lang->filenames; *name != NULL; name++)
1396 if ((case_sensitive)
1397 ? streq (*name, file)
1398 : strcaseeq (*name, file))
1399 return lang;
1400
1401 /* If not found, try suffix after last dot. */
1402 suffix = etags_strrchr (file, '.');
1403 if (suffix == NULL)
1404 return NULL;
1405 suffix += 1;
1406 for (lang = lang_names; lang->name != NULL; lang++)
1407 if (lang->suffixes != NULL)
1408 for (ext = lang->suffixes; *ext != NULL; ext++)
1409 if ((case_sensitive)
1410 ? streq (*ext, suffix)
1411 : strcaseeq (*ext, suffix))
1412 return lang;
1413 return NULL;
1414 }
1415
1416 \f
1417 /*
1418 * This routine is called on each file argument.
1419 */
1420 static void
1421 process_file_name (file, lang)
1422 char *file;
1423 language *lang;
1424 {
1425 struct stat stat_buf;
1426 FILE *inf;
1427 fdesc *fdp;
1428 compressor *compr;
1429 char *compressed_name, *uncompressed_name;
1430 char *ext, *real_name;
1431 int retval;
1432
1433 canonicalize_filename (file);
1434 if (streq (file, tagfile) && !streq (tagfile, "-"))
1435 {
1436 error ("skipping inclusion of %s in self.", file);
1437 return;
1438 }
1439 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1440 {
1441 compressed_name = NULL;
1442 real_name = uncompressed_name = savestr (file);
1443 }
1444 else
1445 {
1446 real_name = compressed_name = savestr (file);
1447 uncompressed_name = savenstr (file, ext - file);
1448 }
1449
1450 /* If the canonicalized uncompressed name
1451 has already been dealt with, skip it silently. */
1452 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1453 {
1454 assert (fdp->infname != NULL);
1455 if (streq (uncompressed_name, fdp->infname))
1456 goto cleanup;
1457 }
1458
1459 if (stat (real_name, &stat_buf) != 0)
1460 {
1461 /* Reset real_name and try with a different name. */
1462 real_name = NULL;
1463 if (compressed_name != NULL) /* try with the given suffix */
1464 {
1465 if (stat (uncompressed_name, &stat_buf) == 0)
1466 real_name = uncompressed_name;
1467 }
1468 else /* try all possible suffixes */
1469 {
1470 for (compr = compressors; compr->suffix != NULL; compr++)
1471 {
1472 compressed_name = concat (file, ".", compr->suffix);
1473 if (stat (compressed_name, &stat_buf) != 0)
1474 {
1475 if (MSDOS)
1476 {
1477 char *suf = compressed_name + strlen (file);
1478 size_t suflen = strlen (compr->suffix) + 1;
1479 for ( ; suf[1]; suf++, suflen--)
1480 {
1481 memmove (suf, suf + 1, suflen);
1482 if (stat (compressed_name, &stat_buf) == 0)
1483 {
1484 real_name = compressed_name;
1485 break;
1486 }
1487 }
1488 if (real_name != NULL)
1489 break;
1490 } /* MSDOS */
1491 free (compressed_name);
1492 compressed_name = NULL;
1493 }
1494 else
1495 {
1496 real_name = compressed_name;
1497 break;
1498 }
1499 }
1500 }
1501 if (real_name == NULL)
1502 {
1503 perror (file);
1504 goto cleanup;
1505 }
1506 } /* try with a different name */
1507
1508 if (!S_ISREG (stat_buf.st_mode))
1509 {
1510 error ("skipping %s: it is not a regular file.", real_name);
1511 goto cleanup;
1512 }
1513 if (real_name == compressed_name)
1514 {
1515 char *cmd = concat (compr->command, " ", real_name);
1516 inf = (FILE *) popen (cmd, "r");
1517 free (cmd);
1518 }
1519 else
1520 inf = fopen (real_name, "r");
1521 if (inf == NULL)
1522 {
1523 perror (real_name);
1524 goto cleanup;
1525 }
1526
1527 process_file (inf, uncompressed_name, lang);
1528
1529 if (real_name == compressed_name)
1530 retval = pclose (inf);
1531 else
1532 retval = fclose (inf);
1533 if (retval < 0)
1534 pfatal (file);
1535
1536 cleanup:
1537 if (compressed_name) free (compressed_name);
1538 if (uncompressed_name) free (uncompressed_name);
1539 last_node = NULL;
1540 curfdp = NULL;
1541 return;
1542 }
1543
1544 static void
1545 process_file (fh, fn, lang)
1546 FILE *fh;
1547 char *fn;
1548 language *lang;
1549 {
1550 static const fdesc emptyfdesc;
1551 fdesc *fdp;
1552
1553 /* Create a new input file description entry. */
1554 fdp = xnew (1, fdesc);
1555 *fdp = emptyfdesc;
1556 fdp->next = fdhead;
1557 fdp->infname = savestr (fn);
1558 fdp->lang = lang;
1559 fdp->infabsname = absolute_filename (fn, cwd);
1560 fdp->infabsdir = absolute_dirname (fn, cwd);
1561 if (filename_is_absolute (fn))
1562 {
1563 /* An absolute file name. Canonicalize it. */
1564 fdp->taggedfname = absolute_filename (fn, NULL);
1565 }
1566 else
1567 {
1568 /* A file name relative to cwd. Make it relative
1569 to the directory of the tags file. */
1570 fdp->taggedfname = relative_filename (fn, tagfiledir);
1571 }
1572 fdp->usecharno = TRUE; /* use char position when making tags */
1573 fdp->prop = NULL;
1574
1575 fdhead = fdp;
1576 curfdp = fdhead; /* the current file description */
1577
1578 find_entries (fh);
1579
1580 /* If not Ctags, and if this is not metasource and if it contained no #line
1581 directives, we can write the tags and free all nodes pointing to
1582 curfdp. */
1583 if (!CTAGS
1584 && curfdp->usecharno /* no #line directives in this file */
1585 && !curfdp->lang->metasource)
1586 {
1587 node *np, *prev;
1588
1589 /* Look for the head of the sublist relative to this file. See add_node
1590 for the structure of the node tree. */
1591 prev = NULL;
1592 for (np = nodehead; np != NULL; prev = np, np = np->left)
1593 if (np->fdp == curfdp)
1594 break;
1595
1596 /* If we generated tags for this file, write and delete them. */
1597 if (np != NULL)
1598 {
1599 /* This is the head of the last sublist, if any. The following
1600 instructions depend on this being true. */
1601 assert (np->left == NULL);
1602
1603 assert (fdhead == curfdp);
1604 assert (last_node->fdp == curfdp);
1605 put_entries (np); /* write tags for file curfdp->taggedfname */
1606 free_tree (np); /* remove the written nodes */
1607 if (prev == NULL)
1608 nodehead = NULL; /* no nodes left */
1609 else
1610 prev->left = NULL; /* delete the pointer to the sublist */
1611 }
1612 }
1613 }
1614
1615 /*
1616 * This routine sets up the boolean pseudo-functions which work
1617 * by setting boolean flags dependent upon the corresponding character.
1618 * Every char which is NOT in that string is not a white char. Therefore,
1619 * all of the array "_wht" is set to FALSE, and then the elements
1620 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1621 * of a char is TRUE if it is the string "white", else FALSE.
1622 */
1623 static void
1624 init ()
1625 {
1626 register char *sp;
1627 register int i;
1628
1629 for (i = 0; i < CHARS; i++)
1630 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1631 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1632 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1633 notinname('\0') = notinname('\n');
1634 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1635 begtoken('\0') = begtoken('\n');
1636 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1637 intoken('\0') = intoken('\n');
1638 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1639 endtoken('\0') = endtoken('\n');
1640 }
1641
1642 /*
1643 * This routine opens the specified file and calls the function
1644 * which finds the function and type definitions.
1645 */
1646 static void
1647 find_entries (inf)
1648 FILE *inf;
1649 {
1650 char *cp;
1651 node *old_last_node;
1652 language *lang = curfdp->lang;
1653 Lang_function *parser = NULL;
1654
1655 /* If user specified a language, use it. */
1656 if (lang != NULL && lang->function != NULL)
1657 {
1658 parser = lang->function;
1659 }
1660
1661 /* Else try to guess the language given the file name. */
1662 if (parser == NULL)
1663 {
1664 lang = get_language_from_filename (curfdp->infname, TRUE);
1665 if (lang != NULL && lang->function != NULL)
1666 {
1667 curfdp->lang = lang;
1668 parser = lang->function;
1669 }
1670 }
1671
1672 /* Else look for sharp-bang as the first two characters. */
1673 if (parser == NULL
1674 && readline_internal (&lb, inf) > 0
1675 && lb.len >= 2
1676 && lb.buffer[0] == '#'
1677 && lb.buffer[1] == '!')
1678 {
1679 char *lp;
1680
1681 /* Set lp to point at the first char after the last slash in the
1682 line or, if no slashes, at the first nonblank. Then set cp to
1683 the first successive blank and terminate the string. */
1684 lp = etags_strrchr (lb.buffer+2, '/');
1685 if (lp != NULL)
1686 lp += 1;
1687 else
1688 lp = skip_spaces (lb.buffer + 2);
1689 cp = skip_non_spaces (lp);
1690 *cp = '\0';
1691
1692 if (strlen (lp) > 0)
1693 {
1694 lang = get_language_from_interpreter (lp);
1695 if (lang != NULL && lang->function != NULL)
1696 {
1697 curfdp->lang = lang;
1698 parser = lang->function;
1699 }
1700 }
1701 }
1702
1703 /* We rewind here, even if inf may be a pipe. We fail if the
1704 length of the first line is longer than the pipe block size,
1705 which is unlikely. */
1706 rewind (inf);
1707
1708 /* Else try to guess the language given the case insensitive file name. */
1709 if (parser == NULL)
1710 {
1711 lang = get_language_from_filename (curfdp->infname, FALSE);
1712 if (lang != NULL && lang->function != NULL)
1713 {
1714 curfdp->lang = lang;
1715 parser = lang->function;
1716 }
1717 }
1718
1719 if (!no_line_directive
1720 && curfdp->lang != NULL && curfdp->lang->metasource)
1721 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1722 file, or anyway we parsed a file that is automatically generated from
1723 this one. If this is the case, the bingo.c file contained #line
1724 directives that generated tags pointing to this file. Let's delete
1725 them all before parsing this file, which is the real source. */
1726 {
1727 fdesc **fdpp = &fdhead;
1728 while (*fdpp != NULL)
1729 if (*fdpp != curfdp
1730 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1731 /* We found one of those! We must delete both the file description
1732 and all tags referring to it. */
1733 {
1734 fdesc *badfdp = *fdpp;
1735
1736 if (DEBUG)
1737 fprintf (stderr,
1738 "Removing references to \"%s\" obtained from \"%s\"\n",
1739 badfdp->taggedfname, badfdp->infname);
1740
1741 /* Delete the tags referring to badfdp. */
1742 invalidate_nodes (badfdp, &nodehead);
1743
1744 *fdpp = badfdp->next; /* remove the bad description from the list */
1745 free_fdesc (badfdp);
1746 }
1747 else
1748 fdpp = &(*fdpp)->next; /* advance the list pointer */
1749 }
1750
1751 if (parser != NULL)
1752 {
1753 /* Generic initialisations before reading from file. */
1754 lineno = 0; /* reset global line number */
1755 charno = 0; /* reset global char number */
1756 linecharno = 0; /* reset global char number of line start */
1757
1758 parser (inf);
1759 return;
1760 }
1761
1762 /* Else try Fortran. */
1763 old_last_node = last_node;
1764 curfdp->lang = get_language_from_langname ("fortran");
1765 find_entries (inf);
1766
1767 if (old_last_node == last_node)
1768 /* No Fortran entries found. Try C. */
1769 {
1770 /* We do not tag if rewind fails.
1771 Only the file name will be recorded in the tags file. */
1772 rewind (inf);
1773 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1774 find_entries (inf);
1775 }
1776 return;
1777 }
1778
1779 \f
1780 /* Record a tag. */
1781 static void
1782 pfnote (name, is_func, linestart, linelen, lno, cno)
1783 char *name; /* tag name, or NULL if unnamed */
1784 bool is_func; /* tag is a function */
1785 char *linestart; /* start of the line where tag is */
1786 int linelen; /* length of the line where tag is */
1787 int lno; /* line number */
1788 long cno; /* character number */
1789 {
1790 register node *np;
1791
1792 if (CTAGS && name == NULL)
1793 return;
1794
1795 np = xnew (1, node);
1796
1797 /* If ctags mode, change name "main" to M<thisfilename>. */
1798 if (CTAGS && !cxref_style && streq (name, "main"))
1799 {
1800 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1801 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1802 fp = etags_strrchr (np->name, '.');
1803 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1804 fp[0] = '\0';
1805 }
1806 else
1807 np->name = name;
1808 np->valid = TRUE;
1809 np->been_warned = FALSE;
1810 np->fdp = curfdp;
1811 np->is_func = is_func;
1812 np->lno = lno;
1813 if (np->fdp->usecharno)
1814 /* Our char numbers are 0-base, because of C language tradition?
1815 ctags compatibility? old versions compatibility? I don't know.
1816 Anyway, since emacs's are 1-base we expect etags.el to take care
1817 of the difference. If we wanted to have 1-based numbers, we would
1818 uncomment the +1 below. */
1819 np->cno = cno /* + 1 */ ;
1820 else
1821 np->cno = invalidcharno;
1822 np->left = np->right = NULL;
1823 if (CTAGS && !cxref_style)
1824 {
1825 if (strlen (linestart) < 50)
1826 np->pat = concat (linestart, "$", "");
1827 else
1828 np->pat = savenstr (linestart, 50);
1829 }
1830 else
1831 np->pat = savenstr (linestart, linelen);
1832
1833 add_node (np, &nodehead);
1834 }
1835
1836 /*
1837 * TAGS format specification
1838 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1839 *
1840 * pfnote should emit the optimized form [unnamed tag] only if:
1841 * 1. name does not contain any of the characters " \t\r\n(),;";
1842 * 2. linestart contains name as either a rightmost, or rightmost but
1843 * one character, substring;
1844 * 3. the character, if any, immediately before name in linestart must
1845 * be one of the characters " \t(),;";
1846 * 4. the character, if any, immediately after name in linestart must
1847 * also be one of the characters " \t(),;".
1848 *
1849 * The real implementation uses the notinname() macro, which recognises
1850 * characters slightly different from " \t\r\n(),;". See the variable
1851 * `nonam'.
1852 */
1853 #define traditional_tag_style TRUE
1854 static void
1855 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1856 char *name; /* tag name, or NULL if unnamed */
1857 int namelen; /* tag length */
1858 bool is_func; /* tag is a function */
1859 char *linestart; /* start of the line where tag is */
1860 int linelen; /* length of the line where tag is */
1861 int lno; /* line number */
1862 long cno; /* character number */
1863 {
1864 register char *cp;
1865 bool named;
1866
1867 named = TRUE;
1868 if (!CTAGS)
1869 {
1870 for (cp = name; !notinname (*cp); cp++)
1871 continue;
1872 if (*cp == '\0') /* rule #1 */
1873 {
1874 cp = linestart + linelen - namelen;
1875 if (notinname (linestart[linelen-1]))
1876 cp -= 1; /* rule #4 */
1877 if (cp >= linestart /* rule #2 */
1878 && (cp == linestart
1879 || notinname (cp[-1])) /* rule #3 */
1880 && strneq (name, cp, namelen)) /* rule #2 */
1881 named = FALSE; /* use unnamed tag */
1882 }
1883 }
1884
1885 if (named)
1886 name = savenstr (name, namelen);
1887 else
1888 name = NULL;
1889 pfnote (name, is_func, linestart, linelen, lno, cno);
1890 }
1891
1892 /*
1893 * free_tree ()
1894 * recurse on left children, iterate on right children.
1895 */
1896 static void
1897 free_tree (np)
1898 register node *np;
1899 {
1900 while (np)
1901 {
1902 register node *node_right = np->right;
1903 free_tree (np->left);
1904 if (np->name != NULL)
1905 free (np->name);
1906 free (np->pat);
1907 free (np);
1908 np = node_right;
1909 }
1910 }
1911
1912 /*
1913 * free_fdesc ()
1914 * delete a file description
1915 */
1916 static void
1917 free_fdesc (fdp)
1918 register fdesc *fdp;
1919 {
1920 if (fdp->infname != NULL) free (fdp->infname);
1921 if (fdp->infabsname != NULL) free (fdp->infabsname);
1922 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1923 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1924 if (fdp->prop != NULL) free (fdp->prop);
1925 free (fdp);
1926 }
1927
1928 /*
1929 * add_node ()
1930 * Adds a node to the tree of nodes. In etags mode, sort by file
1931 * name. In ctags mode, sort by tag name. Make no attempt at
1932 * balancing.
1933 *
1934 * add_node is the only function allowed to add nodes, so it can
1935 * maintain state.
1936 */
1937 static void
1938 add_node (np, cur_node_p)
1939 node *np, **cur_node_p;
1940 {
1941 register int dif;
1942 register node *cur_node = *cur_node_p;
1943
1944 if (cur_node == NULL)
1945 {
1946 *cur_node_p = np;
1947 last_node = np;
1948 return;
1949 }
1950
1951 if (!CTAGS)
1952 /* Etags Mode */
1953 {
1954 /* For each file name, tags are in a linked sublist on the right
1955 pointer. The first tags of different files are a linked list
1956 on the left pointer. last_node points to the end of the last
1957 used sublist. */
1958 if (last_node != NULL && last_node->fdp == np->fdp)
1959 {
1960 /* Let's use the same sublist as the last added node. */
1961 assert (last_node->right == NULL);
1962 last_node->right = np;
1963 last_node = np;
1964 }
1965 else if (cur_node->fdp == np->fdp)
1966 {
1967 /* Scanning the list we found the head of a sublist which is
1968 good for us. Let's scan this sublist. */
1969 add_node (np, &cur_node->right);
1970 }
1971 else
1972 /* The head of this sublist is not good for us. Let's try the
1973 next one. */
1974 add_node (np, &cur_node->left);
1975 } /* if ETAGS mode */
1976
1977 else
1978 {
1979 /* Ctags Mode */
1980 dif = strcmp (np->name, cur_node->name);
1981
1982 /*
1983 * If this tag name matches an existing one, then
1984 * do not add the node, but maybe print a warning.
1985 */
1986 if (!dif)
1987 {
1988 if (np->fdp == cur_node->fdp)
1989 {
1990 if (!no_warnings)
1991 {
1992 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1993 np->fdp->infname, lineno, np->name);
1994 fprintf (stderr, "Second entry ignored\n");
1995 }
1996 }
1997 else if (!cur_node->been_warned && !no_warnings)
1998 {
1999 fprintf
2000 (stderr,
2001 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2002 np->fdp->infname, cur_node->fdp->infname, np->name);
2003 cur_node->been_warned = TRUE;
2004 }
2005 return;
2006 }
2007
2008 /* Actually add the node */
2009 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2010 } /* if CTAGS mode */
2011 }
2012
2013 /*
2014 * invalidate_nodes ()
2015 * Scan the node tree and invalidate all nodes pointing to the
2016 * given file description (CTAGS case) or free them (ETAGS case).
2017 */
2018 static void
2019 invalidate_nodes (badfdp, npp)
2020 fdesc *badfdp;
2021 node **npp;
2022 {
2023 node *np = *npp;
2024
2025 if (np == NULL)
2026 return;
2027
2028 if (CTAGS)
2029 {
2030 if (np->left != NULL)
2031 invalidate_nodes (badfdp, &np->left);
2032 if (np->fdp == badfdp)
2033 np-> valid = FALSE;
2034 if (np->right != NULL)
2035 invalidate_nodes (badfdp, &np->right);
2036 }
2037 else
2038 {
2039 node **next = &np->left;
2040 if (np->fdp == badfdp)
2041 {
2042 *npp = *next; /* detach the sublist from the list */
2043 np->left = NULL; /* isolate it */
2044 free_tree (np); /* free it */
2045 }
2046 invalidate_nodes (badfdp, next);
2047 }
2048 }
2049
2050 \f
2051 static int total_size_of_entries __P((node *));
2052 static int number_len __P((long));
2053
/* Length of a non-negative number's decimal representation.
   The result is 1 for any number less than 10.  */
static int
number_len (num)
     long num;
{
  int len;

  /* One more digit for each time the number can lose its last one
     and still be positive. */
  for (len = 1; num >= 10; num /= 10)
    len++;
  return len;
}
2064
2065 /*
2066 * Return total number of characters that put_entries will output for
2067 * the nodes in the linked list at the right of the specified node.
2068 * This count is irrelevant with etags.el since emacs 19.34 at least,
2069 * but is still supplied for backward compatibility.
2070 */
2071 static int
2072 total_size_of_entries (np)
2073 register node *np;
2074 {
2075 register int total = 0;
2076
2077 for (; np != NULL; np = np->right)
2078 {
2079 total += strlen (np->pat) + 1; /* pat\177 */
2080 if (np->name != NULL)
2081 total += strlen (np->name) + 1; /* name\001 */
2082 total += number_len ((long) np->lno) + 1; /* lno, */
2083 if (np->cno != invalidcharno) /* cno */
2084 total += number_len (np->cno);
2085 total += 1; /* newline */
2086 }
2087
2088 return total;
2089 }
2090
2091 static void
2092 put_entries (np)
2093 register node *np;
2094 {
2095 register char *sp;
2096 static fdesc *fdp = NULL;
2097
2098 if (np == NULL)
2099 return;
2100
2101 /* Output subentries that precede this one */
2102 if (CTAGS)
2103 put_entries (np->left);
2104
2105 /* Output this entry */
2106 if (np->valid)
2107 {
2108 if (!CTAGS)
2109 {
2110 /* Etags mode */
2111 if (fdp != np->fdp)
2112 {
2113 fdp = np->fdp;
2114 fprintf (tagf, "\f\n%s,%d\n",
2115 fdp->taggedfname, total_size_of_entries (np));
2116 }
2117 fputs (np->pat, tagf);
2118 fputc ('\177', tagf);
2119 if (np->name != NULL)
2120 {
2121 fputs (np->name, tagf);
2122 fputc ('\001', tagf);
2123 }
2124 fprintf (tagf, "%d,", np->lno);
2125 if (np->cno != invalidcharno)
2126 fprintf (tagf, "%ld", np->cno);
2127 fputs ("\n", tagf);
2128 }
2129 else
2130 {
2131 /* Ctags mode */
2132 if (np->name == NULL)
2133 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2134
2135 if (cxref_style)
2136 {
2137 if (vgrind_style)
2138 fprintf (stdout, "%s %s %d\n",
2139 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2140 else
2141 fprintf (stdout, "%-16s %3d %-16s %s\n",
2142 np->name, np->lno, np->fdp->taggedfname, np->pat);
2143 }
2144 else
2145 {
2146 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2147
2148 if (np->is_func)
2149 { /* function or #define macro with args */
2150 putc (searchar, tagf);
2151 putc ('^', tagf);
2152
2153 for (sp = np->pat; *sp; sp++)
2154 {
2155 if (*sp == '\\' || *sp == searchar)
2156 putc ('\\', tagf);
2157 putc (*sp, tagf);
2158 }
2159 putc (searchar, tagf);
2160 }
2161 else
2162 { /* anything else; text pattern inadequate */
2163 fprintf (tagf, "%d", np->lno);
2164 }
2165 putc ('\n', tagf);
2166 }
2167 }
2168 } /* if this node contains a valid tag */
2169
2170 /* Output subentries that follow this one */
2171 put_entries (np->right);
2172 if (!CTAGS)
2173 put_entries (np->left);
2174 }
2175
2176 \f
2177 /* C extensions. */
2178 #define C_EXT 0x00fff /* C extensions */
2179 #define C_PLAIN 0x00000 /* C */
2180 #define C_PLPL 0x00001 /* C++ */
2181 #define C_STAR 0x00003 /* C* */
2182 #define C_JAVA 0x00005 /* JAVA */
2183 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2184 #define YACC 0x10000 /* yacc file */
2185
2186 /*
2187 * The C symbol tables.
2188 */
2189 enum sym_type
2190 {
2191 st_none,
2192 st_C_objprot, st_C_objimpl, st_C_objend,
2193 st_C_gnumacro,
2194 st_C_ignore,
2195 st_C_javastruct,
2196 st_C_operator,
2197 st_C_class, st_C_template,
2198 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
2199 };
2200
2201 static unsigned int hash __P((const char *, unsigned int));
2202 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2203 static enum sym_type C_symtype __P((char *, int, int));
2204
2205 /* Feed stuff between (but not including) %[ and %] lines to:
2206 gperf -c -k 1,3 -o -p -r -t
2207 %[
2208 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2209 %%
2210 if, 0, st_C_ignore
2211 for, 0, st_C_ignore
2212 while, 0, st_C_ignore
2213 switch, 0, st_C_ignore
2214 return, 0, st_C_ignore
2215 @interface, 0, st_C_objprot
2216 @protocol, 0, st_C_objprot
2217 @implementation,0, st_C_objimpl
2218 @end, 0, st_C_objend
2219 import, C_JAVA, st_C_ignore
2220 package, C_JAVA, st_C_ignore
2221 friend, C_PLPL, st_C_ignore
2222 extends, C_JAVA, st_C_javastruct
2223 implements, C_JAVA, st_C_javastruct
2224 interface, C_JAVA, st_C_struct
2225 class, 0, st_C_class
2226 namespace, C_PLPL, st_C_struct
2227 domain, C_STAR, st_C_struct
2228 union, 0, st_C_struct
2229 struct, 0, st_C_struct
2230 extern, 0, st_C_extern
2231 enum, 0, st_C_enum
2232 typedef, 0, st_C_typedef
2233 define, 0, st_C_define
2234 operator, C_PLPL, st_C_operator
2235 template, 0, st_C_template
2236 bool, C_PLPL, st_C_typespec
2237 long, 0, st_C_typespec
2238 short, 0, st_C_typespec
2239 int, 0, st_C_typespec
2240 char, 0, st_C_typespec
2241 float, 0, st_C_typespec
2242 double, 0, st_C_typespec
2243 signed, 0, st_C_typespec
2244 unsigned, 0, st_C_typespec
2245 auto, 0, st_C_typespec
2246 void, 0, st_C_typespec
2247 static, 0, st_C_typespec
2248 const, 0, st_C_typespec
2249 volatile, 0, st_C_typespec
2250 explicit, C_PLPL, st_C_typespec
2251 mutable, C_PLPL, st_C_typespec
2252 typename, C_PLPL, st_C_typespec
2253 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2254 DEFUN, 0, st_C_gnumacro
2255 SYSCALL, 0, st_C_gnumacro
2256 ENTRY, 0, st_C_gnumacro
2257 PSEUDO, 0, st_C_gnumacro
2258 # These are defined inside C functions, so currently they are not met.
2259 # EXFUN used in glibc, DEFVAR_* in emacs.
2260 #EXFUN, 0, st_C_gnumacro
2261 #DEFVAR_, 0, st_C_gnumacro
2262 %]
2263 and replace lines between %< and %> with its output,
2264 then make in_word_set and C_stab_entry static. */
2265 /*%<*/
2266 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2267 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
2268 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2269
2270 #define TOTAL_KEYWORDS 47
2271 #define MIN_WORD_LENGTH 2
2272 #define MAX_WORD_LENGTH 15
2273 #define MIN_HASH_VALUE 18
2274 #define MAX_HASH_VALUE 138
2275 /* maximum key range = 121, duplicates = 0 */
2276
/* gperf-generated hash of the LEN characters at STR: LEN itself, plus
   the association value of the first character and, when LEN is 3 or
   more, that of the third character.  Do not edit by hand; regenerate
   with the gperf command given above.  */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Indexed by unsigned char value.  The filler value 139 is larger
     than MAX_HASH_VALUE, so any key that hits it fails the range check
     done in in_word_set.  */
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 63, 139, 139, 139, 33, 44,
      62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      42, 139, 139, 12, 32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 34, 59, 37,
      24, 58, 33, 3, 139, 16, 139, 139, 42, 60,
      18, 11, 39, 139, 23, 57, 4, 63, 6, 20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  switch (hval)
    {
      /* Lengths of 3 and above add the third character, then fall
         through to add the first one as well.  */
      default:
      case 3:
        hval += asso_values[(unsigned char)str[2]];
      case 2:
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
2328
/* gperf-generated keyword lookup.  Return the address of the wordlist
   entry found at index hash (STR, LEN) if the first LEN characters of
   its name are equal to those at STR, else 0.  Only LEN characters are
   compared, so STR need not be null-terminated at STR[LEN].
   Do not edit by hand; regenerate with the gperf command given above.  */
#ifdef __GNUC__
__inline
#endif
static struct C_stab_entry *
in_word_set (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Indexed by hash value; {""} marks an unused slot.  */
  static struct C_stab_entry wordlist[] =
    {
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {"if", 0, st_C_ignore},
      {""}, {""}, {""}, {""},
      {"int", 0, st_C_typespec},
      {""}, {""},
      {"void", 0, st_C_typespec},
      {""}, {""},
      {"interface", C_JAVA, st_C_struct},
      {""},
      {"SYSCALL", 0, st_C_gnumacro},
      {""},
      {"return", 0, st_C_ignore},
      {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {"while", 0, st_C_ignore},
      {"auto", 0, st_C_typespec},
      {""}, {""}, {""}, {""}, {""}, {""},
      {"float", 0, st_C_typespec},
      {"typedef", 0, st_C_typedef},
      {"typename", C_PLPL, st_C_typespec},
      {""}, {""}, {""},
      {"friend", C_PLPL, st_C_ignore},
      {"volatile", 0, st_C_typespec},
      {""}, {""},
      {"for", 0, st_C_ignore},
      {"const", 0, st_C_typespec},
      {"import", C_JAVA, st_C_ignore},
      {""},
      {"define", 0, st_C_define},
      {"long", 0, st_C_typespec},
      {"implements", C_JAVA, st_C_javastruct},
      {"signed", 0, st_C_typespec},
      {""},
      {"extern", 0, st_C_extern},
      {"extends", C_JAVA, st_C_javastruct},
      {""},
      {"mutable", C_PLPL, st_C_typespec},
      {"template", 0, st_C_template},
      {"short", 0, st_C_typespec},
      {"bool", C_PLPL, st_C_typespec},
      {"char", 0, st_C_typespec},
      {"class", 0, st_C_class},
      {"operator", C_PLPL, st_C_operator},
      {""},
      {"switch", 0, st_C_ignore},
      {""},
      {"ENTRY", 0, st_C_gnumacro},
      {""},
      {"package", C_JAVA, st_C_ignore},
      {"union", 0, st_C_struct},
      {"@end", 0, st_C_objend},
      {"struct", 0, st_C_struct},
      {"namespace", C_PLPL, st_C_struct},
      {""}, {""},
      {"domain", C_STAR, st_C_struct},
      {"@interface", 0, st_C_objprot},
      {"PSEUDO", 0, st_C_gnumacro},
      {"double", 0, st_C_typespec},
      {""},
      {"@protocol", 0, st_C_objprot},
      {""},
      {"static", 0, st_C_typespec},
      {""}, {""},
      {"DEFUN", 0, st_C_gnumacro},
      {""}, {""}, {""}, {""},
      {"explicit", C_PLPL, st_C_typespec},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {""},
      {"enum", 0, st_C_enum},
      {""}, {""},
      {"unsigned", 0, st_C_typespec},
      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
      {"@implementation",0, st_C_objimpl}
    };

  /* Keys shorter or longer than every keyword cannot match.  */
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      /* Reject hash values that fall outside wordlist.  */
      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          /* Check the first character inline before calling strncmp
             on the remaining LEN - 1 characters.  */
          if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
            return &wordlist[key];
        }
    }
  return 0;
}
2429 /*%>*/
2430
2431 static enum sym_type
2432 C_symtype (str, len, c_ext)
2433 char *str;
2434 int len;
2435 int c_ext;
2436 {
2437 register struct C_stab_entry *se = in_word_set (str, len);
2438
2439 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2440 return st_none;
2441 return se->type;
2442 }
2443
2444 \f
2445 /*
2446 * C functions and variables are recognized using a simple
2447 * finite automaton. fvdef is its state variable.
2448 */
2449 static enum
2450 {
2451 fvnone, /* nothing seen */
2452 fdefunkey, /* Emacs DEFUN keyword seen */
2453 fdefunname, /* Emacs DEFUN name seen */
2454 foperator, /* func: operator keyword seen (cplpl) */
2455 fvnameseen, /* function or variable name seen */
2456 fstartlist, /* func: just after open parenthesis */
2457 finlist, /* func: in parameter list */
2458 flistseen, /* func: after parameter list */
2459 fignore, /* func: before open brace */
2460 vignore /* var-like: ignore until ';' */
2461 } fvdef;
2462
2463 static bool fvextern; /* func or var: extern keyword seen; */
2464
2465 /*
2466 * typedefs are recognized using a simple finite automaton.
2467 * typdef is its state variable.
2468 */
2469 static enum
2470 {
2471 tnone, /* nothing seen */
2472 tkeyseen, /* typedef keyword seen */
2473 ttypeseen, /* defined type seen */
2474 tinbody, /* inside typedef body */
2475 tend, /* just before typedef tag */
2476 tignore /* junk after typedef tag */
2477 } typdef;
2478
2479 /*
2480 * struct-like structures (enum, struct and union) are recognized
2481 * using another simple finite automaton. `structdef' is its state
2482 * variable.
2483 */
2484 static enum
2485 {
2486 snone, /* nothing seen yet,
2487 or in struct body if cblev > 0 */
2488 skeyseen, /* struct-like keyword seen */
2489 stagseen, /* struct-like tag seen */
2490 sintemplate, /* inside template (ignore) */
2491 scolonseen /* colon seen after struct-like tag */
2492 } structdef;
2493
2494 /*
2495 * When objdef is different from onone, objtag is the name of the class.
2496 */
2497 static char *objtag = "<uninited>";
2498
2499 /*
2500 * Yet another little state machine to deal with preprocessor lines.
2501 */
2502 static enum
2503 {
2504 dnone, /* nothing seen */
2505 dsharpseen, /* '#' seen as first char on line */
2506 ddefineseen, /* '#' and 'define' seen */
2507 dignorerest /* ignore rest of line */
2508 } definedef;
2509
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 * objdef is its state variable.
 */
static enum
{
  onone,                /* nothing seen */
  oprotocol,            /* @interface or @protocol seen */
  oimplementation,      /* @implementation seen */
  otagseen,             /* class name seen */
  oparenseen,           /* parenthesis before category seen */
  ocatseen,             /* category name seen */
  oinbody,              /* in @implementation body */
  omethodsign,          /* in @implementation body, after +/- */
  omethodtag,           /* after method name */
  omethodcolon,         /* after method colon */
  omethodparm,          /* after method parameter */
  oignore               /* wait for @end */
} objdef;
2529
2530
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  bool valid;           /* a token is recorded here and not yet tagged;
                           make_C_tag resets it to FALSE after use */
  bool named;           /* the tag gets an explicit name: in traditional
                           tag style make_C_tag passes a copy of
                           token_name when this is set */
  int offset;           /* offset of the token in `line' */
  int length;           /* length of the token */
  int lineno;           /* value of lineno when the token was recorded */
  long linepos;         /* linepos of the line buffer holding the token
                           (charno as saved when that line was read) */
  char *line;           /* text of the line buffer holding the token */
} token;                        /* latest token read */
static linebuffer token_name;   /* its name */
2546
2547 /*
2548 * Variables and functions for dealing with nested structures.
2549 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2550 */
2551 static void pushclass_above __P((int, char *, int));
2552 static void popclass_above __P((int));
2553 static void write_classname __P((linebuffer *, char *qualifier));
2554
2555 static struct {
2556 char **cname; /* nested class names */
2557 int *cblev; /* nested class curly brace level */
2558 int nl; /* class nesting level (elements used) */
2559 int size; /* length of the array */
2560 } cstack; /* stack for nested declaration tags */
2561 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2562 #define nestlev (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function. */
2564 #define instruct (structdef == snone && nestlev > 0 \
2565 && cblev == cstack.cblev[nestlev-1] + 1)
2566
2567 static void
2568 pushclass_above (cblev, str, len)
2569 int cblev;
2570 char *str;
2571 int len;
2572 {
2573 int nl;
2574
2575 popclass_above (cblev);
2576 nl = cstack.nl;
2577 if (nl >= cstack.size)
2578 {
2579 int size = cstack.size *= 2;
2580 xrnew (cstack.cname, size, char *);
2581 xrnew (cstack.cblev, size, int);
2582 }
2583 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2584 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2585 cstack.cblev[nl] = cblev;
2586 cstack.nl = nl + 1;
2587 }
2588
2589 static void
2590 popclass_above (cblev)
2591 int cblev;
2592 {
2593 int nl;
2594
2595 for (nl = cstack.nl - 1;
2596 nl >= 0 && cstack.cblev[nl] >= cblev;
2597 nl--)
2598 {
2599 if (cstack.cname[nl] != NULL)
2600 free (cstack.cname[nl]);
2601 cstack.nl = nl;
2602 }
2603 }
2604
2605 static void
2606 write_classname (cn, qualifier)
2607 linebuffer *cn;
2608 char *qualifier;
2609 {
2610 int i, len;
2611 int qlen = strlen (qualifier);
2612
2613 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2614 {
2615 len = 0;
2616 cn->len = 0;
2617 cn->buffer[0] = '\0';
2618 }
2619 else
2620 {
2621 len = strlen (cstack.cname[0]);
2622 linebuffer_setlen (cn, len);
2623 strcpy (cn->buffer, cstack.cname[0]);
2624 }
2625 for (i = 1; i < cstack.nl; i++)
2626 {
2627 char *s;
2628 int slen;
2629
2630 s = cstack.cname[i];
2631 if (s == NULL)
2632 continue;
2633 slen = strlen (s);
2634 len += slen + qlen;
2635 linebuffer_setlen (cn, len);
2636 strncat (cn->buffer, qualifier, qlen);
2637 strncat (cn->buffer, s, slen);
2638 }
2639 }
2640
2641 \f
2642 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2643 static void make_C_tag __P((bool));
2644
2645 /*
2646 * consider_token ()
2647 * checks to see if the current token is at the start of a
2648 * function or variable, or corresponds to a typedef, or
2649 * is a struct/union/enum tag, or #define, or an enum constant.
2650 *
2651 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2652 * with args. C_EXTP points to which language we are looking at.
2653 *
2654 * Globals
2655 * fvdef IN OUT
2656 * structdef IN OUT
2657 * definedef IN OUT
2658 * typdef IN OUT
2659 * objdef IN OUT
2660 */
2661
2662 static bool
2663 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2664 register char *str; /* IN: token pointer */
2665 register int len; /* IN: token length */
2666 register int c; /* IN: first char after the token */
2667 int *c_extp; /* IN, OUT: C extensions mask */
2668 int cblev; /* IN: curly brace level */
2669 int parlev; /* IN: parenthesis level */
2670 bool *is_func_or_var; /* OUT: function or variable found */
2671 {
2672 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2673 structtype is the type of the preceding struct-like keyword, and
2674 structcblev is the curly brace level where it has been seen. */
2675 static enum sym_type structtype;
2676 static int structcblev;
2677 static enum sym_type toktype;
2678
2679
2680 toktype = C_symtype (str, len, *c_extp);
2681
2682 /*
2683 * Advance the definedef state machine.
2684 */
2685 switch (definedef)
2686 {
2687 case dnone:
2688 /* We're not on a preprocessor line. */
2689 if (toktype == st_C_gnumacro)
2690 {
2691 fvdef = fdefunkey;
2692 return FALSE;
2693 }
2694 break;
2695 case dsharpseen:
2696 if (toktype == st_C_define)
2697 {
2698 definedef = ddefineseen;
2699 }
2700 else
2701 {
2702 definedef = dignorerest;
2703 }
2704 return FALSE;
2705 case ddefineseen:
2706 /*
2707 * Make a tag for any macro, unless it is a constant
2708 * and constantypedefs is FALSE.
2709 */
2710 definedef = dignorerest;
2711 *is_func_or_var = (c == '(');
2712 if (!*is_func_or_var && !constantypedefs)
2713 return FALSE;
2714 else
2715 return TRUE;
2716 case dignorerest:
2717 return FALSE;
2718 default:
2719 error ("internal error: definedef value.", (char *)NULL);
2720 }
2721
2722 /*
2723 * Now typedefs
2724 */
2725 switch (typdef)
2726 {
2727 case tnone:
2728 if (toktype == st_C_typedef)
2729 {
2730 if (typedefs)
2731 typdef = tkeyseen;
2732 fvextern = FALSE;
2733 fvdef = fvnone;
2734 return FALSE;
2735 }
2736 break;
2737 case tkeyseen:
2738 switch (toktype)
2739 {
2740 case st_none:
2741 case st_C_typespec:
2742 case st_C_class:
2743 case st_C_struct:
2744 case st_C_enum:
2745 typdef = ttypeseen;
2746 break;
2747 }
2748 break;
2749 case ttypeseen:
2750 if (structdef == snone && fvdef == fvnone)
2751 {
2752 fvdef = fvnameseen;
2753 return TRUE;
2754 }
2755 break;
2756 case tend:
2757 switch (toktype)
2758 {
2759 case st_C_typespec:
2760 case st_C_class:
2761 case st_C_struct:
2762 case st_C_enum:
2763 return FALSE;
2764 }
2765 return TRUE;
2766 }
2767
2768 /*
2769 * This structdef business is NOT invoked when we are ctags and the
2770 * file is plain C. This is because a struct tag may have the same
2771 * name as another tag, and this loses with ctags.
2772 */
2773 switch (toktype)
2774 {
2775 case st_C_javastruct:
2776 if (structdef == stagseen)
2777 structdef = scolonseen;
2778 return FALSE;
2779 case st_C_template:
2780 case st_C_class:
2781 if (cblev == 0
2782 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2783 && definedef == dnone && structdef == snone
2784 && typdef == tnone && fvdef == fvnone)
2785 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2786 if (toktype == st_C_template)
2787 break;
2788 /* FALLTHRU */
2789 case st_C_struct:
2790 case st_C_enum:
2791 if (parlev == 0
2792 && fvdef != vignore
2793 && (typdef == tkeyseen
2794 || (typedefs_or_cplusplus && structdef == snone)))
2795 {
2796 structdef = skeyseen;
2797 structtype = toktype;
2798 structcblev = cblev;
2799 }
2800 return FALSE;
2801 }
2802
2803 if (structdef == skeyseen)
2804 {
2805 structdef = stagseen;
2806 return TRUE;
2807 }
2808
2809 if (typdef != tnone)
2810 definedef = dnone;
2811
2812 /* Detect Objective C constructs. */
2813 switch (objdef)
2814 {
2815 case onone:
2816 switch (toktype)
2817 {
2818 case st_C_objprot:
2819 objdef = oprotocol;
2820 return FALSE;
2821 case st_C_objimpl:
2822 objdef = oimplementation;
2823 return FALSE;
2824 }
2825 break;
2826 case oimplementation:
2827 /* Save the class tag for functions or variables defined inside. */
2828 objtag = savenstr (str, len);
2829 objdef = oinbody;
2830 return FALSE;
2831 case oprotocol:
2832 /* Save the class tag for categories. */
2833 objtag = savenstr (str, len);
2834 objdef = otagseen;
2835 *is_func_or_var = TRUE;
2836 return TRUE;
2837 case oparenseen:
2838 objdef = ocatseen;
2839 *is_func_or_var = TRUE;
2840 return TRUE;
2841 case oinbody:
2842 break;
2843 case omethodsign:
2844 if (parlev == 0)
2845 {
2846 objdef = omethodtag;
2847 linebuffer_setlen (&token_name, len);
2848 strncpy (token_name.buffer, str, len);
2849 token_name.buffer[len] = '\0';
2850 return TRUE;
2851 }
2852 return FALSE;
2853 case omethodcolon:
2854 if (parlev == 0)
2855 objdef = omethodparm;
2856 return FALSE;
2857 case omethodparm:
2858 if (parlev == 0)
2859 {
2860 objdef = omethodtag;
2861 linebuffer_setlen (&token_name, token_name.len + len);
2862 strncat (token_name.buffer, str, len);
2863 return TRUE;
2864 }
2865 return FALSE;
2866 case oignore:
2867 if (toktype == st_C_objend)
2868 {
2869 /* Memory leakage here: the string pointed by objtag is
2870 never released, because many tests would be needed to
2871 avoid breaking on incorrect input code. The amount of
2872 memory leaked here is the sum of the lengths of the
2873 class tags.
2874 free (objtag); */
2875 objdef = onone;
2876 }
2877 return FALSE;
2878 }
2879
2880 /* A function, variable or enum constant? */
2881 switch (toktype)
2882 {
2883 case st_C_extern:
2884 fvextern = TRUE;
2885 /* FALLTHRU */
2886 case st_C_typespec:
2887 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2888 fvdef = fvnone; /* should be useless */
2889 return FALSE;
2890 case st_C_ignore:
2891 fvextern = FALSE;
2892 fvdef = vignore;
2893 return FALSE;
2894 case st_C_operator:
2895 fvdef = foperator;
2896 *is_func_or_var = TRUE;
2897 return TRUE;
2898 case st_none:
2899 if (constantypedefs
2900 && structdef == snone
2901 && structtype == st_C_enum && cblev > structcblev)
2902 return TRUE; /* enum constant */
2903 switch (fvdef)
2904 {
2905 case fdefunkey:
2906 if (cblev > 0)
2907 break;
2908 fvdef = fdefunname; /* GNU macro */
2909 *is_func_or_var = TRUE;
2910 return TRUE;
2911 case fvnone:
2912 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2913 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2914 {
2915 fvdef = vignore;
2916 return FALSE;
2917 }
2918 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2919 {
2920 fvdef = foperator;
2921 *is_func_or_var = TRUE;
2922 return TRUE;
2923 }
2924 if (cblev > 0 && !instruct)
2925 break;
2926 fvdef = fvnameseen; /* function or variable */
2927 *is_func_or_var = TRUE;
2928 return TRUE;
2929 }
2930 break;
2931 }
2932
2933 return FALSE;
2934 }
2935
2936 \f
2937 /*
2938 * C_entries often keeps pointers to tokens or lines which are older than
2939 * the line currently read. By keeping two line buffers, and switching
2940 * them at end of line, it is possible to use those pointers.
2941 */
2942 static struct
2943 {
2944 long linepos;
2945 linebuffer lb;
2946 } lbs[2];
2947
2948 #define current_lb_is_new (newndx == curndx)
2949 #define switch_line_buffers() (curndx = 1 - curndx)
2950
2951 #define curlb (lbs[curndx].lb)
2952 #define newlb (lbs[newndx].lb)
2953 #define curlinepos (lbs[curndx].linepos)
2954 #define newlinepos (lbs[newndx].linepos)
2955
/* Read the next input line into the current line buffer, after saving
   charno as its linepos, then point lp at the start of the new text,
   clear quotednl and make the current buffer the new one.  The state
   of definedef is left alone.
   Expands to code that uses charno, inf, lp, quotednl, curndx and
   newndx, which must be visible where the macro is used. */
#define CNL_SAVE_DEFINEDEF()						\
do {									\
  curlinepos = charno;							\
  readline (&curlb, inf);						\
  lp = curlb.buffer;							\
  quotednl = FALSE;							\
  newndx = curndx;							\
} while (0)
2964
/* Like CNL_SAVE_DEFINEDEF, but in addition restore token from savetoken
   if the latter is valid (marking savetoken as no longer valid), and
   reset definedef to dnone.  savetoken must be visible where the macro
   is used. */
#define CNL()								\
do {									\
  CNL_SAVE_DEFINEDEF();							\
  if (savetoken.valid)							\
    {									\
      token = savetoken;						\
      savetoken.valid = FALSE;						\
    }									\
  definedef = dnone;							\
} while (0)
2975
2976
2977 static void
2978 make_C_tag (isfun)
2979 bool isfun;
2980 {
2981 /* This function should never be called when token.valid is FALSE, but
2982 we must protect against invalid input or internal errors. */
2983 if (DEBUG || token.valid)
2984 {
2985 if (traditional_tag_style)
2986 {
2987 /* This was the original code. Now we call new_pfnote instead,
2988 which uses the new method for naming tags (see new_pfnote). */
2989 char *name = NULL;
2990
2991 if (CTAGS || token.named)
2992 name = savestr (token_name.buffer);
2993 if (DEBUG && !token.valid)
2994 {
2995 if (token.named)
2996 name = concat (name, "##invalid##", "");
2997 else
2998 name = savestr ("##invalid##");
2999 }
3000 pfnote (name, isfun, token.line,
3001 token.offset+token.length+1, token.lineno, token.linepos);
3002 }
3003 else
3004 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
3005 token.offset+token.length+1, token.lineno, token.linepos);
3006 token.valid = FALSE;
3007 }
3008 }
3009
3010
3011 /*
3012 * C_entries ()
3013 * This routine finds functions, variables, typedefs,
3014 * #define's, enum constants and struct/union/enum definitions in
3015 * C syntax and adds them to the list.
3016 */
3017 static void
3018 C_entries (c_ext, inf)
3019 int c_ext; /* extension of C */
3020 FILE *inf; /* input file */
3021 {
3022 register char c; /* latest char read; '\0' for end of line */
3023 register char *lp; /* pointer one beyond the character `c' */
3024 int curndx, newndx; /* indices for current and new lb */
3025 register int tokoff; /* offset in line of start of current token */
3026 register int toklen; /* length of current token */
3027 char *qualifier; /* string used to qualify names */
3028 int qlen; /* length of qualifier */
3029 int cblev; /* current curly brace level */
3030 int parlev; /* current parenthesis level */
3031 int typdefcblev; /* cblev where a typedef struct body begun */
3032 bool incomm, inquote, inchar, quotednl, midtoken;
3033 bool cplpl, cjava;
3034 bool yacc_rules; /* in the rules part of a yacc file */
3035 struct tok savetoken; /* token saved during preprocessor handling */
3036
3037
3038 initbuffer (&token_name);
3039 initbuffer (&lbs[0].lb);
3040 initbuffer (&lbs[1].lb);
3041 if (cstack.size == 0)
3042 {
3043 cstack.size = (DEBUG) ? 1 : 4;
3044 cstack.nl = 0;
3045 cstack.cname = xnew (cstack.size, char *);
3046 cstack.cblev = xnew (cstack.size, int);
3047 }
3048
3049 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3050 curndx = newndx = 0;
3051 lp = curlb.buffer;
3052 *lp = 0;
3053
3054 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3055 structdef = snone; definedef = dnone; objdef = onone;
3056 yacc_rules = FALSE;
3057 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3058 token.valid = savetoken.valid = FALSE;
3059 cblev = 0;
3060 parlev = 0;
3061 cplpl = (c_ext & C_PLPL) == C_PLPL;
3062 cjava = (c_ext & C_JAVA) == C_JAVA;
3063 if (cjava)
3064 { qualifier = "."; qlen = 1; }
3065 else
3066 { qualifier = "::"; qlen = 2; }
3067
3068
3069 while (!feof (inf))
3070 {
3071 c = *lp++;
3072 if (c == '\\')
3073 {
3074 /* If we're at the end of the line, the next character is a
3075 '\0'; don't skip it, because it's the thing that tells us
3076 to read the next line. */
3077 if (*lp == '\0')
3078 {
3079 quotednl = TRUE;
3080 continue;
3081 }
3082 lp++;
3083 c = ' ';
3084 }
3085 else if (incomm)
3086 {
3087 switch (c)
3088 {
3089 case '*':
3090 if (*lp == '/')
3091 {
3092 c = *lp++;
3093 incomm = FALSE;
3094 }
3095 break;
3096 case '\0':
3097 /* Newlines inside comments do not end macro definitions in
3098 traditional cpp. */
3099 CNL_SAVE_DEFINEDEF ();
3100 break;
3101 }
3102 continue;
3103 }
3104 else if (inquote)
3105 {
3106 switch (c)
3107 {
3108 case '"':
3109 inquote = FALSE;
3110 break;
3111 case '\0':
3112 /* Newlines inside strings do not end macro definitions
3113 in traditional cpp, even though compilers don't
3114 usually accept them. */
3115 CNL_SAVE_DEFINEDEF ();
3116 break;
3117 }
3118 continue;
3119 }
3120 else if (inchar)
3121 {
3122 switch (c)
3123 {
3124 case '\0':
3125 /* Hmmm, something went wrong. */
3126 CNL ();
3127 /* FALLTHRU */
3128 case '\'':
3129 inchar = FALSE;
3130 break;
3131 }
3132 continue;
3133 }
3134 else
3135 switch (c)
3136 {
3137 case '"':
3138 inquote = TRUE;
3139 switch (fvdef)
3140 {
3141 case fdefunkey:
3142 case fstartlist:
3143 case finlist:
3144 case fignore:
3145 case vignore:
3146 break;
3147 default:
3148 fvextern = FALSE;
3149 fvdef = fvnone;
3150 }
3151 continue;
3152 case '\'':
3153 inchar = TRUE;
3154 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3155 {
3156 fvextern = FALSE;
3157 fvdef = fvnone;
3158 }
3159 continue;
3160 case '/':
3161 if (*lp == '*')
3162 {
3163 lp++;
3164 incomm = TRUE;
3165 continue;
3166 }
3167 else if (/* cplpl && */ *lp == '/')
3168 {
3169 c = '\0';
3170 break;
3171 }
3172 else
3173 break;
3174 case '%':
3175 if ((c_ext & YACC) && *lp == '%')
3176 {
3177 /* Entering or exiting rules section in yacc file. */
3178 lp++;
3179 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3180 typdef = tnone; structdef = snone;
3181 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3182 cblev = 0;
3183 yacc_rules = !yacc_rules;
3184 continue;
3185 }
3186 else
3187 break;
3188 case '#':
3189 if (definedef == dnone)
3190 {
3191 char *cp;
3192 bool cpptoken = TRUE;
3193
3194 /* Look back on this line. If all blanks, or nonblanks
3195 followed by an end of comment, this is a preprocessor
3196 token. */
3197 for (cp = newlb.buffer; cp < lp-1; cp++)
3198 if (!iswhite (*cp))
3199 {
3200 if (*cp == '*' && *(cp+1) == '/')
3201 {
3202 cp++;
3203 cpptoken = TRUE;
3204 }
3205 else
3206 cpptoken = FALSE;
3207 }
3208 if (cpptoken)
3209 definedef = dsharpseen;
3210 } /* if (definedef == dnone) */
3211
3212 continue;
3213 } /* switch (c) */
3214
3215
3216 /* Consider token only if some involved conditions are satisfied. */
3217 if (typdef != tignore
3218 && definedef != dignorerest
3219 && fvdef != finlist
3220 && structdef != sintemplate
3221 && (definedef != dnone
3222 || structdef != scolonseen))
3223 {
3224 if (midtoken)
3225 {
3226 if (endtoken (c))
3227 {
3228 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3229 {
3230 /*
3231 * This handles :: in the middle, but not at the
3232 * beginning of an identifier. Also, space-separated
3233 * :: is not recognised.
3234 */
3235 lp += 2;
3236 toklen += 2;
3237 c = lp[-1];
3238 goto still_in_token;
3239 }
3240 else
3241 {
3242 bool funorvar = FALSE;
3243
3244 if (yacc_rules
3245 || consider_token (newlb.buffer + tokoff, toklen, c,
3246 &c_ext, cblev, parlev, &funorvar))
3247 {
3248 if (fvdef == foperator)
3249 {
3250 char *oldlp = lp;
3251 lp = skip_spaces (lp-1);
3252 if (*lp != '\0')
3253 lp += 1;
3254 while (*lp != '\0'
3255 && !iswhite (*lp) && *lp != '(')
3256 lp += 1;
3257 c = *lp++;
3258 toklen += lp - oldlp;
3259 }
3260 token.named = FALSE;
3261 if ((c_ext & C_EXT) /* not pure C */
3262 && nestlev > 0 && definedef == dnone)
3263 /* in struct body */
3264 {
3265 write_classname (&token_name, qualifier);
3266 linebuffer_setlen (&token_name,
3267 token_name.len+qlen+toklen);
3268 strcat (token_name.buffer, qualifier);
3269 strncat (token_name.buffer,
3270 newlb.buffer + tokoff, toklen);
3271 token.named = TRUE;
3272 }
3273 else if (objdef == ocatseen)
3274 /* Objective C category */
3275 {
3276 int len = strlen (objtag) + 2 + toklen;
3277 linebuffer_setlen (&token_name, len);
3278 strcpy (token_name.buffer, objtag);
3279 strcat (token_name.buffer, "(");
3280 strncat (token_name.buffer,
3281 newlb.buffer + tokoff, toklen);
3282 strcat (token_name.buffer, ")");
3283 token.named = TRUE;
3284 }
3285 else if (objdef == omethodtag
3286 || objdef == omethodparm)
3287 /* Objective C method */
3288 {
3289 token.named = TRUE;
3290 }
3291 else if (fvdef == fdefunname)
3292 /* GNU DEFUN and similar macros */
3293 {
3294 bool defun = (newlb.buffer[tokoff] == 'F');
3295 int off = tokoff;
3296 int len = toklen;
3297
3298 /* Rewrite the tag so that emacs lisp DEFUNs
3299 can be found by their elisp name */
3300 if (defun)
3301 {
3302 off += 1;
3303 len -= 1;
3304 }
3305 len = toklen;
3306 linebuffer_setlen (&token_name, len);
3307 strncpy (token_name.buffer,
3308 newlb.buffer + off, len);
3309 token_name.buffer[len] = '\0';
3310 if (defun)
3311 while (--len >= 0)
3312 if (token_name.buffer[len] == '_')
3313 token_name.buffer[len] = '-';
3314 token.named = defun;
3315 }
3316 else
3317 {
3318 linebuffer_setlen (&token_name, toklen);
3319 strncpy (token_name.buffer,
3320 newlb.buffer + tokoff, toklen);
3321 token_name.buffer[toklen] = '\0';
3322 /* Name macros and members. */
3323 token.named = (structdef == stagseen
3324 || typdef == ttypeseen
3325 || typdef == tend
3326 || (funorvar
3327 && definedef == dignorerest)
3328 || (funorvar
3329 && definedef == dnone
3330 && structdef == snone
3331 && cblev > 0));
3332 }
3333 token.lineno = lineno;
3334 token.offset = tokoff;
3335 token.length = toklen;
3336 token.line = newlb.buffer;
3337 token.linepos = newlinepos;
3338 token.valid = TRUE;
3339
3340 if (definedef == dnone
3341 && (fvdef == fvnameseen
3342 || fvdef == foperator
3343 || structdef == stagseen
3344 || typdef == tend
3345 || typdef == ttypeseen
3346 || objdef != onone))
3347 {
3348 if (current_lb_is_new)
3349 switch_line_buffers ();
3350 }
3351 else if (definedef != dnone
3352 || fvdef == fdefunname
3353 || instruct)
3354 make_C_tag (funorvar);
3355 }
3356 midtoken = FALSE;
3357 }
3358 } /* if (endtoken (c)) */
3359 else if (intoken (c))
3360 still_in_token:
3361 {
3362 toklen++;
3363 continue;
3364 }
3365 } /* if (midtoken) */
3366 else if (begtoken (c))
3367 {
3368 switch (definedef)
3369 {
3370 case dnone:
3371 switch (fvdef)
3372 {
3373 case fstartlist:
3374 fvdef = finlist;
3375 continue;
3376 case flistseen:
3377 make_C_tag (TRUE); /* a function */
3378 fvdef = fignore;
3379 break;
3380 case fvnameseen:
3381 fvdef = fvnone;
3382 break;
3383 }
3384 if (structdef == stagseen && !cjava)
3385 {
3386 popclass_above (cblev);
3387 structdef = snone;
3388 }
3389 break;
3390 case dsharpseen:
3391 savetoken = token;
3392 break;
3393 }
3394 if (!yacc_rules || lp == newlb.buffer + 1)
3395 {
3396 tokoff = lp - 1 - newlb.buffer;
3397 toklen = 1;
3398 midtoken = TRUE;
3399 }
3400 continue;
3401 } /* if (begtoken) */
3402 } /* if must look at token */
3403
3404
3405 /* Detect end of line, colon, comma, semicolon and various braces
3406 after having handled a token.*/
3407 switch (c)
3408 {
3409 case ':':
3410 if (yacc_rules && token.offset == 0 && token.valid)
3411 {
3412 make_C_tag (FALSE); /* a yacc function */
3413 break;
3414 }
3415 if (definedef != dnone)
3416 break;
3417 switch (objdef)
3418 {
3419 case otagseen:
3420 objdef = oignore;
3421 make_C_tag (TRUE); /* an Objective C class */
3422 break;
3423 case omethodtag:
3424 case omethodparm:
3425 objdef = omethodcolon;
3426 linebuffer_setlen (&token_name, token_name.len + 1);
3427 strcat (token_name.buffer, ":");
3428 break;
3429 }
3430 if (structdef == stagseen)
3431 structdef = scolonseen;
3432 break;
3433 case ';':
3434 if (definedef != dnone)
3435 break;
3436 switch (typdef)
3437 {
3438 case tend:
3439 case ttypeseen:
3440 make_C_tag (FALSE); /* a typedef */
3441 typdef = tnone;
3442 fvdef = fvnone;
3443 break;
3444 case tnone:
3445 case tinbody:
3446 case tignore:
3447 switch (fvdef)
3448 {
3449 case fignore:
3450 if (typdef == tignore)
3451 fvdef = fvnone;
3452 break;
3453 case fvnameseen:
3454 if ((globals && cblev == 0 && (!fvextern || declarations))
3455 || (members && instruct))
3456 make_C_tag (FALSE); /* a variable */
3457 fvextern = FALSE;
3458 fvdef = fvnone;
3459 token.valid = FALSE;
3460 break;
3461 case flistseen:
3462 if ((declarations && typdef == tnone && !instruct)
3463 || (members && typdef != tignore && instruct))
3464 make_C_tag (TRUE); /* a function declaration */
3465 /* FALLTHRU */
3466 default:
3467 fvextern = FALSE;
3468 fvdef = fvnone;
3469 if (declarations
3470 && structdef == stagseen && (c_ext & C_PLPL))
3471 make_C_tag (FALSE); /* forward declaration */
3472 else
3473 /* The following instruction invalidates the token.
3474 Probably the token should be invalidated in all other
3475 cases where some state machine is reset prematurely. */
3476 token.valid = FALSE;
3477 } /* switch (fvdef) */
3478 /* FALLTHRU */
3479 default:
3480 if (!instruct)
3481 typdef = tnone;
3482 }
3483 if (structdef == stagseen)
3484 structdef = snone;
3485 break;
3486 case ',':
3487 if (definedef != dnone)
3488 break;
3489 switch (objdef)
3490 {
3491 case omethodtag:
3492 case omethodparm:
3493 make_C_tag (TRUE); /* an Objective C method */
3494 objdef = oinbody;
3495 break;
3496 }
3497 switch (fvdef)
3498 {
3499 case fdefunkey:
3500 case foperator:
3501 case fstartlist:
3502 case finlist:
3503 case fignore:
3504 case vignore:
3505 break;
3506 case fdefunname:
3507 fvdef = fignore;
3508 break;
3509 case fvnameseen: /* a variable */
3510 if ((globals && cblev == 0 && (!fvextern || declarations))
3511 || (members && instruct))
3512 make_C_tag (FALSE);
3513 break;
3514 case flistseen: /* a function */
3515 if ((declarations && typdef == tnone && !instruct)
3516 || (members && typdef != tignore && instruct))
3517 {
3518 make_C_tag (TRUE); /* a function declaration */
3519 fvdef = fvnameseen;
3520 }
3521 else if (!declarations)
3522 fvdef = fvnone;
3523 token.valid = FALSE;
3524 break;
3525 default:
3526 fvdef = fvnone;
3527 }
3528 if (structdef == stagseen)
3529 structdef = snone;
3530 break;
3531 case '[':
3532 if (definedef != dnone)
3533 break;
3534 if (structdef == stagseen)
3535 structdef = snone;
3536 switch (typdef)
3537 {
3538 case ttypeseen:
3539 case tend:
3540 typdef = tignore;
3541 make_C_tag (FALSE); /* a typedef */
3542 break;
3543 case tnone:
3544 case tinbody:
3545 switch (fvdef)
3546 {
3547 case foperator:
3548 case finlist:
3549 case fignore:
3550 case vignore:
3551 break;
3552 case fvnameseen:
3553 if ((members && cblev == 1)
3554 || (globals && cblev == 0
3555 && (!fvextern || declarations)))
3556 make_C_tag (FALSE); /* a variable */
3557 /* FALLTHRU */
3558 default:
3559 fvdef = fvnone;
3560 }
3561 break;
3562 }
3563 break;
3564 case '(':
3565 if (definedef != dnone)
3566 break;
3567 if (objdef == otagseen && parlev == 0)
3568 objdef = oparenseen;
3569 switch (fvdef)
3570 {
3571 case fvnameseen:
3572 if (typdef == ttypeseen
3573 && *lp != '*'
3574 && !instruct)
3575 {
3576 /* This handles constructs like:
3577 typedef void OperatorFun (int fun); */
3578 make_C_tag (FALSE);
3579 typdef = tignore;
3580 fvdef = fignore;
3581 break;
3582 }
3583 /* FALLTHRU */
3584 case foperator:
3585 fvdef = fstartlist;
3586 break;
3587 case flistseen:
3588 fvdef = finlist;
3589 break;
3590 }
3591 parlev++;
3592 break;
3593 case ')':
3594 if (definedef != dnone)
3595 break;
3596 if (objdef == ocatseen && parlev == 1)
3597 {
3598 make_C_tag (TRUE); /* an Objective C category */
3599 objdef = oignore;
3600 }
3601 if (--parlev == 0)
3602 {
3603 switch (fvdef)
3604 {
3605 case fstartlist:
3606 case finlist:
3607 fvdef = flistseen;
3608 break;
3609 }
3610 if (!instruct
3611 && (typdef == tend
3612 || typdef == ttypeseen))
3613 {
3614 typdef = tignore;
3615 make_C_tag (FALSE); /* a typedef */
3616 }
3617 }
3618 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3619 parlev = 0;
3620 break;
3621 case '{':
3622 if (definedef != dnone)
3623 break;
3624 if (typdef == ttypeseen)
3625 {
3626 /* Whenever typdef is set to tinbody (currently only
3627 here), typdefcblev should be set to cblev. */
3628 typdef = tinbody;
3629 typdefcblev = cblev;
3630 }
3631 switch (fvdef)
3632 {
3633 case flistseen:
3634 make_C_tag (TRUE); /* a function */
3635 /* FALLTHRU */
3636 case fignore:
3637 fvdef = fvnone;
3638 break;
3639 case fvnone:
3640 switch (objdef)
3641 {
3642 case otagseen:
3643 make_C_tag (TRUE); /* an Objective C class */
3644 objdef = oignore;
3645 break;
3646 case omethodtag:
3647 case omethodparm:
3648 make_C_tag (TRUE); /* an Objective C method */
3649 objdef = oinbody;
3650 break;
3651 default:
3652 /* Neutralize `extern "C" {' grot. */
3653 if (cblev == 0 && structdef == snone && nestlev == 0
3654 && typdef == tnone)
3655 cblev = -1;
3656 }
3657 break;
3658 }
3659 switch (structdef)
3660 {
3661 case skeyseen: /* unnamed struct */
3662 pushclass_above (cblev, NULL, 0);
3663 structdef = snone;
3664 break;
3665 case stagseen: /* named struct or enum */
3666 case scolonseen: /* a class */
3667 pushclass_above (cblev, token.line+token.offset, token.length);
3668 structdef = snone;
3669 make_C_tag (FALSE); /* a struct or enum */
3670 break;
3671 }
3672 cblev++;
3673 break;
3674 case '*':
3675 if (definedef != dnone)
3676 break;
3677 if (fvdef == fstartlist)
3678 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3679 break;
3680 case '}':
3681 if (definedef != dnone)
3682 break;
3683 if (!noindentypedefs && lp == newlb.buffer + 1)
3684 {
3685 cblev = 0; /* reset curly brace level if first column */
3686 parlev = 0; /* also reset paren level, just in case... */
3687 }
3688 else if (cblev > 0)
3689 cblev--;
3690 popclass_above (cblev);
3691 structdef = snone;
3692 /* Only if typdef == tinbody is typdefcblev significant. */
3693 if (typdef == tinbody && cblev <= typdefcblev)
3694 {
3695 assert (cblev == typdefcblev);
3696 typdef = tend;
3697 }
3698 break;
3699 case '=':
3700 if (definedef != dnone)
3701 break;
3702 switch (fvdef)
3703 {
3704 case foperator:
3705 case finlist:
3706 case fignore:
3707 case vignore:
3708 break;
3709 case fvnameseen:
3710 if ((members && cblev == 1)
3711 || (globals && cblev == 0 && (!fvextern || declarations)))
3712 make_C_tag (FALSE); /* a variable */
3713 /* FALLTHRU */
3714 default:
3715 fvdef = vignore;
3716 }
3717 break;
3718 case '<':
3719 if (cplpl && structdef == stagseen)
3720 {
3721 structdef = sintemplate;
3722 break;
3723 }
3724 goto resetfvdef;
3725 case '>':
3726 if (structdef == sintemplate)
3727 {
3728 structdef = stagseen;
3729 break;
3730 }
3731 goto resetfvdef;
3732 case '+':
3733 case '-':
3734 if (objdef == oinbody && cblev == 0)
3735 {
3736 objdef = omethodsign;
3737 break;
3738 }
3739 /* FALLTHRU */
3740 resetfvdef:
3741 case '#': case '~': case '&': case '%': case '/': case '|':
3742 case '^': case '!': case '.': case '?': case ']':
3743 if (definedef != dnone)
3744 break;
3745 /* These surely cannot follow a function tag in C. */
3746 switch (fvdef)
3747 {
3748 case foperator:
3749 case finlist:
3750 case fignore:
3751 case vignore:
3752 break;
3753 default:
3754 fvdef = fvnone;
3755 }
3756 break;
3757 case '\0':
3758 if (objdef == otagseen)
3759 {
3760 make_C_tag (TRUE); /* an Objective C class */
3761 objdef = oignore;
3762 }
3763 /* If a macro spans multiple lines don't reset its state. */
3764 if (quotednl)
3765 CNL_SAVE_DEFINEDEF ();
3766 else
3767 CNL ();
3768 break;
3769 } /* switch (c) */
3770
3771 } /* while not eof */
3772
3773 free (token_name.buffer);
3774 free (lbs[0].lb.buffer);
3775 free (lbs[1].lb.buffer);
3776 }
3777
3778 /*
3779 * Process either a C++ file or a C file depending on the setting
3780 * of a global flag.
3781 */
3782 static void
3783 default_C_entries (inf)
3784 FILE *inf;
3785 {
3786 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3787 }
3788
/* Parse INF as plain C: call C_entries with no extension flags set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3796
3797 /* Always do C++. */
3798 static void
3799 Cplusplus_entries (inf)
3800 FILE *inf;
3801 {
3802 C_entries (C_PLPL, inf);
3803 }
3804
3805 /* Always do Java. */
3806 static void
3807 Cjava_entries (inf)
3808 FILE *inf;
3809 {
3810 C_entries (C_JAVA, inf);
3811 }
3812
3813 /* Always do C*. */
3814 static void
3815 Cstar_entries (inf)
3816 FILE *inf;
3817 {
3818 C_entries (C_STAR, inf);
3819 }
3820
3821 /* Always do Yacc. */
3822 static void
3823 Yacc_entries (inf)
3824 FILE *inf;
3825 {
3826 C_entries (YACC, inf);
3827 }
3828
3829 \f
/* Useful macros. */

/*
 * Loop header: while FILE_POINTER is not at end of file, read the next
 * line into LINE_BUFFER with readline and point CHAR_POINTER at the
 * start of that line.  The statement following the macro is the loop
 * body; `continue' in the body moves on to the next line.
 *
 * CHAR_POINTER is assigned only after readline has returned, and from
 * LINE_BUFFER itself rather than from a hard-coded buffer, so that it
 * is still valid should readline have moved the buffer while reading,
 * and so that the macro works with whatever buffer the caller passes.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )

/*
 * True if CP points at KEYWORD followed by a character for which
 * notinname is true.  On success CP is advanced past the keyword and
 * any spaces that follow it; on failure CP is left unchanged.
 * KEYWORD must be a string literal, because its length is taken with
 * sizeof.
 */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof(keyword)-1])       /* end of keyword */    \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* skip spaces */
3842
3843 /*
3844 * Read a file, but do no processing. This is used to do regexp
3845 * matching on files that have no language defined.
3846 */
3847 static void
3848 just_read_file (inf)
3849 FILE *inf;
3850 {
3851 register char *dummy;
3852
3853 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3854 continue;
3855 }
3856
3857 \f
3858 /* Fortran parsing */
3859
3860 static void F_takeprec __P((void));
3861 static void F_getit __P((FILE *));
3862
3863 static void
3864 F_takeprec ()
3865 {
3866 dbp = skip_spaces (dbp);
3867 if (*dbp != '*')
3868 return;
3869 dbp++;
3870 dbp = skip_spaces (dbp);
3871 if (strneq (dbp, "(*)", 3))
3872 {
3873 dbp += 3;
3874 return;
3875 }
3876 if (!ISDIGIT (*dbp))
3877 {
3878 --dbp; /* force failure */
3879 return;
3880 }
3881 do
3882 dbp++;
3883 while (ISDIGIT (*dbp));
3884 }
3885
3886 static void
3887 F_getit (inf)
3888 FILE *inf;
3889 {
3890 register char *cp;
3891
3892 dbp = skip_spaces (dbp);
3893 if (*dbp == '\0')
3894 {
3895 readline (&lb, inf);
3896 dbp = lb.buffer;
3897 if (dbp[5] != '&')
3898 return;
3899 dbp += 6;
3900 dbp = skip_spaces (dbp);
3901 }
3902 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3903 return;
3904 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3905 continue;
3906 pfnote (savenstr (dbp, cp-dbp), TRUE,
3907 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3908 }
3909
3910
3911 static void
3912 Fortran_functions (inf)
3913 FILE *inf;
3914 {
3915 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3916 {
3917 if (*dbp == '%')
3918 dbp++; /* Ratfor escape to fortran */
3919 dbp = skip_spaces (dbp);
3920 if (*dbp == '\0')
3921 continue;
3922 switch (lowcase (*dbp))
3923 {
3924 case 'i':
3925 if (nocase_tail ("integer"))
3926 F_takeprec ();
3927 break;
3928 case 'r':
3929 if (nocase_tail ("real"))
3930 F_takeprec ();
3931 break;
3932 case 'l':
3933 if (nocase_tail ("logical"))
3934 F_takeprec ();
3935 break;
3936 case 'c':
3937 if (nocase_tail ("complex") || nocase_tail ("character"))
3938 F_takeprec ();
3939 break;
3940 case 'd':
3941 if (nocase_tail ("double"))
3942 {
3943 dbp = skip_spaces (dbp);
3944 if (*dbp == '\0')
3945 continue;
3946 if (nocase_tail ("precision"))
3947 break;
3948 continue;
3949 }
3950 break;
3951 }
3952 dbp = skip_spaces (dbp);
3953 if (*dbp == '\0')
3954 continue;
3955 switch (lowcase (*dbp))
3956 {
3957 case 'f':
3958 if (nocase_tail ("function"))
3959 F_getit (inf);
3960 continue;
3961 case 's':
3962 if (nocase_tail ("subroutine"))
3963 F_getit (inf);
3964 continue;
3965 case 'e':
3966 if (nocase_tail ("entry"))
3967 F_getit (inf);
3968 continue;
3969 case 'b':
3970 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3971 {
3972 dbp = skip_spaces (dbp);
3973 if (*dbp == '\0') /* assume un-named */
3974 pfnote (savestr ("blockdata"), TRUE,
3975 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3976 else
3977 F_getit (inf); /* look for name */
3978 }
3979 continue;
3980 }
3981 }
3982 }
3983
3984 \f
3985 /*
3986 * Ada parsing
3987 * Original code by
3988 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3989 */
3990
3991 static void Ada_getit __P((FILE *, char *));
3992
3993 /* Once we are positioned after an "interesting" keyword, let's get
3994 the real tag value necessary. */
3995 static void
3996 Ada_getit (inf, name_qualifier)
3997 FILE *inf;
3998 char *name_qualifier;
3999 {
4000 register char *cp;
4001 char *name;
4002 char c;
4003
4004 while (!feof (inf))
4005 {
4006 dbp = skip_spaces (dbp);
4007 if (*dbp == '\0'
4008 || (dbp[0] == '-' && dbp[1] == '-'))
4009 {
4010 readline (&lb, inf);
4011 dbp = lb.buffer;
4012 }
4013 switch (lowcase(*dbp))
4014 {
4015 case 'b':
4016 if (nocase_tail ("body"))
4017 {
4018 /* Skipping body of procedure body or package body or ....
4019 resetting qualifier to body instead of spec. */
4020 name_qualifier = "/b";
4021 continue;
4022 }
4023 break;
4024 case 't':
4025 /* Skipping type of task type or protected type ... */
4026 if (nocase_tail ("type"))
4027 continue;
4028 break;
4029 }
4030 if (*dbp == '"')
4031 {
4032 dbp += 1;
4033 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4034 continue;
4035 }
4036 else
4037 {
4038 dbp = skip_spaces (dbp);
4039 for (cp = dbp;
4040 (*cp != '\0'
4041 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4042 cp++)
4043 continue;
4044 if (cp == dbp)
4045 return;
4046 }
4047 c = *cp;
4048 *cp = '\0';
4049 name = concat (dbp, name_qualifier, "");
4050 *cp = c;
4051 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4052 if (c == '"')
4053 dbp = cp + 1;
4054 return;
4055 }
4056 }
4057
4058 static void
4059 Ada_funcs (inf)
4060 FILE *inf;
4061 {
4062 bool inquote = FALSE;
4063
4064 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4065 {
4066 while (*dbp != '\0')
4067 {
4068 /* Skip a string i.e. "abcd". */
4069 if (inquote || (*dbp == '"'))
4070 {
4071 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4072 if (dbp != NULL)
4073 {
4074 inquote = FALSE;
4075 dbp += 1;
4076 continue; /* advance char */
4077 }
4078 else
4079 {
4080 inquote = TRUE;
4081 break; /* advance line */
4082 }
4083 }
4084
4085 /* Skip comments. */
4086 if (dbp[0] == '-' && dbp[1] == '-')
4087 break; /* advance line */
4088
4089 /* Skip character enclosed in single quote i.e. 'a'
4090 and skip single quote starting an attribute i.e. 'Image. */
4091 if (*dbp == '\'')
4092 {
4093 dbp++ ;
4094 if (*dbp != '\0')
4095 dbp++;
4096 continue;
4097 }
4098
4099 /* Search for beginning of a token. */
4100 if (!begtoken (*dbp))
4101 {
4102 dbp++;
4103 continue; /* advance char */
4104 }
4105
4106 /* We are at the beginning of a token. */
4107 switch (lowcase(*dbp))
4108 {
4109 case 'f':
4110 if (!packages_only && nocase_tail ("function"))
4111 Ada_getit (inf, "/f");
4112 else
4113 break; /* from switch */
4114 continue; /* advance char */
4115 case 'p':
4116 if (!packages_only && nocase_tail ("procedure"))
4117 Ada_getit (inf, "/p");
4118 else if (nocase_tail ("package"))
4119 Ada_getit (inf, "/s");
4120 else if (nocase_tail ("protected")) /* protected type */
4121 Ada_getit (inf, "/t");
4122 else
4123 break; /* from switch */
4124 continue; /* advance char */
4125 case 't':
4126 if (!packages_only && nocase_tail ("task"))
4127 Ada_getit (inf, "/k");
4128 else if (typedefs && !packages_only && nocase_tail ("type"))
4129 {
4130 Ada_getit (inf, "/t");
4131 while (*dbp != '\0')
4132 dbp += 1;
4133 }
4134 else
4135 break; /* from switch */
4136 continue; /* advance char */
4137 }
4138
4139 /* Look for the end of the token. */
4140 while (!endtoken (*dbp))
4141 dbp++;
4142
4143 } /* advance char */
4144 } /* advance line */
4145 }
4146
4147 \f
4148 /*
4149 * Unix and microcontroller assembly tag handling
4150 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4151 * Idea by Bob Weiner, Motorola Inc. (1994)
4152 */
4153 static void
4154 Asm_labels (inf)
4155 FILE *inf;
4156 {
4157 register char *cp;
4158
4159 LOOP_ON_INPUT_LINES (inf, lb, cp)
4160 {
4161 /* If first char is alphabetic or one of [_.$], test for colon
4162 following identifier. */
4163 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4164 {
4165 /* Read past label. */
4166 cp++;
4167 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4168 cp++;
4169 if (*cp == ':' || iswhite (*cp))
4170 {
4171 /* Found end of label, so copy it and add it to the table. */
4172 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
4173 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4174 }
4175 }
4176 }
4177 }
4178
4179 \f
4180 /*
4181 * Perl support
4182 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4183 * Perl variable names: /^(my|local).../
4184 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4185 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4186 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4187 */
4188 static void
4189 Perl_functions (inf)
4190 FILE *inf;
4191 {
4192 char *package = savestr ("main"); /* current package name */
4193 register char *cp;
4194
4195 LOOP_ON_INPUT_LINES (inf, lb, cp)
4196 {
4197 skip_spaces(cp);
4198
4199 if (LOOKING_AT (cp, "package"))
4200 {
4201 free (package);
4202 package = get_tag (cp);
4203 if (package == NULL) /* can't parse package name */
4204 package = savestr ("");
4205 else
4206 package = savestr(package); /* make a copy */
4207 }
4208 else if (LOOKING_AT (cp, "sub"))
4209 {
4210 char *name, *fullname, *pos;
4211 char *sp = cp;
4212
4213 while (!notinname (*cp))
4214 cp++;
4215 if (cp == sp)
4216 continue;
4217 name = savenstr (sp, cp-sp);
4218 if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4219 fullname = name;
4220 else
4221 fullname = concat (package, "::", name);
4222 pfnote (fullname, TRUE,
4223 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4224 if (name != fullname)
4225 free (name);
4226 }
4227 else if (globals /* only if tagging global vars is enabled */
4228 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4229 {
4230 /* After "my" or "local", but before any following paren or space. */
4231 char *varname = NULL;
4232
4233 if (*cp == '$' || *cp == '@' || *cp == '%')
4234 {
4235 char* varstart = ++cp;
4236 while (ISALNUM (*cp) || *cp == '_')
4237 cp++;
4238 varname = savenstr (varstart, cp-varstart);
4239 }
4240 else
4241 {
4242 /* Should be examining a variable list at this point;
4243 could insist on seeing an open parenthesis. */
4244 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4245 cp++;
4246 }
4247
4248 /* Perhaps I should back cp up one character, so the TAGS table
4249 doesn't mention (and so depend upon) the following char. */
4250 pfnote (varname, FALSE,
4251 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4252 }
4253 }
4254 }
4255
4256
4257 /*
4258 * Python support
4259 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4260 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4261 * More ideas by seb bacon <seb@jamkit.com> (2002)
4262 */
4263 static void
4264 Python_functions (inf)
4265 FILE *inf;
4266 {
4267 register char *cp;
4268
4269 LOOP_ON_INPUT_LINES (inf, lb, cp)
4270 {
4271 cp = skip_spaces (cp);
4272 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4273 {
4274 char *name = cp;
4275 while (!notinname (*cp) && *cp != ':')
4276 cp++;
4277 pfnote (savenstr (name, cp-name), TRUE,
4278 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4279 }
4280 }
4281 }
4282
4283 \f
4284 /*
4285 * PHP support
4286 * Look for:
4287 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4288 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4289 * - /^[ \t]*define\(\"[^\"]+/
4290 * Only with --members:
4291 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4292 * Idea by Diez B. Roggisch (2001)
4293 */
4294 static void
4295 PHP_functions (inf)
4296 FILE *inf;
4297 {
4298 register char *cp, *name;
4299 bool search_identifier = FALSE;
4300
4301 LOOP_ON_INPUT_LINES (inf, lb, cp)
4302 {
4303 cp = skip_spaces (cp);
4304 name = cp;
4305 if (search_identifier
4306 && *cp != '\0')
4307 {
4308 while (!notinname (*cp))
4309 cp++;
4310 pfnote (savenstr (name, cp-name), TRUE,
4311 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4312 search_identifier = FALSE;
4313 }
4314 else if (LOOKING_AT (cp, "function"))
4315 {
4316 if(*cp == '&')
4317 cp = skip_spaces (cp+1);
4318 if(*cp != '\0')
4319 {
4320 name = cp;
4321 while (!notinname (*cp))
4322 cp++;
4323 pfnote (savenstr (name, cp-name), TRUE,
4324 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4325 }
4326 else
4327 search_identifier = TRUE;
4328 }
4329 else if (LOOKING_AT (cp, "class"))
4330 {
4331 if (*cp != '\0')
4332 {
4333 name = cp;
4334 while (*cp != '\0' && !iswhite (*cp))
4335 cp++;
4336 pfnote (savenstr (name, cp-name), FALSE,
4337 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4338 }
4339 else
4340 search_identifier = TRUE;
4341 }
4342 else if (strneq (cp, "define", 6)
4343 && (cp = skip_spaces (cp+6))
4344 && *cp++ == '('
4345 && (*cp == '"' || *cp == '\''))
4346 {
4347 char quote = *cp++;
4348 name = cp;
4349 while (*cp != quote && *cp != '\0')
4350 cp++;
4351 pfnote (savenstr (name, cp-name), FALSE,
4352 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4353 }
4354 else if (members
4355 && LOOKING_AT (cp, "var")
4356 && *cp == '$')
4357 {
4358 name = cp;
4359 while (!notinname(*cp))
4360 cp++;
4361 pfnote (savenstr (name, cp-name), FALSE,
4362 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4363 }
4364 }
4365 }
4366
4367 \f
4368 /*
4369 * Cobol tag functions
4370 * We could look for anything that could be a paragraph name.
4371 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4372 * Idea by Corny de Souza (1993)
4373 */
4374 static void
4375 Cobol_paragraphs (inf)
4376 FILE *inf;
4377 {
4378 register char *bp, *ep;
4379
4380 LOOP_ON_INPUT_LINES (inf, lb, bp)
4381 {
4382 if (lb.len < 9)
4383 continue;
4384 bp += 8;
4385
4386 /* If eoln, compiler option or comment ignore whole line. */
4387 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4388 continue;
4389
4390 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4391 continue;
4392 if (*ep++ == '.')
4393 pfnote (savenstr (bp, ep-bp), TRUE,
4394 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4395 }
4396 }
4397
4398 \f
4399 /*
4400 * Makefile support
4401 * Idea by Assar Westerlund <assar@sics.se> (2001)
4402 */
4403 static void
4404 Makefile_targets (inf)
4405 FILE *inf;
4406 {
4407 register char *bp;
4408
4409 LOOP_ON_INPUT_LINES (inf, lb, bp)
4410 {
4411 if (*bp == '\t' || *bp == '#')
4412 continue;
4413 while (*bp != '\0' && *bp != '=' && *bp != ':')
4414 bp++;
4415 if (*bp == ':')
4416 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4417 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4418 }
4419 }
4420
4421 \f
4422 /*
4423 * Pascal parsing
4424 * Original code by Mosur K. Mohan (1989)
4425 *
4426 * Locates tags for procedures & functions. Doesn't do any type- or
4427 * var-definitions. It does look for the keyword "extern" or
4428 * "forward" immediately following the procedure statement; if found,
4429 * the tag is skipped.
4430 */
4431 static void
4432 Pascal_functions (inf)
4433 FILE *inf;
4434 {
4435 linebuffer tline; /* mostly copied from C_entries */
4436 long save_lcno;
4437 int save_lineno, save_len;
4438 char c, *cp, *namebuf;
4439
4440 bool /* each of these flags is TRUE iff: */
4441 incomment, /* point is inside a comment */
4442 inquote, /* point is inside '..' string */
4443 get_tagname, /* point is after PROCEDURE/FUNCTION
4444 keyword, so next item = potential tag */
4445 found_tag, /* point is after a potential tag */
4446 inparms, /* point is within parameter-list */
4447 verify_tag; /* point has passed the parm-list, so the
4448 next token will determine whether this
4449 is a FORWARD/EXTERN to be ignored, or
4450 whether it is a real tag */
4451
4452 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4453 namebuf = NULL; /* keep compiler quiet */
4454 dbp = lb.buffer;
4455 *dbp = '\0';
4456 initbuffer (&tline);
4457
4458 incomment = inquote = FALSE;
4459 found_tag = FALSE; /* have a proc name; check if extern */
4460 get_tagname = FALSE; /* have found "procedure" keyword */
4461 inparms = FALSE; /* found '(' after "proc" */
4462 verify_tag = FALSE; /* check if "extern" is ahead */
4463
4464
4465 while (!feof (inf)) /* long main loop to get next char */
4466 {
4467 c = *dbp++;
4468 if (c == '\0') /* if end of line */
4469 {
4470 readline (&lb, inf);
4471 dbp = lb.buffer;
4472 if (*dbp == '\0')
4473 continue;
4474 if (!((found_tag && verify_tag)
4475 || get_tagname))
4476 c = *dbp++; /* only if don't need *dbp pointing
4477 to the beginning of the name of
4478 the procedure or function */
4479 }
4480 if (incomment)
4481 {
4482 if (c == '}') /* within { } comments */
4483 incomment = FALSE;
4484 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4485 {
4486 dbp++;
4487 incomment = FALSE;
4488 }
4489 continue;
4490 }
4491 else if (inquote)
4492 {
4493 if (c == '\'')
4494 inquote = FALSE;
4495 continue;
4496 }
4497 else
4498 switch (c)
4499 {
4500 case '\'':
4501 inquote = TRUE; /* found first quote */
4502 continue;
4503 case '{': /* found open { comment */
4504 incomment = TRUE;
4505 continue;
4506 case '(':
4507 if (*dbp == '*') /* found open (* comment */
4508 {
4509 incomment = TRUE;
4510 dbp++;
4511 }
4512 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4513 inparms = TRUE;
4514 continue;
4515 case ')': /* end of parms list */
4516 if (inparms)
4517 inparms = FALSE;
4518 continue;
4519 case ';':
4520 if (found_tag && !inparms) /* end of proc or fn stmt */
4521 {
4522 verify_tag = TRUE;
4523 break;
4524 }
4525 continue;
4526 }
4527 if (found_tag && verify_tag && (*dbp != ' '))
4528 {
4529 /* check if this is an "extern" declaration */
4530 if (*dbp == '\0')
4531 continue;
4532 if (lowcase (*dbp == 'e'))
4533 {
4534 if (nocase_tail ("extern")) /* superfluous, really! */
4535 {
4536 found_tag = FALSE;
4537 verify_tag = FALSE;
4538 }
4539 }
4540 else if (lowcase (*dbp) == 'f')
4541 {
4542 if (nocase_tail ("forward")) /* check for forward reference */
4543 {
4544 found_tag = FALSE;
4545 verify_tag = FALSE;
4546 }
4547 }
4548 if (found_tag && verify_tag) /* not external proc, so make tag */
4549 {
4550 found_tag = FALSE;
4551 verify_tag = FALSE;
4552 pfnote (namebuf, TRUE,
4553 tline.buffer, save_len, save_lineno, save_lcno);
4554 continue;
4555 }
4556 }
4557 if (get_tagname) /* grab name of proc or fn */
4558 {
4559 if (*dbp == '\0')
4560 continue;
4561
4562 /* save all values for later tagging */
4563 linebuffer_setlen (&tline, lb.len);
4564 strcpy (tline.buffer, lb.buffer);
4565 save_lineno = lineno;
4566 save_lcno = linecharno;
4567
4568 /* grab block name */
4569 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4570 continue;
4571 namebuf = savenstr (dbp, cp-dbp);
4572 dbp = cp; /* set dbp to e-o-token */
4573 save_len = dbp - lb.buffer + 1;
4574 get_tagname = FALSE;
4575 found_tag = TRUE;
4576 continue;
4577
4578 /* and proceed to check for "extern" */
4579 }
4580 else if (!incomment && !inquote && !found_tag)
4581 {
4582 /* check for proc/fn keywords */
4583 switch (lowcase (c))
4584 {
4585 case 'p':
4586 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4587 get_tagname = TRUE;
4588 continue;
4589 case 'f':
4590 if (nocase_tail ("unction"))
4591 get_tagname = TRUE;
4592 continue;
4593 }
4594 }
4595 } /* while not eof */
4596
4597 free (tline.buffer);
4598 }
4599
4600 \f
4601 /*
4602 * Lisp tag functions
4603 * look for (def or (DEF, quote or QUOTE
4604 */
4605
4606 static void L_getit __P((void));
4607
4608 static void
4609 L_getit ()
4610 {
4611 if (*dbp == '\'') /* Skip prefix quote */
4612 dbp++;
4613 else if (*dbp == '(')
4614 {
4615 dbp++;
4616 /* Try to skip "(quote " */
4617 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4618 /* Ok, then skip "(" before name in (defstruct (foo)) */
4619 dbp = skip_spaces (dbp);
4620 }
4621 get_tag (dbp);
4622 }
4623
4624 static void
4625 Lisp_functions (inf)
4626 FILE *inf;
4627 {
4628 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4629 {
4630 if (dbp[0] != '(')
4631 continue;
4632
4633 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4634 {
4635 dbp = skip_non_spaces (dbp);
4636 dbp = skip_spaces (dbp);
4637 L_getit ();
4638 }
4639 else
4640 {
4641 /* Check for (foo::defmumble name-defined ... */
4642 do
4643 dbp++;
4644 while (!notinname (*dbp) && *dbp != ':');
4645 if (*dbp == ':')
4646 {
4647 do
4648 dbp++;
4649 while (*dbp == ':');
4650
4651 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4652 {
4653 dbp = skip_non_spaces (dbp);
4654 dbp = skip_spaces (dbp);
4655 L_getit ();
4656 }
4657 }
4658 }
4659 }
4660 }
4661
4662 \f
4663 /*
4664 * Postscript tag functions
4665 * Just look for lines where the first character is '/'
4666 * Also look at "defineps" for PSWrap
4667 * Ideas by:
4668 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4669 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4670 */
4671 static void
4672 Postscript_functions (inf)
4673 FILE *inf;
4674 {
4675 register char *bp, *ep;
4676
4677 LOOP_ON_INPUT_LINES (inf, lb, bp)
4678 {
4679 if (bp[0] == '/')
4680 {
4681 for (ep = bp+1;
4682 *ep != '\0' && *ep != ' ' && *ep != '{';
4683 ep++)
4684 continue;
4685 pfnote (savenstr (bp, ep-bp), TRUE,
4686 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4687 }
4688 else if (LOOKING_AT (bp, "defineps"))
4689 get_tag (bp);
4690 }
4691 }
4692
4693 \f
4694 /*
4695 * Scheme tag functions
4696 * look for (def... xyzzy
4697 * (def... (xyzzy
4698 * (def ... ((...(xyzzy ....
4699 * (set! xyzzy
4700 * Original code by Ken Haase (1985?)
4701 */
4702
4703 static void
4704 Scheme_functions (inf)
4705 FILE *inf;
4706 {
4707 register char *bp;
4708
4709 LOOP_ON_INPUT_LINES (inf, lb, bp)
4710 {
4711 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4712 {
4713 bp = skip_non_spaces (bp+4);
4714 /* Skip over open parens and white space */
4715 while (notinname (*bp))
4716 bp++;
4717 get_tag (bp);
4718 }
4719 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4720 get_tag (bp);
4721 }
4722 }
4723
4724 \f
4725 /* Find tags in TeX and LaTeX input files. */
4726
4727 /* TEX_toktab is a table of TeX control sequences that define tags.
4728 * Each entry records one such control sequence.
4729 *
4730 * Original code from who knows whom.
4731 * Ideas by:
4732 * Stefan Monnier (2002)
4733 */
4734
4735 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4736
4737 /* Default set of control sequences to put into TEX_toktab.
4738 The value of environment var TEXTAGS is prepended to this. */
4739 static char *TEX_defenv = "\
4740 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4741 :part:appendix:entry:index:def\
4742 :newcommand:renewcommand:newenvironment:renewenvironment";
4743
4744 static void TEX_mode __P((FILE *));
4745 static void TEX_decode_env __P((char *, char *));
4746
4747 static char TEX_esc = '\\';
4748 static char TEX_opgrp = '{';
4749 static char TEX_clgrp = '}';
4750
4751 /*
4752 * TeX/LaTeX scanning loop.
4753 */
4754 static void
4755 TeX_commands (inf)
4756 FILE *inf;
4757 {
4758 char *cp;
4759 linebuffer *key;
4760
4761 /* Select either \ or ! as escape character. */
4762 TEX_mode (inf);
4763
4764 /* Initialize token table once from environment. */
4765 if (TEX_toktab == NULL)
4766 TEX_decode_env ("TEXTAGS", TEX_defenv);
4767
4768 LOOP_ON_INPUT_LINES (inf, lb, cp)
4769 {
4770 /* Look at each TEX keyword in line. */
4771 for (;;)
4772 {
4773 /* Look for a TEX escape. */
4774 while (*cp++ != TEX_esc)
4775 if (cp[-1] == '\0' || cp[-1] == '%')
4776 goto tex_next_line;
4777
4778 for (key = TEX_toktab; key->buffer != NULL; key++)
4779 if (strneq (cp, key->buffer, key->len))
4780 {
4781 register char *p;
4782 char *name;
4783 int linelen;
4784 bool opgrp = FALSE;
4785
4786 cp = skip_spaces (cp + key->len);
4787 if (*cp == TEX_opgrp)
4788 {
4789 opgrp = TRUE;
4790 cp++;
4791 }
4792 for (p = cp;
4793 (!iswhite (*p) && *p != '#' &&
4794 *p != TEX_opgrp && *p != TEX_clgrp);
4795 p++)
4796 continue;
4797 name = savenstr (cp, p-cp);
4798 linelen = lb.len;
4799 if (!opgrp || *p == TEX_clgrp)
4800 {
4801 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4802 *p++;
4803 linelen = p - lb.buffer + 1;
4804 }
4805 pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
4806 goto tex_next_line; /* We only tag a line once */
4807 }
4808 }
4809 tex_next_line:
4810 ;
4811 }
4812 }
4813
4814 #define TEX_LESC '\\'
4815 #define TEX_SESC '!'
4816
4817 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4818 chars accordingly. */
4819 static void
4820 TEX_mode (inf)
4821 FILE *inf;
4822 {
4823 int c;
4824
4825 while ((c = getc (inf)) != EOF)
4826 {
4827 /* Skip to next line if we hit the TeX comment char. */
4828 if (c == '%')
4829 while (c != '\n')
4830 c = getc (inf);
4831 else if (c == TEX_LESC || c == TEX_SESC )
4832 break;
4833 }
4834
4835 if (c == TEX_LESC)
4836 {
4837 TEX_esc = TEX_LESC;
4838 TEX_opgrp = '{';
4839 TEX_clgrp = '}';
4840 }
4841 else
4842 {
4843 TEX_esc = TEX_SESC;
4844 TEX_opgrp = '<';
4845 TEX_clgrp = '>';
4846 }
4847 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4848 No attempt is made to correct the situation. */
4849 rewind (inf);
4850 }
4851
4852 /* Read environment and prepend it to the default string.
4853 Build token table. */
4854 static void
4855 TEX_decode_env (evarname, defenv)
4856 char *evarname;
4857 char *defenv;
4858 {
4859 register char *env, *p;
4860 int i, len;
4861
4862 /* Append default string to environment. */
4863 env = getenv (evarname);
4864 if (!env)
4865 env = defenv;
4866 else
4867 {
4868 char *oldenv = env;
4869 env = concat (oldenv, defenv, "");
4870 }
4871
4872 /* Allocate a token table */
4873 for (len = 1, p = env; p;)
4874 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4875 len++;
4876 TEX_toktab = xnew (len, linebuffer);
4877
4878 /* Unpack environment string into token table. Be careful about */
4879 /* zero-length strings (leading ':', "::" and trailing ':') */
4880 for (i = 0; *env != '\0';)
4881 {
4882 p = etags_strchr (env, ':');
4883 if (!p) /* End of environment string. */
4884 p = env + strlen (env);
4885 if (p - env > 0)
4886 { /* Only non-zero strings. */
4887 TEX_toktab[i].buffer = savenstr (env, p - env);
4888 TEX_toktab[i].len = p - env;
4889 i++;
4890 }
4891 if (*p)
4892 env = p + 1;
4893 else
4894 {
4895 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
4896 TEX_toktab[i].len = 0;
4897 break;
4898 }
4899 }
4900 }
4901
4902 \f
4903 /* Texinfo support. Dave Love, Mar. 2000. */
4904 static void
4905 Texinfo_nodes (inf)
4906 FILE * inf;
4907 {
4908 char *cp, *start;
4909 LOOP_ON_INPUT_LINES (inf, lb, cp)
4910 if (LOOKING_AT (cp, "@node"))
4911 {
4912 start = cp;
4913 while (*cp != '\0' && *cp != ',')
4914 cp++;
4915 pfnote (savenstr (start, cp - start), TRUE,
4916 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4917 }
4918 }
4919
4920 \f
4921 /*
4922 * Prolog support
4923 *
4924 * Assumes that the predicate or rule starts at column 0.
4925 * Only the first clause of a predicate or rule is added.
4926 * Original code by Sunichirou Sugou (1989)
4927 * Rewritten by Anders Lindgren (1996)
4928 */
4929 static int prolog_pr __P((char *, char *));
4930 static void prolog_skip_comment __P((linebuffer *, FILE *));
4931 static int prolog_atom __P((char *, int));
4932
4933 static void
4934 Prolog_functions (inf)
4935 FILE *inf;
4936 {
4937 char *cp, *last;
4938 int len;
4939 int allocated;
4940
4941 allocated = 0;
4942 len = 0;
4943 last = NULL;
4944
4945 LOOP_ON_INPUT_LINES (inf, lb, cp)
4946 {
4947 if (cp[0] == '\0') /* Empty line */
4948 continue;
4949 else if (iswhite (cp[0])) /* Not a predicate */
4950 continue;
4951 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4952 prolog_skip_comment (&lb, inf);
4953 else if ((len = prolog_pr (cp, last)) > 0)
4954 {
4955 /* Predicate or rule. Store the function name so that we
4956 only generate a tag for the first clause. */
4957 if (last == NULL)
4958 last = xnew(len + 1, char);
4959 else if (len + 1 > allocated)
4960 xrnew (last, len + 1, char);
4961 allocated = len + 1;
4962 strncpy (last, cp, len);
4963 last[len] = '\0';
4964 }
4965 }
4966 }
4967
4968
4969 static void
4970 prolog_skip_comment (plb, inf)
4971 linebuffer *plb;
4972 FILE *inf;
4973 {
4974 char *cp;
4975
4976 do
4977 {
4978 for (cp = plb->buffer; *cp != '\0'; cp++)
4979 if (cp[0] == '*' && cp[1] == '/')
4980 return;
4981 readline (plb, inf);
4982 }
4983 while (!feof(inf));
4984 }
4985
4986 /*
4987 * A predicate or rule definition is added if it matches:
4988 * <beginning of line><Prolog Atom><whitespace>(
4989 * or <beginning of line><Prolog Atom><whitespace>:-
4990 *
4991 * It is added to the tags database if it doesn't match the
4992 * name of the previous clause header.
4993 *
4994 * Return the size of the name of the predicate or rule, or 0 if no
4995 * header was found.
4996 */
4997 static int
4998 prolog_pr (s, last)
4999 char *s;
5000 char *last; /* Name of last clause. */
5001 {
5002 int pos;
5003 int len;
5004
5005 pos = prolog_atom (s, 0);
5006 if (pos < 1)
5007 return 0;
5008
5009 len = pos;
5010 pos = skip_spaces (s + pos) - s;
5011
5012 if ((s[pos] == '.'
5013 || (s[pos] == '(' && (pos += 1))
5014 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5015 && (last == NULL /* save only the first clause */
5016 || len != strlen (last)
5017 || !strneq (s, last, len)))
5018 {
5019 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5020 return len;
5021 }
5022 else
5023 return 0;
5024 }
5025
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there is no atom.
 *
 * Two forms are recognized:
 *  - an unquoted atom: a character accepted by ISLOWER, or '_',
 *    followed by any number of ISALNUM characters or '_';
 *  - a quoted atom: text between single quotes, where two single
 *    quotes in a row stand for one and a backslash quotes the next
 *    character.  A quoted atom that is not closed on this line is
 *    rejected.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* the backslash and what it quotes */
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5084
5085 \f
5086 /*
5087 * Support for Erlang
5088 *
5089 * Generates tags for functions, defines, and records.
5090 * Assumes that Erlang functions start at column 0.
5091 * Original code by Anders Lindgren (1996)
5092 */
5093 static int erlang_func __P((char *, char *));
5094 static void erlang_attribute __P((char *));
5095 static int erlang_atom __P((char *, int));
5096
5097 static void
5098 Erlang_functions (inf)
5099 FILE *inf;
5100 {
5101 char *cp, *last;
5102 int len;
5103 int allocated;
5104
5105 allocated = 0;
5106 len = 0;
5107 last = NULL;
5108
5109 LOOP_ON_INPUT_LINES (inf, lb, cp)
5110 {
5111 if (cp[0] == '\0') /* Empty line */
5112 continue;
5113 else if (iswhite (cp[0])) /* Not function nor attribute */
5114 continue;
5115 else if (cp[0] == '%') /* comment */
5116 continue;
5117 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5118 continue;
5119 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5120 {
5121 erlang_attribute (cp);
5122 last = NULL;
5123 }
5124 else if ((len = erlang_func (cp, last)) > 0)
5125 {
5126 /*
5127 * Function. Store the function name so that we only
5128 * generates a tag for the first clause.
5129 */
5130 if (last == NULL)
5131 last = xnew (len + 1, char);
5132 else if (len + 1 > allocated)
5133 xrnew (last, len + 1, char);
5134 allocated = len + 1;
5135 strncpy (last, cp, len);
5136 last[len] = '\0';
5137 }
5138 }
5139 }
5140
5141
5142 /*
5143 * A function definition is added if it matches:
5144 * <beginning of line><Erlang Atom><whitespace>(
5145 *
5146 * It is added to the tags database if it doesn't match the
5147 * name of the previous clause header.
5148 *
5149 * Return the size of the name of the function, or 0 if no function
5150 * was found.
5151 */
5152 static int
5153 erlang_func (s, last)
5154 char *s;
5155 char *last; /* Name of last clause. */
5156 {
5157 int pos;
5158 int len;
5159
5160 pos = erlang_atom (s, 0);
5161 if (pos < 1)
5162 return 0;
5163
5164 len = pos;
5165 pos = skip_spaces (s + pos) - s;
5166
5167 /* Save only the first clause. */
5168 if (s[pos++] == '('
5169 && (last == NULL
5170 || len != (int)strlen (last)
5171 || !strneq (s, last, len)))
5172 {
5173 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5174 return len;
5175 }
5176
5177 return 0;
5178 }
5179
5180
5181 /*
5182 * Handle attributes. Currently, tags are generated for defines
5183 * and records.
5184 *
5185 * They are on the form:
5186 * -define(foo, bar).
5187 * -define(Foo(M, N), M+N).
5188 * -record(graph, {vtab = notable, cyclic = true}).
5189 */
5190 static void
5191 erlang_attribute (s)
5192 char *s;
5193 {
5194 int pos;
5195 int len;
5196
5197 if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
5198 {
5199 if (s[pos++] == '(')
5200 {
5201 pos = skip_spaces (s + pos) - s;
5202 len = erlang_atom (s, pos);
5203 if (len != 0)
5204 pfnote (savenstr (& s[pos], len), TRUE,
5205 s, pos + len, lineno, linecharno);
5206 }
5207 }
5208 return;
5209 }
5210
5211
/*
 * Consume an Erlang atom (or variable) starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * An unquoted name starts with an ISALPHA character or '_' and goes on
 * with ISALNUM characters or '_'.  A quoted atom runs from a single
 * quote to the next single quote that is not preceded by a backslash;
 * it is rejected if it is not closed on this line.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        return (p + 1) - start; /* include the closing quote */

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* the backslash and what it quotes */
        break;

      default:
        p++;
        break;
      }
}
5261
5262 \f
5263 #ifdef ETAGS_REGEXPS
5264
5265 static char *scan_separators __P((char *));
5266 static void analyse_regex __P((char *, bool));
5267 static void add_regex __P((char *, bool, language *));
5268 static char *substitute __P((char *, char *, struct re_registers *));
5269
/* Take a string like "/blah/" and turn it into "blah", working in
   place.  The first character of NAME is the separator; the text
   after it is copied down to the start of NAME until an unquoted
   separator or the end of the string is met, and the copy is
   null-terminated.

   A backslash quotes the next character: \a \b \d \e \f \n \r \t \v
   become the corresponding control characters (\d is DEL, \e is ESC),
   a quoted separator becomes the bare separator, and any other quoted
   character is kept together with its backslash.

   Returns a pointer to the place where scanning stopped: the
   terminating separator, or the end of the string if there was none.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character; a backslash that ends the
             string is simply dropped.  */
          ++name;
          if (*name == '\0')
            break;

          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break;
            case 'b': *copyto++ = '\b'; break;
            case 'd': *copyto++ = 0177; break;
            case 'e': *copyto++ = 033; break;
            case 'f': *copyto++ = '\f'; break;
            case 'n': *copyto++ = '\n'; break;
            case 'r': *copyto++ = '\r'; break;
            case 't': *copyto++ = '\t'; break;
            case 'v': *copyto++ = '\v'; break;
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote.  */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
          continue;
        }

      if (*name == sep)
        break;

      *copyto++ = *name;
    }

  /* Terminate copied string.  */
  *copyto = '\0';
  return name;
}
5325
5326 /* Look at the argument of --regex or --no-regex and do the right
5327 thing. Same for each line of a regexp file. */
5328 static void
5329 analyse_regex (regex_arg, ignore_case)
5330 char *regex_arg;
5331 bool ignore_case;
5332 {
5333 if (regex_arg == NULL)
5334 {
5335 free_patterns (); /* --no-regex: remove existing regexps */
5336 return;
5337 }
5338
5339 /* A real --regexp option or a line in a regexp file. */
5340 switch (regex_arg[0])
5341 {
5342 /* Comments in regexp file or null arg to --regex. */
5343 case '\0':
5344 case ' ':
5345 case '\t':
5346 break;
5347
5348 /* Read a regex file. This is recursive and may result in a
5349 loop, which will stop when the file descriptors are exhausted. */
5350 case '@':
5351 {
5352 FILE *regexfp;
5353 linebuffer regexbuf;
5354 char *regexfile = regex_arg + 1;
5355
5356 /* regexfile is a file containing regexps, one per line. */
5357 regexfp = fopen (regexfile, "r");
5358 if (regexfp == NULL)
5359 {
5360 pfatal (regexfile);
5361 return;
5362 }
5363 initbuffer (&regexbuf);
5364 while (readline_internal (&regexbuf, regexfp) > 0)
5365 analyse_regex (regexbuf.buffer, ignore_case);
5366 free (regexbuf.buffer);
5367 fclose (regexfp);
5368 }
5369 break;
5370
5371 /* Regexp to be used for a specific language only. */
5372 case '{':
5373 {
5374 language *lang;
5375 char *lang_name = regex_arg + 1;
5376 char *cp;
5377
5378 for (cp = lang_name; *cp != '}'; cp++)
5379 if (*cp == '\0')
5380 {
5381 error ("unterminated language name in regex: %s", regex_arg);
5382 return;
5383 }
5384 *cp = '\0';
5385 lang = get_language_from_langname (lang_name);
5386 if (lang == NULL)
5387 return;
5388 add_regex (cp + 1, ignore_case, lang);
5389 }
5390 break;
5391
5392 /* Regexp to be used for any language. */
5393 default:
5394 add_regex (regex_arg, ignore_case, NULL);
5395 break;
5396 }
5397 }
5398
5399 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5400 expression, into a real regular expression by compiling it. */
5401 static void
5402 add_regex (regexp_pattern, ignore_case, lang)
5403 char *regexp_pattern;
5404 bool ignore_case;
5405 language *lang;
5406 {
5407 static struct re_pattern_buffer zeropattern;
5408 char *name;
5409 const char *err;
5410 struct re_pattern_buffer *patbuf;
5411 pattern *pp;
5412
5413
5414 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5415 {
5416 error ("%s: unterminated regexp", regexp_pattern);
5417 return;
5418 }
5419 name = scan_separators (regexp_pattern);
5420 if (regexp_pattern[0] == '\0')
5421 {
5422 error ("null regexp", (char *)NULL);
5423 return;
5424 }
5425 (void) scan_separators (name);
5426
5427 patbuf = xnew (1, struct re_pattern_buffer);
5428 *patbuf = zeropattern;
5429 if (ignore_case)
5430 patbuf->translate = lc_trans; /* translation table to fold case */
5431
5432 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5433 if (err != NULL)
5434 {
5435 error ("%s while compiling pattern", err);
5436 return;
5437 }
5438
5439 pp = p_head;
5440 p_head = xnew (1, pattern);
5441 p_head->regex = savestr (regexp_pattern);
5442 p_head->p_next = pp;
5443 p_head->lang = lang;
5444 p_head->pat = patbuf;
5445 p_head->name_pattern = savestr (name);
5446 p_head->error_signaled = FALSE;
5447 p_head->ignore_case = ignore_case;
5448 }
5449
5450 /*
5451 * Do the substitutions indicated by the regular expression and
5452 * arguments.
5453 */
5454 static char *
5455 substitute (in, out, regs)
5456 char *in, *out;
5457 struct re_registers *regs;
5458 {
5459 char *result, *t;
5460 int size, dig, diglen;
5461
5462 result = NULL;
5463 size = strlen (out);
5464
5465 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5466 if (out[size - 1] == '\\')
5467 fatal ("pattern error in \"%s\"", out);
5468 for (t = etags_strchr (out, '\\');
5469 t != NULL;
5470 t = etags_strchr (t + 2, '\\'))
5471 if (ISDIGIT (t[1]))
5472 {
5473 dig = t[1] - '0';
5474 diglen = regs->end[dig] - regs->start[dig];
5475 size += diglen - 2;
5476 }
5477 else
5478 size -= 1;
5479
5480 /* Allocate space and do the substitutions. */
5481 result = xnew (size + 1, char);
5482
5483 for (t = result; *out != '\0'; out++)
5484 if (*out == '\\' && ISDIGIT (*++out))
5485 {
5486 dig = *out - '0';
5487 diglen = regs->end[dig] - regs->start[dig];
5488 strncpy (t, in + regs->start[dig], diglen);
5489 t += diglen;
5490 }
5491 else
5492 *t++ = *out;
5493 *t = '\0';
5494
5495 assert (t <= result + size && t - result == (int)strlen (result));
5496
5497 return result;
5498 }
5499
5500 /* Deallocate all patterns. */
5501 static void
5502 free_patterns ()
5503 {
5504 pattern *pp;
5505 while (p_head != NULL)
5506 {
5507 pp = p_head->p_next;
5508 free (p_head->regex);
5509 free (p_head->name_pattern);
5510 free (p_head);
5511 p_head = pp;
5512 }
5513 return;
5514 }
5515 #endif /* ETAGS_REGEXPS */
5516
5517 \f
5518 static bool
5519 nocase_tail (cp)
5520 char *cp;
5521 {
5522 register int len = 0;
5523
5524 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5525 cp++, len++;
5526 if (*cp == '\0' && !intoken (dbp[len]))
5527 {
5528 dbp += len;
5529 return TRUE;
5530 }
5531 return FALSE;
5532 }
5533
5534 static char *
5535 get_tag (bp)
5536 register char *bp;
5537 {
5538 register char *cp, *name;
5539
5540 if (*bp == '\0')
5541 return NULL;
5542 /* Go till you get to white space or a syntactic break */
5543 for (cp = bp + 1; !notinname (*cp); cp++)
5544 continue;
5545 name = savenstr (bp, cp-bp);
5546 pfnote (name, TRUE,
5547 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5548 return name;
5549 }
5550
5551 /* Initialize a linebuffer for use */
5552 static void
5553 initbuffer (lbp)
5554 linebuffer *lbp;
5555 {
5556 lbp->size = (DEBUG) ? 3 : 200;
5557 lbp->buffer = xnew (lbp->size, char);
5558 lbp->buffer[0] = '\0';
5559 lbp->len = 0;
5560 }
5561
5562 /*
5563 * Read a line of text from `stream' into `lbp', excluding the
5564 * newline or CR-NL, if any. Return the number of characters read from
5565 * `stream', which is the length of the line including the newline.
5566 *
5567 * On DOS or Windows we do not count the CR character, if any, before the
5568 * NL, in the returned length; this mirrors the behavior of emacs on those
5569 * platforms (for text files, it translates CR-NL to NL as it reads in the
5570 * file).
5571 */
5572 static long
5573 readline_internal (lbp, stream)
5574 linebuffer *lbp;
5575 register FILE *stream;
5576 {
5577 char *buffer = lbp->buffer;
5578 register char *p = lbp->buffer;
5579 register char *pend;
5580 int chars_deleted;
5581
5582 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5583
5584 for (;;)
5585 {
5586 register int c = getc (stream);
5587 if (p == pend)
5588 {
5589 /* We're at the end of linebuffer: expand it. */
5590 lbp->size *= 2;
5591 xrnew (buffer, lbp->size, char);
5592 p += buffer - lbp->buffer;
5593 pend = buffer + lbp->size;
5594 lbp->buffer = buffer;
5595 }
5596 if (c == EOF)
5597 {
5598 *p = '\0';
5599 chars_deleted = 0;
5600 break;
5601 }
5602 if (c == '\n')
5603 {
5604 if (p > buffer && p[-1] == '\r')
5605 {
5606 p -= 1;
5607 #ifdef DOS_NT
5608 /* Assume CRLF->LF translation will be performed by Emacs
5609 when loading this file, so CRs won't appear in the buffer.
5610 It would be cleaner to compensate within Emacs;
5611 however, Emacs does not know how many CRs were deleted
5612 before any given point in the file. */
5613 chars_deleted = 1;
5614 #else
5615 chars_deleted = 2;
5616 #endif
5617 }
5618 else
5619 {
5620 chars_deleted = 1;
5621 }
5622 *p = '\0';
5623 break;
5624 }
5625 *p++ = c;
5626 }
5627 lbp->len = p - buffer;
5628
5629 return lbp->len + chars_deleted;
5630 }
5631
5632 /*
5633 * Like readline_internal, above, but in addition try to match the
5634 * input line against relevant regular expressions.
5635 */
5636 static void
5637 readline (lbp, stream)
5638 linebuffer *lbp;
5639 FILE *stream;
5640 {
5641 long result;
5642
5643 linecharno = charno; /* update global char number of line start */
5644 result = readline_internal (lbp, stream); /* read line */
5645 lineno += 1; /* increment global line number */
5646 charno += result; /* increment global char number */
5647
5648 /* Honour #line directives. */
5649 if (!no_line_directive)
5650 {
5651 static bool discard_until_line_directive;
5652
5653 /* Check whether this is a #line directive. */
5654 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
5655 {
5656 int start, lno;
5657
5658 if (DEBUG) start = 0; /* shut up the compiler */
5659 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
5660 {
5661 char *endp = lbp->buffer + start;
5662
5663 assert (start > 0);
5664 while ((endp = etags_strchr (endp, '"')) != NULL
5665 && endp[-1] == '\\')
5666 endp++;
5667 if (endp != NULL)
5668 /* Ok, this is a real #line directive. Let's deal with it. */
5669 {
5670 char *taggedabsname; /* absolute name of original file */
5671 char *taggedfname; /* name of original file as given */
5672 char *name; /* temp var */
5673
5674 discard_until_line_directive = FALSE; /* found it */
5675 name = lbp->buffer + start;
5676 *endp = '\0';
5677 canonicalize_filename (name); /* for DOS */
5678 taggedabsname = absolute_filename (name, curfdp->infabsdir);
5679 if (filename_is_absolute (name)
5680 || filename_is_absolute (curfdp->infname))
5681 taggedfname = savestr (taggedabsname);
5682 else
5683 taggedfname = relative_filename (taggedabsname,tagfiledir);
5684
5685 if (streq (curfdp->taggedfname, taggedfname))
5686 /* The #line directive is only a line number change. We
5687 deal with this afterwards. */
5688 free (taggedfname);
5689 else
5690 /* The tags following this #line directive should be
5691 attributed to taggedfname. In order to do this, set
5692 curfdp accordingly. */
5693 {
5694 fdesc *fdp; /* file description pointer */
5695
5696 /* Go look for a file description already set up for the
5697 file indicated in the #line directive. If there is
5698 one, use it from now until the next #line
5699 directive. */
5700 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5701 if (streq (fdp->infname, curfdp->infname)
5702 && streq (fdp->taggedfname, taggedfname))
5703 /* If we remove the second test above (after the &&)
5704 then all entries pertaining to the same file are
5705 coalesced in the tags file. If we use it, then
5706 entries pertaining to the same file but generated
5707 from different files (via #line directives) will
5708 go into separate sections in the tags file. These
5709 alternatives look equivalent. The first one
5710 destroys some apparently useless information. */
5711 {
5712 curfdp = fdp;
5713 free (taggedfname);
5714 break;
5715 }
5716 /* Else, if we already tagged the real file, skip all
5717 input lines until the next #line directive. */
5718 if (fdp == NULL) /* not found */
5719 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5720 if (streq (fdp->infabsname, taggedabsname))
5721 {
5722 discard_until_line_directive = TRUE;
5723 free (taggedfname);
5724 break;
5725 }
5726 /* Else create a new file description and use that from
5727 now on, until the next #line directive. */
5728 if (fdp == NULL) /* not found */
5729 {
5730 fdp = fdhead;
5731 fdhead = xnew (1, fdesc);
5732 *fdhead = *curfdp; /* copy curr. file description */
5733 fdhead->next = fdp;
5734 fdhead->infname = savestr (curfdp->infname);
5735 fdhead->infabsname = savestr (curfdp->infabsname);
5736 fdhead->infabsdir = savestr (curfdp->infabsdir);
5737 fdhead->taggedfname = taggedfname;
5738 fdhead->usecharno = FALSE;
5739 curfdp = fdhead;
5740 }
5741 }
5742 free (taggedabsname);
5743 lineno = lno - 1;
5744 readline (lbp, stream);
5745 return;
5746 } /* if a real #line directive */
5747 } /* if #line is followed by a a number */
5748 } /* if line begins with "#line " */
5749
5750 /* If we are here, no #line directive was found. */
5751 if (discard_until_line_directive)
5752 {
5753 if (result > 0)
5754 {
5755 /* Do a tail recursion on ourselves, thus discarding the contents
5756 of the line buffer. */
5757 readline (lbp, stream);
5758 return;
5759 }
5760 /* End of file. */
5761 discard_until_line_directive = FALSE;
5762 return;
5763 }
5764 } /* if #line directives should be considered */
5765
5766 #ifdef ETAGS_REGEXPS
5767 {
5768 int match;
5769 pattern *pp;
5770
5771 /* Match against relevant patterns. */
5772 if (lbp->len > 0)
5773 for (pp = p_head; pp != NULL; pp = pp->p_next)
5774 {
5775 /* Only use generic regexps or those for the current language. */
5776 if (pp->lang != NULL && pp->lang != fdhead->lang)
5777 continue;
5778
5779 match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
5780 switch (match)
5781 {
5782 case -2:
5783 /* Some error. */
5784 if (!pp->error_signaled)
5785 {
5786 error ("error while matching \"%s\"", pp->regex);
5787 pp->error_signaled = TRUE;
5788 }
5789 break;
5790 case -1:
5791 /* No match. */
5792 break;
5793 default:
5794 /* Match occurred. Construct a tag. */
5795 if (pp->name_pattern[0] != '\0')
5796 {
5797 /* Make a named tag. */
5798 char *name = substitute (lbp->buffer,
5799 pp->name_pattern, &pp->regs);
5800 if (name != NULL)
5801 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5802 }
5803 else
5804 {
5805 /* Make an unnamed tag. */
5806 pfnote ((char *)NULL, TRUE,
5807 lbp->buffer, match, lineno, linecharno);
5808 }
5809 break;
5810 }
5811 }
5812 }
5813 #endif /* ETAGS_REGEXPS */
5814 }
5815
5816 \f
/*
 * Return a newly allocated copy of the whole string CP.  The work is
 * done by savenstr, called with the length of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_len = strlen (cp);

  return savenstr (cp, whole_len);
}
5827
5828 /*
5829 * Return a pointer to a space of size LEN+1 allocated with xnew where
5830 * the string CP has been copied for at most the first LEN characters.
5831 */
5832 static char *
5833 savenstr (cp, len)
5834 char *cp;
5835 int len;
5836 {
5837 register char *dp;
5838
5839 dp = xnew (len + 1, char);
5840 strncpy (dp, cp, len);
5841 dp[len] = '\0';
5842 return dp;
5843 }
5844
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * The terminating null character takes part in the search, so looking
 * for '\0' yields a pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        found = sp;             /* keep the rightmost match so far */
      if (*sp == '\0')
        break;
    }
  return (char *)found;
}
5866
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * The terminating null character takes part in the search, so looking
 * for '\0' yields a pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
5885
5886 /*
5887 * Return TRUE if the two strings are equal, ignoring case for alphabetic
5888 * characters.
5889 *
5890 * Analogous to BSD's strcasecmp, included for portability.
5891 */
5892 static bool
5893 strcaseeq (s1, s2)
5894 register const char *s1;
5895 register const char *s2;
5896 {
5897 while (*s1 != '\0'
5898 && (ISALPHA (*s1) && ISALPHA (*s2)
5899 ? lowcase (*s1) == lowcase (*s2)
5900 : *s1 == *s2))
5901 s1++, s2++;
5902
5903 return (*s1 == *s2);
5904 }
5905
/* Advance CP over every leading character for which iswhite is true
   and return the resulting pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5915
/* Advance CP up to the first character for which iswhite is true, or
   up to the terminating NUL if there is none, and return the resulting
   pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
5925
5926 /* Print error message and exit. */
5927 void
5928 fatal (s1, s2)
5929 char *s1, *s2;
5930 {
5931 error (s1, s2);
5932 exit (BAD);
5933 }
5934
5935 static void
5936 pfatal (s1)
5937 char *s1;
5938 {
5939 perror (s1);
5940 exit (BAD);
5941 }
5942
5943 static void
5944 suggest_asking_for_help ()
5945 {
5946 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5947 progname,
5948 #ifdef LONG_OPTIONS
5949 "--help"
5950 #else
5951 "-h"
5952 #endif
5953 );
5954 exit (BAD);
5955 }
5956
5957 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5958 static void
5959 error (s1, s2)
5960 const char *s1, *s2;
5961 {
5962 fprintf (stderr, "%s: ", progname);
5963 fprintf (stderr, s1, s2);
5964 fprintf (stderr, "\n");
5965 }
5966
5967 /* Return a newly-allocated string whose contents
5968 concatenate those of s1, s2, s3. */
5969 static char *
5970 concat (s1, s2, s3)
5971 char *s1, *s2, *s3;
5972 {
5973 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5974 char *result = xnew (len1 + len2 + len3 + 1, char);
5975
5976 strcpy (result, s1);
5977 strcpy (result + len1, s2);
5978 strcpy (result + len1 + len2, s3);
5979 result[len1 + len2 + len3] = '\0';
5980
5981 return result;
5982 }
5983
5984 \f
5985 /* Does the same work as the system V getcwd, but does not need to
5986 guess the buffer size in advance. */
5987 static char *
5988 etags_getcwd ()
5989 {
5990 #ifdef HAVE_GETCWD
5991 int bufsize = 200;
5992 char *path = xnew (bufsize, char);
5993
5994 while (getcwd (path, bufsize) == NULL)
5995 {
5996 if (errno != ERANGE)
5997 pfatal ("getcwd");
5998 bufsize *= 2;
5999 free (path);
6000 path = xnew (bufsize, char);
6001 }
6002
6003 canonicalize_filename (path);
6004 return path;
6005
6006 #else /* not HAVE_GETCWD */
6007 #if MSDOS
6008
6009 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6010
6011 getwd (path);
6012
6013 for (p = path; *p != '\0'; p++)
6014 if (*p == '\\')
6015 *p = '/';
6016 else
6017 *p = lowcase (*p);
6018
6019 return strdup (path);
6020 #else /* not MSDOS */
6021 linebuffer path;
6022 FILE *pipe;
6023
6024 initbuffer (&path);
6025 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6026 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6027 pfatal ("pwd");
6028 pclose (pipe);
6029
6030 return path.buffer;
6031 #endif /* not MSDOS */
6032 #endif /* not HAVE_GETCWD */
6033 }
6034
6035 /* Return a newly allocated string containing the file name of FILE
6036 relative to the absolute directory DIR (which should end with a slash). */
6037 static char *
6038 relative_filename (file, dir)
6039 char *file, *dir;
6040 {
6041 char *fp, *dp, *afn, *res;
6042 int i;
6043
6044 /* Find the common root of file and dir (with a trailing slash). */
6045 afn = absolute_filename (file, cwd);
6046 fp = afn;
6047 dp = dir;
6048 while (*fp++ == *dp++)
6049 continue;
6050 fp--, dp--; /* back to the first differing char */
6051 #ifdef DOS_NT
6052 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6053 return afn;
6054 #endif
6055 do /* look at the equal chars until '/' */
6056 fp--, dp--;
6057 while (*fp != '/');
6058
6059 /* Build a sequence of "../" strings for the resulting relative file name. */
6060 i = 0;
6061 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6062 i += 1;
6063 res = xnew (3*i + strlen (fp + 1) + 1, char);
6064 res[0] = '\0';
6065 while (i-- > 0)
6066 strcat (res, "../");
6067
6068 /* Add the file name relative to the common root of file and dir. */
6069 strcat (res, fp + 1);
6070 free (afn);
6071
6072 return res;
6073 }
6074
/* Copy the NUL-terminated string SRC, terminator included, to DST,
   where DST lies before SRC in the same buffer.  Bytes are moved
   strictly front to back, which makes the copy well defined even
   though the two regions overlap (strcpy gives no such guarantee). */
static void
shift_string_left (dst, src)
     register char *dst;
     register const char *src;
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." substrings are then removed from
   the result.  If nothing is left, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle). */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back from the slash to the nearest earlier
                 position at which an absolute file name starts, i.e.
                 the beginning of the component that ".." cancels.
                 The scan never steps outside the buffer. */
              cp = slashp;
              while (cp > res && !filename_is_absolute (cp - 1))
                cp--;
              if (cp == res)
                cp = slashp;    /* the absolute name begins with "/.." */
              else
                {
                  cp--;         /* start of the component to delete */
#ifdef DOS_NT
                  /* Under MSDOS and NT we get `d:/NAME' as absolute
                     file name, so the luser could say `d:/../NAME'.
                     We silently treat this as `d:/NAME'. */
                  if (cp[0] != '/')
                    cp = slashp;
#endif
                }
              shift_string_left (cp, slashp + 3);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              shift_string_left (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root directory.  The
         now useless empty buffer is released before returning. */
      free (res);
      return savestr ("/");
    }
  return res;
}
6135
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).

   FILE is canonicalized in place.  If it contains no slash, a copy of
   DIR is returned.  Otherwise FILE is cut right after its last slash
   for the duration of the call to absolute_filename and put back to
   its previous contents afterwards. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      saved = last_slash[1];
      last_slash[1] = '\0';     /* keep only the directory part */
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;    /* restore the caller's string */
    }
  else
    dirname = savestr (dir);

  return dirname;
}
6157
/* Tell whether FN is an absolute file name: one starting with a slash
   or, under DOS_NT, with a letter followed by ":/".  The argument
   string must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6170
/* Put the file name FN into canonical form, in place.  Under DOS_NT
   this uppercases a lowercase drive letter and turns every backslash
   into a slash; on other systems the string is left alone. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case. */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes. */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* Nothing to do here; the assignment only touches the local copy
     of the pointer and serves to shut up the compiler. */
  fn = NULL;
#endif
}
6189
6190 /* Set the minimum size of a string contained in a linebuffer. */
6191 static void
6192 linebuffer_setlen (lbp, toksize)
6193 linebuffer *lbp;
6194 int toksize;
6195 {
6196 while (lbp->size <= toksize)
6197 {
6198 lbp->size *= 2;
6199 xrnew (lbp->buffer, lbp->size, char);
6200 }
6201 lbp->len = toksize;
6202 }
6203
6204 /* Like malloc but get fatal error if memory is exhausted. */
6205 static PTR
6206 xmalloc (size)
6207 unsigned int size;
6208 {
6209 PTR result = (PTR) malloc (size);
6210 if (result == NULL)
6211 fatal ("virtual memory exhausted", (char *)NULL);
6212 return result;
6213 }
6214
6215 static PTR
6216 xrealloc (ptr, size)
6217 char *ptr;
6218 unsigned int size;
6219 {
6220 PTR result = (PTR) realloc (ptr, size);
6221 if (result == NULL)
6222 fatal ("virtual memory exhausted", (char *)NULL);
6223 return result;
6224 }
6225
6226 /*
6227 * Local Variables:
6228 * c-indentation-style: gnu
6229 * indent-tabs-mode: t
6230 * tab-width: 8
6231 * fill-column: 79
6232 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
6233 * End:
6234 */