(main): Avoid a buffer overrun with sprintf.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (ideas by Mykola Dzyuba).
32 *
33 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
34 */
35
36 char pot_etags_version[] = "@(#) pot revision number is $Revision: 15.18 $";
37
38 #define TRUE 1
39 #define FALSE 0
40
41 #ifdef DEBUG
42 # undef DEBUG
43 # define DEBUG TRUE
44 #else
45 # define DEBUG FALSE
46 # define NDEBUG /* disable assert */
47 #endif
48
49 #ifdef HAVE_CONFIG_H
50 # include <config.h>
51 /* On some systems, Emacs defines static as nothing for the sake
52 of unexec. We don't want that here since we don't use unexec. */
53 # undef static
54 # define ETAGS_REGEXPS /* use the regexp features */
55 # define LONG_OPTIONS /* accept long options */
56 # ifndef PTR /* for Xemacs */
57 # define PTR void *
58 # endif
59 # ifndef __P /* for Xemacs */
60 # define __P(args) args
61 # endif
62 #else
63 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
64 # define __P(args) args /* use prototypes */
65 # define PTR void * /* for generic pointers */
66 # else
67 # define __P(args) () /* no prototypes */
68 # define const /* remove const for old compilers' sake */
69 # define PTR long * /* don't use void* */
70 # endif
71 #endif /* !HAVE_CONFIG_H */
72
73 #ifndef _GNU_SOURCE
74 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
75 #endif
76
77 /* WIN32_NATIVE is for Xemacs.
78 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
79 #ifdef WIN32_NATIVE
80 # undef MSDOS
81 # undef WINDOWSNT
82 # define WINDOWSNT
83 #endif /* WIN32_NATIVE */
84
85 #ifdef MSDOS
86 # undef MSDOS
87 # define MSDOS TRUE
88 # include <fcntl.h>
89 # include <sys/param.h>
90 # include <io.h>
91 # ifndef HAVE_CONFIG_H
92 # define DOS_NT
93 # include <sys/config.h>
94 # endif
95 #else
96 # define MSDOS FALSE
97 #endif /* MSDOS */
98
99 #ifdef WINDOWSNT
100 # include <stdlib.h>
101 # include <fcntl.h>
102 # include <string.h>
103 # include <direct.h>
104 # include <io.h>
105 # define MAXPATHLEN _MAX_PATH
106 # undef HAVE_NTGUI
107 # undef DOS_NT
108 # define DOS_NT
109 # ifndef HAVE_GETCWD
110 # define HAVE_GETCWD
111 # endif /* undef HAVE_GETCWD */
112 #else /* !WINDOWSNT */
113 # ifdef STDC_HEADERS
114 # include <stdlib.h>
115 # include <string.h>
116 # else
117 extern char *getenv ();
118 # endif
119 #endif /* !WINDOWSNT */
120
121 #ifdef HAVE_UNISTD_H
122 # include <unistd.h>
123 #else
124 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
125 extern char *getcwd (char *buf, size_t size);
126 # endif
127 #endif /* HAVE_UNISTD_H */
128
129 #include <stdio.h>
130 #include <ctype.h>
131 #include <errno.h>
132 #ifndef errno
133 extern int errno;
134 #endif
135 #include <sys/types.h>
136 #include <sys/stat.h>
137
138 #include <assert.h>
139 #ifdef NDEBUG
140 # undef assert /* some systems have a buggy assert.h */
141 # define assert(x) ((void) 0)
142 #endif
143
144 #if !defined (S_ISREG) && defined (S_IFREG)
145 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
146 #endif
147
148 #ifdef LONG_OPTIONS
149 # include <getopt.h>
150 #else
151 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
152 extern char *optarg;
153 extern int optind, opterr;
154 #endif /* LONG_OPTIONS */
155
156 #ifdef ETAGS_REGEXPS
157 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
158 # ifdef __CYGWIN__ /* compiling on Cygwin */
159 !!! NOTICE !!!
160 the regex.h distributed with Cygwin is not compatible with etags, alas!
161 If you want regular expression support, you should delete this notice and
162 arrange to use the GNU regex.h and regex.c.
163 # endif
164 # endif
165 # include <regex.h>
166 #endif /* ETAGS_REGEXPS */
167
168 /* Define CTAGS to make the program "ctags" compatible with the usual one.
169 Leave it undefined to make the program "etags", which makes emacs-style
170 tag tables and tags typedefs, #defines and struct/union/enum by default. */
171 #ifdef CTAGS
172 # undef CTAGS
173 # define CTAGS TRUE
174 #else
175 # define CTAGS FALSE
176 #endif
177
178 /* Exit codes for success and failure. */
179 #ifdef VMS
180 # define GOOD 1
181 # define BAD 0
182 #else
183 # define GOOD 0
184 # define BAD 1
185 #endif
186
/* String comparison helpers yielding non-zero when the strings match.
   streq compares S and T in full; strneq compares at most N characters.
   Both arguments are handed straight to strcmp/strncmp, so BOTH must be
   non-null; the assertion (a no-op when NDEBUG is defined) checks
   exactly that.  S and T are evaluated twice when assertions are
   enabled, so do not pass expressions with side effects.  */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
189
190 #define CHARS 256 /* 2^8: number of distinct values of an 8-bit char */
191 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
192 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
193 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
194 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
195 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
196 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
197
198 #define ISALNUM(c) isalnum (CHAR(c))
199 #define ISALPHA(c) isalpha (CHAR(c))
200 #define ISDIGIT(c) isdigit (CHAR(c))
201 #define ISLOWER(c) islower (CHAR(c))
202
203 #define lowcase(c) tolower (CHAR(c))
204 #define upcase(c) toupper (CHAR(c))
205
206
207 /*
208 * xnew, xrnew -- allocate, reallocate storage
209 *
210 * SYNOPSIS: Type *xnew (int n, Type);
211 * void xrnew (OldPointer, int n, Type);
212 */
213 #if DEBUG
214 # include "chkmalloc.h"
215 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
216 (n) * sizeof (Type)))
217 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
218 (char *) (op), (n) * sizeof (Type)))
219 #else
220 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
221 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
222 (char *) (op), (n) * sizeof (Type)))
223 #endif
224
225 #define bool int
226
227 typedef void Lang_function __P((FILE *));
228
/* One supported compression format.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  char *suffix;
  /* Command that takes one argument and decompresses to stdout.  */
  char *command;
} compressor;
234
235 typedef struct
236 {
237 char *name; /* language name */
238 bool metasource; /* source used to generate other sources */
239 Lang_function *function; /* parse function */
240 char **filenames; /* names of this language's files */
241 char **suffixes; /* name suffixes of this language's files */
242 char **interpreters; /* interpreters for this language */
243 } language;
244
245 typedef struct fdesc
246 {
247 struct fdesc *next; /* for the linked list */
248 char *infname; /* uncompressed input file name */
249 char *infabsname; /* absolute uncompressed input file name */
250 char *infabsdir; /* absolute dir of input file */
251 char *taggedfname; /* file name to write in tagfile */
252 language *lang; /* language of file */
253 char *prop; /* file properties to write in tagfile */
254 bool usecharno; /* etags tags shall contain char number */
255 } fdesc;
256
257 typedef struct node_st
258 { /* sorting structure */
259 struct node_st *left, *right; /* left and right sons */
260 fdesc *fdp; /* description of file to whom tag belongs */
261 char *name; /* tag name */
262 char *pat; /* search pattern */
263 bool valid; /* write this tag on the tag file */
264 bool is_func; /* function tag: use pattern in CTAGS mode */
265 bool been_warned; /* warning already given for duplicated tag */
266 int lno; /* line number tag is on */
267 long cno; /* character number line starts on */
268 } node;
269
270 /*
271 * A `linebuffer' is a structure which holds a line of text.
272 * `readline_internal' reads a line from a stream into a linebuffer
273 * and works regardless of the length of the line.
274 * SIZE is the size of BUFFER, LEN is the length of the string in
275 * BUFFER after readline reads it.
276 */
/* Holder for one line of text.  */
typedef struct
{
  /* Size of BUFFER.  */
  long size;
  /* Length of the string in BUFFER after readline reads it.  */
  int len;
  /* The text itself.  */
  char *buffer;
} linebuffer;
283
284 /* Used to support mixing of --lang and file names. */
285 typedef struct
286 {
287 enum {
288 at_language, /* a language specification */
289 at_regexp, /* a regular expression */
290 at_icregexp, /* same, but with case ignored */
291 at_filename /* a file name */
292 } arg_type; /* argument type */
293 language *lang; /* language associated with the argument */
294 char *what; /* the argument itself */
295 } argument;
296
297 #ifdef ETAGS_REGEXPS
298 /* Structure defining a regular expression. */
299 typedef struct pattern
300 {
301 struct pattern *p_next;
302 language *lang;
303 char *regex;
304 struct re_pattern_buffer *pat;
305 struct re_registers regs;
306 char *name_pattern;
307 bool error_signaled;
308 bool ignore_case;
309 } pattern;
310 #endif /* ETAGS_REGEXPS */
311
312
313 /* Many compilers barf on this:
314 Lang_function Ada_funcs;
315 so let's write it this way */
316 static void Ada_funcs __P((FILE *));
317 static void Asm_labels __P((FILE *));
318 static void C_entries __P((int c_ext, FILE *));
319 static void default_C_entries __P((FILE *));
320 static void plain_C_entries __P((FILE *));
321 static void Cjava_entries __P((FILE *));
322 static void Cobol_paragraphs __P((FILE *));
323 static void Cplusplus_entries __P((FILE *));
324 static void Cstar_entries __P((FILE *));
325 static void Erlang_functions __P((FILE *));
326 static void Fortran_functions __P((FILE *));
327 static void Yacc_entries __P((FILE *));
328 static void Lisp_functions __P((FILE *));
329 static void Makefile_targets __P((FILE *));
330 static void Pascal_functions __P((FILE *));
331 static void Perl_functions __P((FILE *));
332 static void PHP_functions __P((FILE *));
333 static void Postscript_functions __P((FILE *));
334 static void Prolog_functions __P((FILE *));
335 static void Python_functions __P((FILE *));
336 static void Scheme_functions __P((FILE *));
337 static void TeX_commands __P((FILE *));
338 static void Texinfo_nodes __P((FILE *));
339 static void just_read_file __P((FILE *));
340
341 static void print_language_names __P((void));
342 static void print_version __P((void));
343 static void print_help __P((void));
344 int main __P((int, char **));
345
346 static compressor *get_compressor_from_suffix __P((char *, char **));
347 static language *get_language_from_langname __P((const char *));
348 static language *get_language_from_interpreter __P((char *));
349 static language *get_language_from_filename __P((char *, bool));
350 static long readline __P((linebuffer *, FILE *));
351 static long readline_internal __P((linebuffer *, FILE *));
352 static bool nocase_tail __P((char *));
353 static char *get_tag __P((char *));
354
355 #ifdef ETAGS_REGEXPS
356 static void analyse_regex __P((char *, bool));
357 static void add_regex __P((char *, bool, language *));
358 static void free_patterns __P((void));
359 #endif /* ETAGS_REGEXPS */
360 static void error __P((const char *, const char *));
361 static void suggest_asking_for_help __P((void));
362 void fatal __P((char *, char *));
363 static void pfatal __P((char *));
364 static void add_node __P((node *, node **));
365
366 static void init __P((void));
367 static void initbuffer __P((linebuffer *));
368 static void process_file __P((char *, language *));
369 static void find_entries __P((FILE *));
370 static void free_tree __P((node *));
371 static void free_fdesc __P((fdesc *));
372 static void pfnote __P((char *, bool, char *, int, int, long));
373 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
374 static void invalidate_nodes __P((fdesc *, node **));
375 static void put_entries __P((node *));
376
377 static char *concat __P((char *, char *, char *));
378 static char *skip_spaces __P((char *));
379 static char *skip_non_spaces __P((char *));
380 static char *savenstr __P((char *, int));
381 static char *savestr __P((char *));
382 static char *etags_strchr __P((const char *, int));
383 static char *etags_strrchr __P((const char *, int));
384 static bool strcaseeq __P((const char *, const char *));
385 static char *etags_getcwd __P((void));
386 static char *relative_filename __P((char *, char *));
387 static char *absolute_filename __P((char *, char *));
388 static char *absolute_dirname __P((char *, char *));
389 static bool filename_is_absolute __P((char *f));
390 static void canonicalize_filename __P((char *));
391 static void linebuffer_setlen __P((linebuffer *, int));
392 static PTR xmalloc __P((unsigned int));
393 static PTR xrealloc __P((char *, unsigned int));
394
395 \f
396 static char searchar = '/'; /* use /.../ searches */
397
398 static char *tagfile; /* output file */
399 static char *progname; /* name this program was invoked with */
400 static char *cwd; /* current working directory */
401 static char *tagfiledir; /* directory of tagfile */
402 static FILE *tagf; /* ioptr for tags file */
403
404 static fdesc *fdhead; /* head of file description list */
405 static fdesc *curfdp; /* current file description */
406 static int lineno; /* line number of current line */
407 static long charno; /* current character number */
408 static long linecharno; /* charno of start of current line */
409 static char *dbp; /* pointer to start of current tag */
410
411 static const int invalidcharno = -1;
412
413 static node *nodehead; /* the head of the binary tree of tags */
414 static node *last_node; /* the last node created */
415
416 static linebuffer lb; /* the current line */
417
418 /* boolean "functions" (see init) */
419 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
420 static char
421 /* white chars */
422 *white = " \f\t\n\r\v",
423 /* not in a name */
424 *nonam = " \f\t\n\r()=,;",
425 /* token ending chars */
426 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
427 /* token starting chars */
428 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
429 /* valid in-token chars */
430 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
431
432 static bool append_to_tagfile; /* -a: append to tags */
433 /* The next four default to TRUE for etags, but to FALSE for ctags. */
434 static bool typedefs; /* -t: create tags for C and Ada typedefs */
435 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
436 /* 0 struct/enum/union decls, and C++ */
437 /* member functions. */
438 static bool constantypedefs; /* -d: create tags for C #define, enum */
439 /* constants and variables. */
440 /* -D: opposite of -d. Default under ctags. */
441 static bool globals; /* create tags for global variables */
442 static bool declarations; /* --declarations: tag them and extern in C&Co*/
443 static bool members; /* create tags for C member variables */
444 static bool no_line_directive; /* ignore #line directives (undocumented) */
445 static bool update; /* -u: update tags */
446 static bool vgrind_style; /* -v: create vgrind style index output */
447 static bool no_warnings; /* -w: suppress warnings */
448 static bool cxref_style; /* -x: create cxref style output */
449 static bool cplusplus; /* .[hc] means C++, not C */
450 static bool noindentypedefs; /* -I: ignore indentation in C */
451 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
452
453 #ifdef ETAGS_REGEXPS
454 /* List of all regexps. */
455 static pattern *p_head;
456
457 /* How many characters in the character set. (From regex.c.) */
458 #define CHAR_SET_SIZE 256
459 /* Translation table for case-insensitive matching. */
460 static char lc_trans[CHAR_SET_SIZE];
461 #endif /* ETAGS_REGEXPS */
462
463 #ifdef LONG_OPTIONS
464 static struct option longopts[] =
465 {
466 { "packages-only", no_argument, &packages_only, TRUE },
467 { "c++", no_argument, NULL, 'C' },
468 { "declarations", no_argument, &declarations, TRUE },
469 { "no-line-directive", no_argument, &no_line_directive, TRUE },
470 { "help", no_argument, NULL, 'h' },
471 { "help", no_argument, NULL, 'H' },
472 { "ignore-indentation", no_argument, NULL, 'I' },
473 { "language", required_argument, NULL, 'l' },
474 { "members", no_argument, &members, TRUE },
475 { "no-members", no_argument, &members, FALSE },
476 { "output", required_argument, NULL, 'o' },
477 #ifdef ETAGS_REGEXPS
478 { "regex", required_argument, NULL, 'r' },
479 { "no-regex", no_argument, NULL, 'R' },
480 { "ignore-case-regex", required_argument, NULL, 'c' },
481 #endif /* ETAGS_REGEXPS */
482 { "version", no_argument, NULL, 'V' },
483
484 #if CTAGS /* Ctags options */
485 { "backward-search", no_argument, NULL, 'B' },
486 { "cxref", no_argument, NULL, 'x' },
487 { "defines", no_argument, NULL, 'd' },
488 { "globals", no_argument, &globals, TRUE },
489 { "typedefs", no_argument, NULL, 't' },
490 { "typedefs-and-c++", no_argument, NULL, 'T' },
491 { "update", no_argument, NULL, 'u' },
492 { "vgrind", no_argument, NULL, 'v' },
493 { "no-warn", no_argument, NULL, 'w' },
494
495 #else /* Etags options */
496 { "append", no_argument, NULL, 'a' },
497 { "no-defines", no_argument, NULL, 'D' },
498 { "no-globals", no_argument, &globals, FALSE },
499 { "include", required_argument, NULL, 'i' },
500 #endif
501 { NULL }
502 };
503 #endif /* LONG_OPTIONS */
504
505 static compressor compressors[] =
506 {
507 { "z", "gzip -d -c"},
508 { "Z", "gzip -d -c"},
509 { "gz", "gzip -d -c"},
510 { "GZ", "gzip -d -c"},
511 { "bz2", "bzip2 -d -c" },
512 { NULL }
513 };
514
515 /*
516 * Language stuff.
517 */
518
519 /* Ada code */
520 static char *Ada_suffixes [] =
521 { "ads", "adb", "ada", NULL };
522
523 /* Assembly code */
524 static char *Asm_suffixes [] =
525 { "a", /* Unix assembler */
526 "asm", /* Microcontroller assembly */
527 "def", /* BSO/Tasking definition includes */
528 "inc", /* Microcontroller include files */
529 "ins", /* Microcontroller include files */
530 "s", "sa", /* Unix assembler */
531 "S", /* cpp-processed Unix assembler */
532 "src", /* BSO/Tasking C compiler output */
533 NULL
534 };
535
536 /* Note that .c and .h can be considered C++, if the --c++ flag was
537 given, or if the `class' keyword is met inside the file.
538 That is why default_C_entries is called for these. */
539 static char *default_C_suffixes [] =
540 { "c", "h", NULL };
541
542 static char *Cplusplus_suffixes [] =
543 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
544 "M", /* Objective C++ */
545 "pdb", /* Postscript with C syntax */
546 NULL };
547
548 static char *Cjava_suffixes [] =
549 { "java", NULL };
550
551 static char *Cobol_suffixes [] =
552 { "COB", "cob", NULL };
553
554 static char *Cstar_suffixes [] =
555 { "cs", "hs", NULL };
556
557 static char *Erlang_suffixes [] =
558 { "erl", "hrl", NULL };
559
560 static char *Fortran_suffixes [] =
561 { "F", "f", "f90", "for", NULL };
562
563 static char *Lisp_suffixes [] =
564 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
565
566 static char *Makefile_filenames [] =
567 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
568
569 static char *Pascal_suffixes [] =
570 { "p", "pas", NULL };
571
572 static char *Perl_suffixes [] =
573 { "pl", "pm", NULL };
574
575 static char *Perl_interpreters [] =
576 { "perl", "@PERL@", NULL };
577
578 static char *PHP_suffixes [] =
579 { "php", "php3", "php4", NULL };
580
581 static char *plain_C_suffixes [] =
582 { "lm", /* Objective lex file */
583 "m", /* Objective C file */
584 "pc", /* Pro*C file */
585 NULL };
586
587 static char *Postscript_suffixes [] =
588 { "ps", "psw", NULL }; /* .psw is for PSWrap */
589
590 static char *Prolog_suffixes [] =
591 { "prolog", NULL };
592
593 static char *Python_suffixes [] =
594 { "py", NULL };
595
596 /* Can't do the `SCM' or `scm' prefix with a version number. */
597 static char *Scheme_suffixes [] =
598 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
599
600 static char *TeX_suffixes [] =
601 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
602
603 static char *Texinfo_suffixes [] =
604 { "texi", "texinfo", "txi", NULL };
605
606 static char *Yacc_suffixes [] =
607 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
608
609 /*
610 * Table of languages.
611 *
612 * It is ok for a given function to be listed under more than one
613 * name. I just didn't.
614 */
615
616 static language lang_names [] =
617 {
618 { "ada", FALSE, Ada_funcs, NULL, Ada_suffixes, NULL },
619 { "asm", FALSE, Asm_labels, NULL, Asm_suffixes, NULL },
620 { "c", FALSE, default_C_entries, NULL, default_C_suffixes, NULL },
621 { "c++", FALSE, Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
622 { "c*", FALSE, Cstar_entries, NULL, Cstar_suffixes, NULL },
623 { "cobol", FALSE, Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
624 { "erlang", FALSE, Erlang_functions, NULL, Erlang_suffixes, NULL },
625 { "fortran", FALSE, Fortran_functions, NULL, Fortran_suffixes, NULL },
626 { "java", FALSE, Cjava_entries, NULL, Cjava_suffixes, NULL },
627 { "lisp", FALSE, Lisp_functions, NULL, Lisp_suffixes, NULL },
628 { "makefile", FALSE, Makefile_targets, Makefile_filenames, NULL, NULL },
629 { "pascal", FALSE, Pascal_functions, NULL, Pascal_suffixes, NULL },
630 { "perl", FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
631 { "php", FALSE, PHP_functions, NULL, PHP_suffixes, NULL },
632 { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
633 { "proc", FALSE, plain_C_entries, NULL, plain_C_suffixes, NULL },
634 { "prolog", FALSE, Prolog_functions, NULL, Prolog_suffixes, NULL },
635 { "python", FALSE, Python_functions, NULL, Python_suffixes, NULL },
636 { "scheme", FALSE, Scheme_functions, NULL, Scheme_suffixes, NULL },
637 { "tex", FALSE, TeX_commands, NULL, TeX_suffixes, NULL },
638 { "texinfo", FALSE, Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
639 { "yacc", TRUE, Yacc_entries, NULL, Yacc_suffixes, NULL },
640 { "auto", FALSE, NULL }, /* default guessing scheme */
641 { "none", FALSE, just_read_file }, /* regexp matching only */
642 { NULL, FALSE, NULL } /* end of list */
643 };
644
645 \f
/* Print on standard output the table of supported languages, followed
   by a paragraph explaining the `auto' and `none' entries and how the
   language of a file is guessed.  The table is built from lang_names,
   which is walked up to the entry whose name is NULL.  */
646 static void
647 print_language_names ()
648 {
649 language *lang;
650 char **name, **ext;
651
652 puts ("\nThese are the currently supported languages, along with the\n\
653 default file names and dot suffixes:");
/* One output line per table entry: the language name, then its file
   names, then its suffixes, each suffix preceded by a dot.  */
654 for (lang = lang_names; lang->name != NULL; lang++)
655 {
656 printf (" %-*s", 10, lang->name);
657 if (lang->filenames != NULL)
658 for (name = lang->filenames; *name != NULL; name++)
659 printf (" %s", *name);
660 if (lang->suffixes != NULL)
661 for (ext = lang->suffixes; *ext != NULL; ext++)
662 printf (" .%s", *ext);
/* Terminate the line of the current language.  */
663 puts ("");
664 }
665 puts ("Where `auto' means use default language for files based on file\n\
666 name suffix, and `none' means only do regexp processing on files.\n\
667 If no language is specified and no matching suffix is found,\n\
668 the first line of the file is read for a sharp-bang (#!) sequence\n\
669 followed by the name of an interpreter. If no such sequence is found,\n\
670 Fortran is tried first; if no tags are found, C is tried next.\n\
671 When parsing any C file, a \"class\" keyword switches to C++.\n\
672 Compressed files are supported using gzip and bzip2.");
673 }
674
675 #ifndef EMACS_NAME
676 # define EMACS_NAME "GNU Emacs"
677 #endif
678 #ifndef VERSION
679 # define VERSION "21"
680 #endif
681 static void
682 print_version ()
683 {
684 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
685 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
686 puts ("This program is distributed under the same terms as Emacs");
687
688 exit (GOOD);
689 }
690
/* Print the usage message on standard output: the synopsis, the
   description of every option, the table of supported languages
   (through print_language_names) and the bug-report address; then exit
   with status GOOD.  This function does not return.

   Which option descriptions are printed depends on the compile-time
   flags CTAGS, LONG_OPTIONS and ETAGS_REGEXPS, so that the text
   matches the executable being built.  */
691 static void
692 print_help ()
693 {
694 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
695 \n\
696 These are the options accepted by %s.\n", progname, progname);
697 #ifdef LONG_OPTIONS
698 puts ("You may use unambiguous abbreviations for the long option names.");
699 #else
700 puts ("Long option names do not work with this executable, as it is not\n\
701 linked with GNU getopt.");
702 #endif /* LONG_OPTIONS */
703 puts (" A - as file name means read names from stdin (one per line).\n\
704 Absolute names are stored in the output file as they are.\n\
705 Relative ones are stored relative to the output file's directory.\n");
706
/* From here on, `if (CTAGS)' guards text printed only by ctags and
   `if (!CTAGS)' (or the else branch) text printed only by etags.  */
707 if (!CTAGS)
708 puts ("-a, --append\n\
709 Append tag entries to existing tags file.");
710
711 puts ("--packages-only\n\
712 For Ada files, only generate tags for packages.");
713
714 if (CTAGS)
715 puts ("-B, --backward-search\n\
716 Write the search commands for the tag entries using '?', the\n\
717 backward-search command instead of '/', the forward-search command.");
718
719 /* This option is mostly obsolete, because etags can now automatically
720 detect C++. Retained for backward compatibility and for debugging and
721 experimentation. In principle, we could want to tag as C++ even
722 before any "class" keyword.
723 puts ("-C, --c++\n\
724 Treat files whose name suffix defaults to C language as C++ files.");
725 */
726
727 puts ("--declarations\n\
728 In C and derived languages, create tags for function declarations,");
729 if (CTAGS)
730 puts ("\tand create tags for extern variables if --globals is used.");
731 else
732 puts
733 ("\tand create tags for extern variables unless --no-globals is used.");
734
735 if (CTAGS)
736 puts ("-d, --defines\n\
737 Create tag entries for C #define constants and enum constants, too.");
738 else
739 puts ("-D, --no-defines\n\
740 Don't create tag entries for C #define constants and enum constants.\n\
741 This makes the tags file smaller.");
742
743 if (!CTAGS)
744 puts ("-i FILE, --include=FILE\n\
745 Include a note in tag file indicating that, when searching for\n\
746 a tag, one should also consult the tags file FILE after\n\
747 checking the current file.");
748
749 puts ("-l LANG, --language=LANG\n\
750 Force the following files to be considered as written in the\n\
751 named language up to the next --language=LANG option.");
752
753 if (CTAGS)
754 puts ("--globals\n\
755 Create tag entries for global variables in some languages.");
756 else
757 puts ("--no-globals\n\
758 Do not create tag entries for global variables in some\n\
759 languages. This makes the tags file smaller.");
760 puts ("--members\n\
761 Create tag entries for member variables in C and derived languages.");
762
/* The regexp options are described only when regexp support is
   compiled in.  */
763 #ifdef ETAGS_REGEXPS
764 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
765 Make a tag for each line matching pattern REGEXP in the following\n\
766 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
767 regexfile is a file containing one REGEXP per line.\n\
768 REGEXP is anchored (as if preceded by ^).\n\
769 The form /REGEXP/NAME/ creates a named tag.\n\
770 For example Tcl named tags can be created with:\n\
771 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
772 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
773 Like -r, --regex but ignore case when matching expressions.");
774 puts ("-R, --no-regex\n\
775 Don't create tags from regexps for the following files.");
776 #endif /* ETAGS_REGEXPS */
777 puts ("-o FILE, --output=FILE\n\
778 Write the tags to FILE.");
779 puts ("-I, --ignore-indentation\n\
780 Don't rely on indentation quite as much as normal. Currently,\n\
781 this means not to assume that a closing brace in the first\n\
782 column is the final brace of a function or structure\n\
783 definition in C and C++.");
784
785 if (CTAGS)
786 {
787 puts ("-t, --typedefs\n\
788 Generate tag entries for C and Ada typedefs.");
789 puts ("-T, --typedefs-and-c++\n\
790 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
791 and C++ member functions.");
792 }
793
794 if (CTAGS)
795 puts ("-u, --update\n\
796 Update the tag entries for the given files, leaving tag\n\
797 entries for other files in place. Currently, this is\n\
798 implemented by deleting the existing entries for the given\n\
799 files and then rewriting the new entries at the end of the\n\
800 tags file. It is often faster to simply rebuild the entire\n\
801 tag file than to use this.");
802
803 if (CTAGS)
804 {
805 puts ("-v, --vgrind\n\
806 Generates an index of items intended for human consumption,\n\
807 similar to the output of vgrind. The index is sorted, and\n\
808 gives the page number of each item.");
809 puts ("-w, --no-warn\n\
810 Suppress warning messages about entries defined in multiple\n\
811 files.");
812 puts ("-x, --cxref\n\
813 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
814 The output uses line numbers instead of page numbers, but\n\
815 beyond that the differences are cosmetic; try both to see\n\
816 which you like.");
817 }
818
819 puts ("-V, --version\n\
820 Print the version of the program.\n\
821 -h, --help\n\
822 Print this help message.");
823
/* The language table follows the option list.  */
824 print_language_names ();
825
826 puts ("");
827 puts ("Report bugs to bug-gnu-emacs@gnu.org");
828
829 exit (GOOD);
830 }
831
832 \f
833 #ifdef VMS /* VMS specific functions */
834
835 #define EOS '\0'
836
837 /* This is a BUG! ANY arbitrary limit is a BUG!
838 Won't someone please fix this? */
839 #define MAX_FILE_SPEC_LEN 255
840 typedef struct {
841 short curlen;
842 char body[MAX_FILE_SPEC_LEN + 1];
843 } vspec;
844
845 /*
846 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
847 returning in each successive call the next file name matching the input
848 spec. The function expects that each in_spec passed
849 to it will be processed to completion; in particular, up to and
850 including the call following that in which the last matching name
851 is returned, the function ignores the value of in_spec, and will
852 only start processing a new spec with the following call.
853 If an error occurs, on return out_spec contains the value
854 of in_spec when the error occurred.
855
856 With each successive file name returned in out_spec, the
857 function's return value is one. When there are no more matching
858 names the function returns zero. If on the first call no file
859 matches in_spec, or there is any other error, -1 is returned.
860 */
861
862 #include <rmsdef.h>
863 #include <descrip.h>
864 #define OUTSIZE MAX_FILE_SPEC_LEN
865 static short
866 fn_exp (out, in)
867 vspec *out;
868 char *in;
869 {
870 static long context = 0;
871 static struct dsc$descriptor_s o;
872 static struct dsc$descriptor_s i;
873 static bool pass1 = TRUE;
874 long status;
875 short retval;
876
877 if (pass1)
878 {
879 pass1 = FALSE;
880 o.dsc$a_pointer = (char *) out;
881 o.dsc$w_length = (short)OUTSIZE;
882 i.dsc$a_pointer = in;
883 i.dsc$w_length = (short)strlen(in);
884 i.dsc$b_dtype = DSC$K_DTYPE_T;
885 i.dsc$b_class = DSC$K_CLASS_S;
886 o.dsc$b_dtype = DSC$K_DTYPE_VT;
887 o.dsc$b_class = DSC$K_CLASS_VS;
888 }
889 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
890 {
891 out->body[out->curlen] = EOS;
892 return 1;
893 }
894 else if (status == RMS$_NMF)
895 retval = 0;
896 else
897 {
898 strcpy(out->body, in);
899 retval = -1;
900 }
901 lib$find_file_end(&context);
902 pass1 = TRUE;
903 return retval;
904 }
905
906 /*
907 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
908 name of each file specified by the provided arg expanding wildcards.
909 */
910 static char *
911 gfnames (arg, p_error)
912 char *arg;
913 bool *p_error;
914 {
915 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
916
917 switch (fn_exp (&filename, arg))
918 {
919 case 1:
920 *p_error = FALSE;
921 return filename.body;
922 case 0:
923 *p_error = FALSE;
924 return NULL;
925 default:
926 *p_error = TRUE;
927 return filename.body;
928 }
929 }
930
#ifndef OLD     /* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library `system'.  No command is run: an error is
   reported and -1 is returned, so that callers testing the result (as
   main does) see a failure instead of an indeterminate value.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
938
939 #define VERSION_DELIM ';'
940 char *massage_name (s)
941 char *s;
942 {
943 char *start = s;
944
945 for ( ; *s; s++)
946 if (*s == VERSION_DELIM)
947 {
948 *s = EOS;
949 break;
950 }
951 else
952 *s = lowcase (*s);
953 return start;
954 }
955 #endif /* VMS */
956
957 \f
958 int
959 main (argc, argv)
960 int argc;
961 char *argv[];
962 {
963 int i;
964 unsigned int nincluded_files;
965 char **included_files;
966 argument *argbuffer;
967 int current_arg, file_count;
968 linebuffer filename_lb;
969 #ifdef VMS
970 bool got_err;
971 #endif
972
973 #ifdef DOS_NT
974 _fmode = O_BINARY; /* all of files are treated as binary files */
975 #endif /* DOS_NT */
976
977 progname = argv[0];
978 nincluded_files = 0;
979 included_files = xnew (argc, char *);
980 current_arg = 0;
981 file_count = 0;
982
983 /* Allocate enough no matter what happens. Overkill, but each one
984 is small. */
985 argbuffer = xnew (argc, argument);
986
987 #ifdef ETAGS_REGEXPS
988 /* Set syntax for regular expression routines. */
989 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
990 /* Translation table for case-insensitive search. */
991 for (i = 0; i < CHAR_SET_SIZE; i++)
992 lc_trans[i] = lowcase (i);
993 #endif /* ETAGS_REGEXPS */
994
995 /*
996 * If etags, always find typedefs and structure tags. Why not?
997 * Also default to find macro constants, enum constants and
998 * global variables.
999 */
1000 if (!CTAGS)
1001 {
1002 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1003 globals = TRUE;
1004 }
1005
1006 while (1)
1007 {
1008 int opt;
1009 char *optstring = "-";
1010
1011 #ifdef ETAGS_REGEXPS
1012 optstring = "-r:Rc:";
1013 #endif /* ETAGS_REGEXPS */
1014
1015 #ifndef LONG_OPTIONS
1016 optstring = optstring + 1;
1017 #endif /* LONG_OPTIONS */
1018
1019 optstring = concat (optstring,
1020 "Cf:Il:o:SVhH",
1021 (CTAGS) ? "BxdtTuvw" : "aDi:");
1022
1023 opt = getopt_long (argc, argv, optstring, longopts, 0);
1024 if (opt == EOF)
1025 break;
1026
1027 switch (opt)
1028 {
1029 case 0:
1030 /* If getopt returns 0, then it has already processed a
1031 long-named option. We should do nothing. */
1032 break;
1033
1034 case 1:
1035 /* This means that a file name has been seen. Record it. */
1036 argbuffer[current_arg].arg_type = at_filename;
1037 argbuffer[current_arg].what = optarg;
1038 ++current_arg;
1039 ++file_count;
1040 break;
1041
1042 /* Common options. */
1043 case 'C': cplusplus = TRUE; break;
1044 case 'f': /* for compatibility with old makefiles */
1045 case 'o':
1046 if (tagfile)
1047 {
1048 error ("-o option may only be given once.", (char *)NULL);
1049 suggest_asking_for_help ();
1050 }
1051 tagfile = optarg;
1052 break;
1053 case 'I':
1054 case 'S': /* for backward compatibility */
1055 noindentypedefs = TRUE;
1056 break;
1057 case 'l':
1058 {
1059 language *lang = get_language_from_langname (optarg);
1060 if (lang != NULL)
1061 {
1062 argbuffer[current_arg].lang = lang;
1063 argbuffer[current_arg].arg_type = at_language;
1064 ++current_arg;
1065 }
1066 }
1067 break;
1068 case 'r':
1069 argbuffer[current_arg].arg_type = at_regexp;
1070 argbuffer[current_arg].what = optarg;
1071 ++current_arg;
1072 break;
1073 case 'R':
1074 argbuffer[current_arg].arg_type = at_regexp;
1075 argbuffer[current_arg].what = NULL;
1076 ++current_arg;
1077 break;
1078 case 'c':
1079 argbuffer[current_arg].arg_type = at_icregexp;
1080 argbuffer[current_arg].what = optarg;
1081 ++current_arg;
1082 break;
1083 case 'V':
1084 print_version ();
1085 break;
1086 case 'h':
1087 case 'H':
1088 print_help ();
1089 break;
1090
1091 /* Etags options */
1092 case 'a': append_to_tagfile = TRUE; break;
1093 case 'D': constantypedefs = FALSE; break;
1094 case 'i': included_files[nincluded_files++] = optarg; break;
1095
1096 /* Ctags options. */
1097 case 'B': searchar = '?'; break;
1098 case 'd': constantypedefs = TRUE; break;
1099 case 't': typedefs = TRUE; break;
1100 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1101 case 'u': update = TRUE; break;
1102 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1103 case 'x': cxref_style = TRUE; break;
1104 case 'w': no_warnings = TRUE; break;
1105 default:
1106 suggest_asking_for_help ();
1107 }
1108 }
1109
1110 for (; optind < argc; ++optind)
1111 {
1112 argbuffer[current_arg].arg_type = at_filename;
1113 argbuffer[current_arg].what = argv[optind];
1114 ++current_arg;
1115 ++file_count;
1116 }
1117
1118 if (nincluded_files == 0 && file_count == 0)
1119 {
1120 error ("no input files specified.", (char *)NULL);
1121 suggest_asking_for_help ();
1122 }
1123
1124 if (tagfile == NULL)
1125 tagfile = CTAGS ? "tags" : "TAGS";
1126 cwd = etags_getcwd (); /* the current working directory */
1127 if (cwd[strlen (cwd) - 1] != '/')
1128 {
1129 char *oldcwd = cwd;
1130 cwd = concat (oldcwd, "/", "");
1131 free (oldcwd);
1132 }
1133 if (streq (tagfile, "-"))
1134 tagfiledir = cwd;
1135 else
1136 tagfiledir = absolute_dirname (tagfile, cwd);
1137
1138 init (); /* set up boolean "functions" */
1139
1140 initbuffer (&lb);
1141 initbuffer (&filename_lb);
1142
1143 if (!CTAGS)
1144 {
1145 if (streq (tagfile, "-"))
1146 {
1147 tagf = stdout;
1148 #ifdef DOS_NT
1149 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1150 doesn't take effect until after `stdout' is already open). */
1151 if (!isatty (fileno (stdout)))
1152 setmode (fileno (stdout), O_BINARY);
1153 #endif /* DOS_NT */
1154 }
1155 else
1156 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1157 if (tagf == NULL)
1158 pfatal (tagfile);
1159 }
1160
1161 /*
1162 * Loop through files finding functions.
1163 */
1164 for (i = 0; i < current_arg; ++i)
1165 {
1166 static language *lang; /* non-NULL if language is forced */
1167 char *this_file;
1168
1169 switch (argbuffer[i].arg_type)
1170 {
1171 case at_language:
1172 lang = argbuffer[i].lang;
1173 break;
1174 #ifdef ETAGS_REGEXPS
1175 case at_regexp:
1176 analyse_regex (argbuffer[i].what, FALSE);
1177 break;
1178 case at_icregexp:
1179 analyse_regex (argbuffer[i].what, TRUE);
1180 break;
1181 #endif
1182 case at_filename:
1183 #ifdef VMS
1184 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1185 {
1186 if (got_err)
1187 {
1188 error ("can't find file %s\n", this_file);
1189 argc--, argv++;
1190 }
1191 else
1192 {
1193 this_file = massage_name (this_file);
1194 }
1195 #else
1196 this_file = argbuffer[i].what;
1197 #endif
1198 /* Input file named "-" means read file names from stdin
1199 (one per line) and use them. */
1200 if (streq (this_file, "-"))
1201 while (readline_internal (&filename_lb, stdin) > 0)
1202 process_file (filename_lb.buffer, lang);
1203 else
1204 process_file (this_file, lang);
1205 #ifdef VMS
1206 }
1207 #endif
1208 break;
1209 }
1210 }
1211
1212 #ifdef ETAGS_REGEXPS
1213 free_patterns ();
1214 #endif /* ETAGS_REGEXPS */
1215
1216 if (!CTAGS || cxref_style)
1217 {
1218 put_entries (nodehead);
1219 free_tree (nodehead);
1220 nodehead = NULL;
1221 if (!CTAGS)
1222 while (nincluded_files-- > 0)
1223 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1224
1225 if (fclose (tagf) == EOF)
1226 pfatal (tagfile);
1227 exit (GOOD);
1228 }
1229
1230 if (update)
1231 {
1232 char cmd[BUFSIZ];
1233 for (i = 0; i < current_arg; ++i)
1234 {
1235 if (argbuffer[i].arg_type != at_filename)
1236 continue;
1237 sprintf (cmd,
1238 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1239 tagfile, argbuffer[i].what, tagfile);
1240 if (system (cmd) != GOOD)
1241 fatal ("failed to execute shell command", (char *)NULL);
1242 }
1243 append_to_tagfile = TRUE;
1244 }
1245
1246 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1247 if (tagf == NULL)
1248 pfatal (tagfile);
1249 put_entries (nodehead);
1250 free_tree (nodehead);
1251 nodehead = NULL;
1252 if (fclose (tagf) == EOF)
1253 pfatal (tagfile);
1254
1255 if (update)
1256 {
1257 char cmd[2*BUFSIZ+10];
1258 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1259 exit (system (cmd));
1260 }
1261 return GOOD;
1262 }
1263
1264
1265 /*
1266 * Return a compressor given the file name. If EXTPTR is non-zero,
1267 * return a pointer into FILE where the compressor-specific
1268 * extension begins. If no compressor is found, NULL is returned
1269 * and EXTPTR is not significant.
1270 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1271 */
1272 static compressor *
1273 get_compressor_from_suffix (file, extptr)
1274 char *file;
1275 char **extptr;
1276 {
1277 compressor *compr;
1278 char *slash, *suffix;
1279
1280 /* This relies on FN to be after canonicalize_filename,
1281 so we don't need to consider backslashes on DOS_NT. */
1282 slash = etags_strrchr (file, '/');
1283 suffix = etags_strrchr (file, '.');
1284 if (suffix == NULL || suffix < slash)
1285 return NULL;
1286 if (extptr != NULL)
1287 *extptr = suffix;
1288 suffix += 1;
1289 /* Let those poor souls who live with DOS 8+3 file name limits get
1290 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1291 Only the first do loop is run if not MSDOS */
1292 do
1293 {
1294 for (compr = compressors; compr->suffix != NULL; compr++)
1295 if (streq (compr->suffix, suffix))
1296 return compr;
1297 if (!MSDOS)
1298 break; /* do it only once: not really a loop */
1299 if (extptr != NULL)
1300 *extptr = ++suffix;
1301 } while (*suffix != '\0');
1302 return NULL;
1303 }
1304
1305
1306
1307 /*
1308 * Return a language given the name.
1309 */
1310 static language *
1311 get_language_from_langname (name)
1312 const char *name;
1313 {
1314 language *lang;
1315
1316 if (name == NULL)
1317 error ("empty language name", (char *)NULL);
1318 else
1319 {
1320 for (lang = lang_names; lang->name != NULL; lang++)
1321 if (streq (name, lang->name))
1322 return lang;
1323 error ("unknown language \"%s\"", name);
1324 }
1325
1326 return NULL;
1327 }
1328
1329
1330 /*
1331 * Return a language given the interpreter name.
1332 */
1333 static language *
1334 get_language_from_interpreter (interpreter)
1335 char *interpreter;
1336 {
1337 language *lang;
1338 char **iname;
1339
1340 if (interpreter == NULL)
1341 return NULL;
1342 for (lang = lang_names; lang->name != NULL; lang++)
1343 if (lang->interpreters != NULL)
1344 for (iname = lang->interpreters; *iname != NULL; iname++)
1345 if (streq (*iname, interpreter))
1346 return lang;
1347
1348 return NULL;
1349 }
1350
1351
1352
1353 /*
1354 * Return a language given the file name.
1355 */
1356 static language *
1357 get_language_from_filename (file, case_sensitive)
1358 char *file;
1359 bool case_sensitive;
1360 {
1361 language *lang;
1362 char **name, **ext, *suffix;
1363
1364 /* Try whole file name first. */
1365 for (lang = lang_names; lang->name != NULL; lang++)
1366 if (lang->filenames != NULL)
1367 for (name = lang->filenames; *name != NULL; name++)
1368 if ((case_sensitive)
1369 ? streq (*name, file)
1370 : strcaseeq (*name, file))
1371 return lang;
1372
1373 /* If not found, try suffix after last dot. */
1374 suffix = etags_strrchr (file, '.');
1375 if (suffix == NULL)
1376 return NULL;
1377 suffix += 1;
1378 for (lang = lang_names; lang->name != NULL; lang++)
1379 if (lang->suffixes != NULL)
1380 for (ext = lang->suffixes; *ext != NULL; ext++)
1381 if ((case_sensitive)
1382 ? streq (*ext, suffix)
1383 : strcaseeq (*ext, suffix))
1384 return lang;
1385 return NULL;
1386 }
1387
1388 \f
1389 /*
1390 * This routine is called on each file argument.
1391 */
1392 static void
1393 process_file (file, lang)
1394 char *file;
1395 language *lang;
1396 {
1397 struct stat stat_buf;
1398 FILE *inf;
1399 static const fdesc emptyfdesc;
1400 fdesc *fdp;
1401 compressor *compr;
1402 char *compressed_name, *uncompressed_name;
1403 char *ext, *real_name;
1404 int retval;
1405
1406
1407 canonicalize_filename (file);
1408 if (streq (file, tagfile) && !streq (tagfile, "-"))
1409 {
1410 error ("skipping inclusion of %s in self.", file);
1411 return;
1412 }
1413 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1414 {
1415 compressed_name = NULL;
1416 real_name = uncompressed_name = savestr (file);
1417 }
1418 else
1419 {
1420 real_name = compressed_name = savestr (file);
1421 uncompressed_name = savenstr (file, ext - file);
1422 }
1423
1424 /* If the canonicalized uncompressed name
1425 has already been dealt with, skip it silently. */
1426 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1427 {
1428 assert (fdp->infname != NULL);
1429 if (streq (uncompressed_name, fdp->infname))
1430 goto cleanup;
1431 }
1432
1433 if (stat (real_name, &stat_buf) != 0)
1434 {
1435 /* Reset real_name and try with a different name. */
1436 real_name = NULL;
1437 if (compressed_name != NULL) /* try with the given suffix */
1438 {
1439 if (stat (uncompressed_name, &stat_buf) == 0)
1440 real_name = uncompressed_name;
1441 }
1442 else /* try all possible suffixes */
1443 {
1444 for (compr = compressors; compr->suffix != NULL; compr++)
1445 {
1446 compressed_name = concat (file, ".", compr->suffix);
1447 if (stat (compressed_name, &stat_buf) != 0)
1448 {
1449 if (MSDOS)
1450 {
1451 char *suf = compressed_name + strlen (file);
1452 size_t suflen = strlen (compr->suffix) + 1;
1453 for ( ; suf[1]; suf++, suflen--)
1454 {
1455 memmove (suf, suf + 1, suflen);
1456 if (stat (compressed_name, &stat_buf) == 0)
1457 {
1458 real_name = compressed_name;
1459 break;
1460 }
1461 }
1462 if (real_name != NULL)
1463 break;
1464 } /* MSDOS */
1465 free (compressed_name);
1466 compressed_name = NULL;
1467 }
1468 else
1469 {
1470 real_name = compressed_name;
1471 break;
1472 }
1473 }
1474 }
1475 if (real_name == NULL)
1476 {
1477 perror (file);
1478 goto cleanup;
1479 }
1480 } /* try with a different name */
1481
1482 if (!S_ISREG (stat_buf.st_mode))
1483 {
1484 error ("skipping %s: it is not a regular file.", real_name);
1485 goto cleanup;
1486 }
1487 if (real_name == compressed_name)
1488 {
1489 char *cmd = concat (compr->command, " ", real_name);
1490 inf = (FILE *) popen (cmd, "r");
1491 free (cmd);
1492 }
1493 else
1494 inf = fopen (real_name, "r");
1495 if (inf == NULL)
1496 {
1497 perror (real_name);
1498 goto cleanup;
1499 }
1500
1501 /* Create a new input file description entry. */
1502 fdp = xnew (1, fdesc);
1503 *fdp = emptyfdesc;
1504 fdp->next = fdhead;
1505 fdp->infname = savestr (uncompressed_name);
1506 fdp->lang = lang;
1507 fdp->infabsname = absolute_filename (uncompressed_name, cwd);
1508 fdp->infabsdir = absolute_dirname (uncompressed_name, cwd);
1509 if (filename_is_absolute (uncompressed_name))
1510 {
1511 /* file is an absolute file name. Canonicalize it. */
1512 fdp->taggedfname = absolute_filename (uncompressed_name, NULL);
1513 }
1514 else
1515 {
1516 /* file is a file name relative to cwd. Make it relative
1517 to the directory of the tags file. */
1518 fdp->taggedfname = relative_filename (uncompressed_name, tagfiledir);
1519 }
1520 fdp->usecharno = TRUE; /* use char position when making tags */
1521 fdp->prop = NULL;
1522
1523 fdhead = fdp;
1524 curfdp = fdhead; /* the current file description */
1525
1526 find_entries (inf);
1527
1528 if (real_name == compressed_name)
1529 retval = pclose (inf);
1530 else
1531 retval = fclose (inf);
1532 if (retval < 0)
1533 pfatal (file);
1534
1535 /* If not Ctags, and if this is not metasource and if it contained no #line
1536 directives, we can write the tags and free all nodes pointing to
1537 curfdp. */
1538 if (!CTAGS
1539 && curfdp->usecharno /* no #line directives in this file */
1540 && !curfdp->lang->metasource)
1541 {
1542 node *np, *prev;
1543
1544 /* Look for the head of the sublist relative to this file. See add_node
1545 for the structure of the node tree. */
1546 prev = NULL;
1547 for (np = nodehead; np != NULL; prev = np, np = np->left)
1548 if (np->fdp == curfdp)
1549 break;
1550
1551 /* If we generated tags for this file, write and delete them. */
1552 if (np != NULL)
1553 {
1554 /* This is the head of the last sublist, if any. The following
1555 instructions depend on this being true. */
1556 assert (np->left == NULL);
1557
1558 assert (fdhead == curfdp);
1559 assert (last_node->fdp == curfdp);
1560 put_entries (np); /* write tags for file curfdp->taggedfname */
1561 free_tree (np); /* remove the written nodes */
1562 if (prev == NULL)
1563 nodehead = NULL; /* no nodes left */
1564 else
1565 prev->left = NULL; /* delete the pointer to the sublist */
1566 }
1567 }
1568
1569 cleanup:
1570 if (compressed_name) free (compressed_name);
1571 if (uncompressed_name) free (uncompressed_name);
1572 last_node = NULL;
1573 curfdp = NULL;
1574 return;
1575 }
1576
1577 /*
1578 * This routine sets up the boolean pseudo-functions which work
1579 * by setting boolean flags dependent upon the corresponding character.
1580 * Every char which is NOT in that string is not a white char. Therefore,
1581 * all of the array "_wht" is set to FALSE, and then the elements
1582 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1583 * of a char is TRUE if it is the string "white", else FALSE.
1584 */
1585 static void
1586 init ()
1587 {
1588 register char *sp;
1589 register int i;
1590
1591 for (i = 0; i < CHARS; i++)
1592 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1593 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1594 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1595 notinname('\0') = notinname('\n');
1596 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1597 begtoken('\0') = begtoken('\n');
1598 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1599 intoken('\0') = intoken('\n');
1600 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1601 endtoken('\0') = endtoken('\n');
1602 }
1603
1604 /*
1605 * This routine opens the specified file and calls the function
1606 * which finds the function and type definitions.
1607 */
1608 static void
1609 find_entries (inf)
1610 FILE *inf;
1611 {
1612 char *cp;
1613 node *old_last_node;
1614 language *lang = curfdp->lang;
1615 Lang_function *parser = NULL;
1616
1617 /* If user specified a language, use it. */
1618 if (lang != NULL && lang->function != NULL)
1619 {
1620 parser = lang->function;
1621 }
1622
1623 /* Else try to guess the language given the file name. */
1624 if (parser == NULL)
1625 {
1626 lang = get_language_from_filename (curfdp->infname, TRUE);
1627 if (lang != NULL && lang->function != NULL)
1628 {
1629 curfdp->lang = lang;
1630 parser = lang->function;
1631 }
1632 }
1633
1634 /* Else look for sharp-bang as the first two characters. */
1635 if (parser == NULL
1636 && readline_internal (&lb, inf) > 0
1637 && lb.len >= 2
1638 && lb.buffer[0] == '#'
1639 && lb.buffer[1] == '!')
1640 {
1641 char *lp;
1642
1643 /* Set lp to point at the first char after the last slash in the
1644 line or, if no slashes, at the first nonblank. Then set cp to
1645 the first successive blank and terminate the string. */
1646 lp = etags_strrchr (lb.buffer+2, '/');
1647 if (lp != NULL)
1648 lp += 1;
1649 else
1650 lp = skip_spaces (lb.buffer + 2);
1651 cp = skip_non_spaces (lp);
1652 *cp = '\0';
1653
1654 if (strlen (lp) > 0)
1655 {
1656 lang = get_language_from_interpreter (lp);
1657 if (lang != NULL && lang->function != NULL)
1658 {
1659 curfdp->lang = lang;
1660 parser = lang->function;
1661 }
1662 }
1663 }
1664
1665 /* We rewind here, even if inf may be a pipe. We fail if the
1666 length of the first line is longer than the pipe block size,
1667 which is unlikely. */
1668 if (parser == NULL)
1669 rewind (inf);
1670
1671 /* Else try to guess the language given the case insensitive file name. */
1672 if (parser == NULL)
1673 {
1674 lang = get_language_from_filename (curfdp->infname, FALSE);
1675 if (lang != NULL && lang->function != NULL)
1676 {
1677 curfdp->lang = lang;
1678 parser = lang->function;
1679 }
1680 }
1681
1682 if (!no_line_directive
1683 && curfdp->lang != NULL && curfdp->lang->metasource)
1684 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1685 file, or anyway we parsed a file that is automatically generated from
1686 this one. If this is the case, the bingo.c file contained #line
1687 directives that generated tags pointing to this file. Let's delete
1688 them all before parsing this file, which is the real source. */
1689 {
1690 fdesc **fdpp = &fdhead;
1691 while (*fdpp != NULL)
1692 if (*fdpp != curfdp
1693 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1694 /* We found one of those! We must delete both the file description
1695 and all tags referring to it. */
1696 {
1697 fdesc *badfdp = *fdpp;
1698
1699 if (DEBUG)
1700 fprintf (stderr,
1701 "Removing references to \"%s\" obtained from \"%s\"\n",
1702 badfdp->taggedfname, badfdp->infname);
1703
1704 /* Delete the tags referring to badfdp. */
1705 invalidate_nodes (badfdp, &nodehead);
1706
1707 *fdpp = badfdp->next; /* remove the bad description from the list */
1708 free_fdesc (badfdp);
1709 }
1710 else
1711 fdpp = &(*fdpp)->next; /* advance the list pointer */
1712 }
1713
1714 if (parser != NULL)
1715 {
1716 parser (inf);
1717 return;
1718 }
1719
1720 /* Else try Fortran. */
1721 old_last_node = last_node;
1722 curfdp->lang = get_language_from_langname ("fortran");
1723 Fortran_functions (inf);
1724
1725 if (old_last_node == last_node)
1726 /* No Fortran entries found. Try C. */
1727 {
1728 /* We do not tag if rewind fails.
1729 Only the file name will be recorded in the tags file. */
1730 rewind (inf);
1731 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1732 default_C_entries (inf);
1733 }
1734 return;
1735 }
1736
1737 \f
1738 /* Record a tag. */
1739 static void
1740 pfnote (name, is_func, linestart, linelen, lno, cno)
1741 char *name; /* tag name, or NULL if unnamed */
1742 bool is_func; /* tag is a function */
1743 char *linestart; /* start of the line where tag is */
1744 int linelen; /* length of the line where tag is */
1745 int lno; /* line number */
1746 long cno; /* character number */
1747 {
1748 register node *np;
1749
1750 if (CTAGS && name == NULL)
1751 return;
1752
1753 np = xnew (1, node);
1754
1755 /* If ctags mode, change name "main" to M<thisfilename>. */
1756 if (CTAGS && !cxref_style && streq (name, "main"))
1757 {
1758 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1759 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1760 fp = etags_strrchr (np->name, '.');
1761 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1762 fp[0] = '\0';
1763 }
1764 else
1765 np->name = name;
1766 np->valid = TRUE;
1767 np->been_warned = FALSE;
1768 np->fdp = curfdp;
1769 np->is_func = is_func;
1770 np->lno = lno;
1771 if (np->fdp->usecharno)
1772 /* Our char numbers are 0-base, because of C language tradition?
1773 ctags compatibility? old versions compatibility? I don't know.
1774 Anyway, since emacs's are 1-base we expect etags.el to take care
1775 of the difference. If we wanted to have 1-based numbers, we would
1776 uncomment the +1 below. */
1777 np->cno = cno /* + 1 */ ;
1778 else
1779 np->cno = invalidcharno;
1780 np->left = np->right = NULL;
1781 if (CTAGS && !cxref_style)
1782 {
1783 if (strlen (linestart) < 50)
1784 np->pat = concat (linestart, "$", "");
1785 else
1786 np->pat = savenstr (linestart, 50);
1787 }
1788 else
1789 np->pat = savenstr (linestart, linelen);
1790
1791 add_node (np, &nodehead);
1792 }
1793
1794 /*
1795 * TAGS format specification
1796 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1797 *
1798 * pfnote should emit the optimized form [unnamed tag] only if:
1799 * 1. name does not contain any of the characters " \t\r\n(),;";
1800 * 2. linestart contains name as either a rightmost, or rightmost but
1801 * one character, substring;
1802 * 3. the character, if any, immediately before name in linestart must
1803 * be one of the characters " \t(),;";
1804 * 4. the character, if any, immediately after name in linestart must
1805 * also be one of the characters " \t(),;".
1806 *
1807 * The real implementation uses the notinname() macro, which recognises
1808 * characters slightly different from " \t\r\n(),;". See the variable
1809 * `nonam'.
1810 */
1811 #define traditional_tag_style TRUE
1812 static void
1813 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1814 char *name; /* tag name, or NULL if unnamed */
1815 int namelen; /* tag length */
1816 bool is_func; /* tag is a function */
1817 char *linestart; /* start of the line where tag is */
1818 int linelen; /* length of the line where tag is */
1819 int lno; /* line number */
1820 long cno; /* character number */
1821 {
1822 register char *cp;
1823 bool named;
1824
1825 named = TRUE;
1826 if (!CTAGS)
1827 {
1828 for (cp = name; !notinname (*cp); cp++)
1829 continue;
1830 if (*cp == '\0') /* rule #1 */
1831 {
1832 cp = linestart + linelen - namelen;
1833 if (notinname (linestart[linelen-1]))
1834 cp -= 1; /* rule #4 */
1835 if (cp >= linestart /* rule #2 */
1836 && (cp == linestart
1837 || notinname (cp[-1])) /* rule #3 */
1838 && strneq (name, cp, namelen)) /* rule #2 */
1839 named = FALSE; /* use unnamed tag */
1840 }
1841 }
1842
1843 if (named)
1844 name = savenstr (name, namelen);
1845 else
1846 name = NULL;
1847 pfnote (name, is_func, linestart, linelen, lno, cno);
1848 }
1849
1850 /*
1851 * free_tree ()
1852 * recurse on left children, iterate on right children.
1853 */
1854 static void
1855 free_tree (np)
1856 register node *np;
1857 {
1858 while (np)
1859 {
1860 register node *node_right = np->right;
1861 free_tree (np->left);
1862 if (np->name != NULL)
1863 free (np->name);
1864 free (np->pat);
1865 free (np);
1866 np = node_right;
1867 }
1868 }
1869
1870 /*
1871 * free_fdesc ()
1872 * delete a file description
1873 */
1874 static void
1875 free_fdesc (fdp)
1876 register fdesc *fdp;
1877 {
1878 if (fdp->infname != NULL) free (fdp->infname);
1879 if (fdp->infabsname != NULL) free (fdp->infabsname);
1880 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1881 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1882 if (fdp->prop != NULL) free (fdp->prop);
1883 free (fdp);
1884 }
1885
1886 /*
1887 * add_node ()
1888 * Adds a node to the tree of nodes. In etags mode, sort by file
1889 * name. In ctags mode, sort by tag name. Make no attempt at
1890 * balancing.
1891 *
1892 * add_node is the only function allowed to add nodes, so it can
1893 * maintain state.
1894 */
1895 static void
1896 add_node (np, cur_node_p)
1897 node *np, **cur_node_p;
1898 {
1899 register int dif;
1900 register node *cur_node = *cur_node_p;
1901
1902 if (cur_node == NULL)
1903 {
1904 *cur_node_p = np;
1905 last_node = np;
1906 return;
1907 }
1908
1909 if (!CTAGS)
1910 /* Etags Mode */
1911 {
1912 /* For each file name, tags are in a linked sublist on the right
1913 pointer. The first tags of different files are a linked list
1914 on the left pointer. last_node points to the end of the last
1915 used sublist. */
1916 if (last_node != NULL && last_node->fdp == np->fdp)
1917 {
1918 /* Let's use the same sublist as the last added node. */
1919 assert (last_node->right == NULL);
1920 last_node->right = np;
1921 last_node = np;
1922 }
1923 else if (cur_node->fdp == np->fdp)
1924 {
1925 /* Scanning the list we found the head of a sublist which is
1926 good for us. Let's scan this sublist. */
1927 add_node (np, &cur_node->right);
1928 }
1929 else
1930 /* The head of this sublist is not good for us. Let's try the
1931 next one. */
1932 add_node (np, &cur_node->left);
1933 } /* if ETAGS mode */
1934
1935 else
1936 {
1937 /* Ctags Mode */
1938 dif = strcmp (np->name, cur_node->name);
1939
1940 /*
1941 * If this tag name matches an existing one, then
1942 * do not add the node, but maybe print a warning.
1943 */
1944 if (!dif)
1945 {
1946 if (np->fdp == cur_node->fdp)
1947 {
1948 if (!no_warnings)
1949 {
1950 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1951 np->fdp->infname, lineno, np->name);
1952 fprintf (stderr, "Second entry ignored\n");
1953 }
1954 }
1955 else if (!cur_node->been_warned && !no_warnings)
1956 {
1957 fprintf
1958 (stderr,
1959 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1960 np->fdp->infname, cur_node->fdp->infname, np->name);
1961 cur_node->been_warned = TRUE;
1962 }
1963 return;
1964 }
1965
1966 /* Actually add the node */
1967 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1968 } /* if CTAGS mode */
1969 }
1970
1971 /*
1972 * invalidate_nodes ()
1973 * Scan the node tree and invalidate all nodes pointing to the
1974 * given file description (CTAGS case) or free them (ETAGS case).
1975 */
1976 static void
1977 invalidate_nodes (badfdp, npp)
1978 fdesc *badfdp;
1979 node **npp;
1980 {
1981 node *np = *npp;
1982
1983 if (np == NULL)
1984 return;
1985
1986 if (CTAGS)
1987 {
1988 if (np->left != NULL)
1989 invalidate_nodes (badfdp, &np->left);
1990 if (np->fdp == badfdp)
1991 np-> valid = FALSE;
1992 if (np->right != NULL)
1993 invalidate_nodes (badfdp, &np->right);
1994 }
1995 else
1996 {
1997 node **next = &np->left;
1998 if (np->fdp == badfdp)
1999 {
2000 *npp = *next; /* detach the sublist from the list */
2001 np->left = NULL; /* isolate it */
2002 free_tree (np); /* free it */
2003 }
2004 invalidate_nodes (badfdp, next);
2005 }
2006 }
2007
2008 \f
2009 static int total_size_of_entries __P((node *));
2010 static int number_len __P((long));
2011
/* Return the number of digits in the decimal representation of NUM,
   which is expected to be non-negative.  */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2022
2023 /*
2024 * Return total number of characters that put_entries will output for
2025 * the nodes in the linked list at the right of the specified node.
2026 * This count is irrelevant with etags.el since emacs 19.34 at least,
2027 * but is still supplied for backward compatibility.
2028 */
2029 static int
2030 total_size_of_entries (np)
2031 register node *np;
2032 {
2033 register int total = 0;
2034
2035 for (; np != NULL; np = np->right)
2036 {
2037 total += strlen (np->pat) + 1; /* pat\177 */
2038 if (np->name != NULL)
2039 total += strlen (np->name) + 1; /* name\001 */
2040 total += number_len ((long) np->lno) + 1; /* lno, */
2041 if (np->cno != invalidcharno) /* cno */
2042 total += number_len (np->cno);
2043 total += 1; /* newline */
2044 }
2045
2046 return total;
2047 }
2048
2049 static void
2050 put_entries (np)
2051 register node *np;
2052 {
2053 register char *sp;
2054 static fdesc *fdp = NULL;
2055
2056 if (np == NULL)
2057 return;
2058
2059 /* Output subentries that precede this one */
2060 if (CTAGS)
2061 put_entries (np->left);
2062
2063 /* Output this entry */
2064 if (np->valid)
2065 {
2066 if (!CTAGS)
2067 {
2068 /* Etags mode */
2069 if (fdp != np->fdp)
2070 {
2071 fdp = np->fdp;
2072 fprintf (tagf, "\f\n%s,%d\n",
2073 fdp->taggedfname, total_size_of_entries (np));
2074 }
2075 fputs (np->pat, tagf);
2076 fputc ('\177', tagf);
2077 if (np->name != NULL)
2078 {
2079 fputs (np->name, tagf);
2080 fputc ('\001', tagf);
2081 }
2082 fprintf (tagf, "%d,", np->lno);
2083 if (np->cno != invalidcharno)
2084 fprintf (tagf, "%ld", np->cno);
2085 fputs ("\n", tagf);
2086 }
2087 else
2088 {
2089 /* Ctags mode */
2090 if (np->name == NULL)
2091 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2092
2093 if (cxref_style)
2094 {
2095 if (vgrind_style)
2096 fprintf (stdout, "%s %s %d\n",
2097 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2098 else
2099 fprintf (stdout, "%-16s %3d %-16s %s\n",
2100 np->name, np->lno, np->fdp->taggedfname, np->pat);
2101 }
2102 else
2103 {
2104 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2105
2106 if (np->is_func)
2107 { /* function or #define macro with args */
2108 putc (searchar, tagf);
2109 putc ('^', tagf);
2110
2111 for (sp = np->pat; *sp; sp++)
2112 {
2113 if (*sp == '\\' || *sp == searchar)
2114 putc ('\\', tagf);
2115 putc (*sp, tagf);
2116 }
2117 putc (searchar, tagf);
2118 }
2119 else
2120 { /* anything else; text pattern inadequate */
2121 fprintf (tagf, "%d", np->lno);
2122 }
2123 putc ('\n', tagf);
2124 }
2125 }
2126 } /* if this node contains a valid tag */
2127
2128 /* Output subentries that follow this one */
2129 put_entries (np->right);
2130 if (!CTAGS)
2131 put_entries (np->left);
2132 }
2133
2134 \f
/* C extensions.
   These values are combined into the c_ext language mask that is
   passed around by the C parsing code.  Note that the values of
   C_STAR and C_JAVA both include the C_PLPL bit. */
#define C_EXT 0x00fff /* C extensions */
#define C_PLAIN 0x00000 /* C */
#define C_PLPL 0x00001 /* C++ */
#define C_STAR 0x00003 /* C* */
#define C_JAVA 0x00005 /* JAVA */
#define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
#define YACC 0x10000 /* yacc file */
2143
/*
 * The C symbol tables.
 *
 * sym_type classifies the keywords listed in the gperf table below.
 * C_symtype returns st_none for a token that is not in the table, or
 * whose table entry is restricted to dialects not present in the
 * caller's mask.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};

/* Keyword lookup helpers; hash and in_word_set are gperf output. */
static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));
2162
2163 /* Feed stuff between (but not including) %[ and %] lines to:
2164 gperf -c -k 1,3 -o -p -r -t
2165 %[
2166 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2167 %%
2168 if, 0, st_C_ignore
2169 for, 0, st_C_ignore
2170 while, 0, st_C_ignore
2171 switch, 0, st_C_ignore
2172 return, 0, st_C_ignore
2173 @interface, 0, st_C_objprot
2174 @protocol, 0, st_C_objprot
2175 @implementation,0, st_C_objimpl
2176 @end, 0, st_C_objend
2177 import, C_JAVA, st_C_ignore
2178 package, C_JAVA, st_C_ignore
2179 friend, C_PLPL, st_C_ignore
2180 extends, C_JAVA, st_C_javastruct
2181 implements, C_JAVA, st_C_javastruct
2182 interface, C_JAVA, st_C_struct
2183 class, 0, st_C_class
2184 namespace, C_PLPL, st_C_struct
2185 domain, C_STAR, st_C_struct
2186 union, 0, st_C_struct
2187 struct, 0, st_C_struct
2188 extern, 0, st_C_extern
2189 enum, 0, st_C_enum
2190 typedef, 0, st_C_typedef
2191 define, 0, st_C_define
2192 operator, C_PLPL, st_C_operator
2193 template, 0, st_C_template
2194 bool, C_PLPL, st_C_typespec
2195 long, 0, st_C_typespec
2196 short, 0, st_C_typespec
2197 int, 0, st_C_typespec
2198 char, 0, st_C_typespec
2199 float, 0, st_C_typespec
2200 double, 0, st_C_typespec
2201 signed, 0, st_C_typespec
2202 unsigned, 0, st_C_typespec
2203 auto, 0, st_C_typespec
2204 void, 0, st_C_typespec
2205 static, 0, st_C_typespec
2206 const, 0, st_C_typespec
2207 volatile, 0, st_C_typespec
2208 explicit, C_PLPL, st_C_typespec
2209 mutable, C_PLPL, st_C_typespec
2210 typename, C_PLPL, st_C_typespec
2211 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2212 DEFUN, 0, st_C_gnumacro
2213 SYSCALL, 0, st_C_gnumacro
2214 ENTRY, 0, st_C_gnumacro
2215 PSEUDO, 0, st_C_gnumacro
2216 # These are defined inside C functions, so currently they are not met.
2217 # EXFUN used in glibc, DEFVAR_* in emacs.
2218 #EXFUN, 0, st_C_gnumacro
2219 #DEFVAR_, 0, st_C_gnumacro
2220 %]
2221 and replace lines between %< and %> with its output,
2222 then make in_word_set and C_stab_entry static. */
2223 /*%<*/
/* C code produced by gperf version 2.7.1 (19981006 egcs) */
/* Command-line: gperf -c -k 1,3 -o -p -r -t */
/* One keyword table entry: the keyword text, the dialect mask it is
   restricted to (0 means no restriction, see C_symtype), and its
   classification. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };

/* Bounds used by in_word_set to reject a candidate before and after
   hashing it. */
#define TOTAL_KEYWORDS 47
#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 15
#define MIN_HASH_VALUE 18
#define MAX_HASH_VALUE 138
/* maximum key range = 121, duplicates = 0 */
2234
#ifdef __GNUC__
__inline
#endif
/* Hash function for the keyword table (derived from gperf output).
   The result is LEN plus the table weight of the first character of
   STR, plus the weight of its third character unless LEN is 1 or 2.
   The caller must make sure the characters read are accessible. */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int key = len;

  /* Every length other than 1 and 2 also mixes in the third character. */
  if (key != 1 && key != 2)
    key += asso_values[(unsigned char)str[2]];
  key += asso_values[(unsigned char)str[0]];

  return key;
}
2286
2287 #ifdef __GNUC__
2288 __inline
2289 #endif
2290 static struct C_stab_entry *
2291 in_word_set (str, len)
2292 register const char *str;
2293 register unsigned int len;
2294 {
2295 static struct C_stab_entry wordlist[] =
2296 {
2297 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2298 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2299 {"if", 0, st_C_ignore},
2300 {""}, {""}, {""}, {""},
2301 {"int", 0, st_C_typespec},
2302 {""}, {""},
2303 {"void", 0, st_C_typespec},
2304 {""}, {""},
2305 {"interface", C_JAVA, st_C_struct},
2306 {""},
2307 {"SYSCALL", 0, st_C_gnumacro},
2308 {""},
2309 {"return", 0, st_C_ignore},
2310 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2311 {"while", 0, st_C_ignore},
2312 {"auto", 0, st_C_typespec},
2313 {""}, {""}, {""}, {""}, {""}, {""},
2314 {"float", 0, st_C_typespec},
2315 {"typedef", 0, st_C_typedef},
2316 {"typename", C_PLPL, st_C_typespec},
2317 {""}, {""}, {""},
2318 {"friend", C_PLPL, st_C_ignore},
2319 {"volatile", 0, st_C_typespec},
2320 {""}, {""},
2321 {"for", 0, st_C_ignore},
2322 {"const", 0, st_C_typespec},
2323 {"import", C_JAVA, st_C_ignore},
2324 {""},
2325 {"define", 0, st_C_define},
2326 {"long", 0, st_C_typespec},
2327 {"implements", C_JAVA, st_C_javastruct},
2328 {"signed", 0, st_C_typespec},
2329 {""},
2330 {"extern", 0, st_C_extern},
2331 {"extends", C_JAVA, st_C_javastruct},
2332 {""},
2333 {"mutable", C_PLPL, st_C_typespec},
2334 {"template", 0, st_C_template},
2335 {"short", 0, st_C_typespec},
2336 {"bool", C_PLPL, st_C_typespec},
2337 {"char", 0, st_C_typespec},
2338 {"class", 0, st_C_class},
2339 {"operator", C_PLPL, st_C_operator},
2340 {""},
2341 {"switch", 0, st_C_ignore},
2342 {""},
2343 {"ENTRY", 0, st_C_gnumacro},
2344 {""},
2345 {"package", C_JAVA, st_C_ignore},
2346 {"union", 0, st_C_struct},
2347 {"@end", 0, st_C_objend},
2348 {"struct", 0, st_C_struct},
2349 {"namespace", C_PLPL, st_C_struct},
2350 {""}, {""},
2351 {"domain", C_STAR, st_C_struct},
2352 {"@interface", 0, st_C_objprot},
2353 {"PSEUDO", 0, st_C_gnumacro},
2354 {"double", 0, st_C_typespec},
2355 {""},
2356 {"@protocol", 0, st_C_objprot},
2357 {""},
2358 {"static", 0, st_C_typespec},
2359 {""}, {""},
2360 {"DEFUN", 0, st_C_gnumacro},
2361 {""}, {""}, {""}, {""},
2362 {"explicit", C_PLPL, st_C_typespec},
2363 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2364 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2365 {""},
2366 {"enum", 0, st_C_enum},
2367 {""}, {""},
2368 {"unsigned", 0, st_C_typespec},
2369 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2370 {"@implementation",0, st_C_objimpl}
2371 };
2372
2373 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2374 {
2375 register int key = hash (str, len);
2376
2377 if (key <= MAX_HASH_VALUE && key >= 0)
2378 {
2379 register const char *s = wordlist[key].name;
2380
2381 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2382 return &wordlist[key];
2383 }
2384 }
2385 return 0;
2386 }
2387 /*%>*/
2388
2389 static enum sym_type
2390 C_symtype (str, len, c_ext)
2391 char *str;
2392 int len;
2393 int c_ext;
2394 {
2395 register struct C_stab_entry *se = in_word_set (str, len);
2396
2397 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2398 return st_none;
2399 return se->type;
2400 }
2401
2402 \f
/*
 * C functions and variables are recognized using a simple
 * finite automaton. fvdef is its state variable.
 * It is advanced by consider_token and by the character-level
 * scanning in C_entries.
 */
static enum
{
  fvnone, /* nothing seen */
  fdefunkey, /* Emacs DEFUN keyword seen */
  fdefunname, /* Emacs DEFUN name seen */
  foperator, /* func: operator keyword seen (cplpl) */
  fvnameseen, /* function or variable name seen */
  fstartlist, /* func: just after open parenthesis */
  finlist, /* func: in parameter list */
  flistseen, /* func: after parameter list */
  fignore, /* func: before open brace */
  vignore /* var-like: ignore until ';' */
} fvdef;

/* Set by consider_token when it meets `extern'; cleared again in
   several places where the fvdef automaton is reset. */
static bool fvextern; /* func or var: extern keyword seen; */
2422
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 * consider_token moves it from tnone to tkeyseen (only when the
 * `typedefs' option is set) and from tkeyseen to ttypeseen.
 */
static enum
{
  tnone, /* nothing seen */
  tkeyseen, /* typedef keyword seen */
  ttypeseen, /* defined type seen */
  tinbody, /* inside typedef body */
  tend, /* just before typedef tag */
  tignore /* junk after typedef tag */
} typdef;
2436
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton. `structdef' is its state
 * variable.
 * consider_token moves it from snone to skeyseen on a struct-like
 * keyword, from skeyseen to stagseen on the following token, and
 * from stagseen to scolonseen on a Java `extends' or `implements'.
 */
static enum
{
  snone, /* nothing seen yet,
            or in struct body if cblev > 0 */
  skeyseen, /* struct-like keyword seen */
  stagseen, /* struct-like tag seen */
  sintemplate, /* inside template (ignore) */
  scolonseen /* colon seen after struct-like tag */
} structdef;
2451
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token replaces it with a fresh copy made by savenstr each
 * time a class name is seen; the previous copy is not freed there.
 */
static char *objtag = "<uninited>";
2456
/*
 * Yet another little state machine to deal with preprocessor lines.
 * consider_token advances it from dsharpseen to ddefineseen or
 * dignorerest, and from ddefineseen to dignorerest; the CNL macro
 * resets it to dnone when a new line is read.
 */
static enum
{
  dnone, /* nothing seen */
  dsharpseen, /* '#' seen as first char on line */
  ddefineseen, /* '#' and 'define' seen */
  dignorerest /* ignore rest of line */
} definedef;
2467
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 * objdef is its state variable; consider_token performs the
 * transitions that are triggered by tokens.
 */
static enum
{
  onone, /* nothing seen */
  oprotocol, /* @interface or @protocol seen */
  oimplementation, /* @implementation seen */
  otagseen, /* class name seen */
  oparenseen, /* parenthesis before category seen */
  ocatseen, /* category name seen */
  oinbody, /* in @implementation body */
  omethodsign, /* in @implementation body, after +/- */
  omethodtag, /* after method name */
  omethodcolon, /* after method colon */
  omethodparm, /* after method parameter */
  oignore /* wait for @end */
} objdef;
2487
2488
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged. Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  bool valid; /* a token is stored here; make_C_tag clears this after use */
  bool named; /* make_C_tag passes token_name as the tag name when set */
  int offset; /* offset of the token within `line' */
  int length; /* length of the token */
  int lineno; /* value of lineno when the token was recorded */
  long linepos; /* value of the line position recorded with the token */
  char *line; /* buffer of the line that contains the token */
} token; /* latest token read */
static linebuffer token_name; /* its name */
2504
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above __P((int, char *, int));
static void popclass_above __P((int));
static void write_classname __P((linebuffer *, char *qualifier));

/* cname and cblev are parallel arrays: entry I holds the name (which
   may be NULL) and the curly brace level of nesting level I. */
static struct {
  char **cname; /* nested class names */
  int *cblev; /* nested class curly brace level */
  int nl; /* class nesting level (elements used) */
  int size; /* length of the array */
} cstack; /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   The expansion refers to a variable named `cblev', which must be in
   scope wherever this macro is used. */
#define instruct (structdef == snone && nestlev > 0 \
                  && cblev == cstack.cblev[nestlev-1] + 1)
2524
2525 static void
2526 pushclass_above (cblev, str, len)
2527 int cblev;
2528 char *str;
2529 int len;
2530 {
2531 int nl;
2532
2533 popclass_above (cblev);
2534 nl = cstack.nl;
2535 if (nl >= cstack.size)
2536 {
2537 int size = cstack.size *= 2;
2538 xrnew (cstack.cname, size, char *);
2539 xrnew (cstack.cblev, size, int);
2540 }
2541 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2542 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2543 cstack.cblev[nl] = cblev;
2544 cstack.nl = nl + 1;
2545 }
2546
2547 static void
2548 popclass_above (cblev)
2549 int cblev;
2550 {
2551 int nl;
2552
2553 for (nl = cstack.nl - 1;
2554 nl >= 0 && cstack.cblev[nl] >= cblev;
2555 nl--)
2556 {
2557 if (cstack.cname[nl] != NULL)
2558 free (cstack.cname[nl]);
2559 cstack.nl = nl;
2560 }
2561 }
2562
2563 static void
2564 write_classname (cn, qualifier)
2565 linebuffer *cn;
2566 char *qualifier;
2567 {
2568 int i, len;
2569 int qlen = strlen (qualifier);
2570
2571 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2572 {
2573 len = 0;
2574 cn->len = 0;
2575 cn->buffer[0] = '\0';
2576 }
2577 else
2578 {
2579 len = strlen (cstack.cname[0]);
2580 linebuffer_setlen (cn, len);
2581 strcpy (cn->buffer, cstack.cname[0]);
2582 }
2583 for (i = 1; i < cstack.nl; i++)
2584 {
2585 char *s;
2586 int slen;
2587
2588 s = cstack.cname[i];
2589 if (s == NULL)
2590 continue;
2591 slen = strlen (s);
2592 len += slen + qlen;
2593 linebuffer_setlen (cn, len);
2594 strncat (cn->buffer, qualifier, qlen);
2595 strncat (cn->buffer, s, slen);
2596 }
2597 }
2598
2599 \f
2600 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2601 static void make_C_tag __P((bool));
2602
2603 /*
2604 * consider_token ()
2605 * checks to see if the current token is at the start of a
2606 * function or variable, or corresponds to a typedef, or
2607 * is a struct/union/enum tag, or #define, or an enum constant.
2608 *
2609 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2610 * with args. C_EXTP points to which language we are looking at.
2611 *
2612 * Globals
2613 * fvdef IN OUT
2614 * structdef IN OUT
2615 * definedef IN OUT
2616 * typdef IN OUT
2617 * objdef IN OUT
2618 */
2619
2620 static bool
2621 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2622 register char *str; /* IN: token pointer */
2623 register int len; /* IN: token length */
2624 register int c; /* IN: first char after the token */
2625 int *c_extp; /* IN, OUT: C extensions mask */
2626 int cblev; /* IN: curly brace level */
2627 int parlev; /* IN: parenthesis level */
2628 bool *is_func_or_var; /* OUT: function or variable found */
2629 {
2630 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2631 structtype is the type of the preceding struct-like keyword, and
2632 structcblev is the curly brace level where it has been seen. */
2633 static enum sym_type structtype;
2634 static int structcblev;
2635 static enum sym_type toktype;
2636
2637
2638 toktype = C_symtype (str, len, *c_extp);
2639
2640 /*
2641 * Advance the definedef state machine.
2642 */
2643 switch (definedef)
2644 {
2645 case dnone:
2646 /* We're not on a preprocessor line. */
2647 if (toktype == st_C_gnumacro)
2648 {
2649 fvdef = fdefunkey;
2650 return FALSE;
2651 }
2652 break;
2653 case dsharpseen:
2654 if (toktype == st_C_define)
2655 {
2656 definedef = ddefineseen;
2657 }
2658 else
2659 {
2660 definedef = dignorerest;
2661 }
2662 return FALSE;
2663 case ddefineseen:
2664 /*
2665 * Make a tag for any macro, unless it is a constant
2666 * and constantypedefs is FALSE.
2667 */
2668 definedef = dignorerest;
2669 *is_func_or_var = (c == '(');
2670 if (!*is_func_or_var && !constantypedefs)
2671 return FALSE;
2672 else
2673 return TRUE;
2674 case dignorerest:
2675 return FALSE;
2676 default:
2677 error ("internal error: definedef value.", (char *)NULL);
2678 }
2679
2680 /*
2681 * Now typedefs
2682 */
2683 switch (typdef)
2684 {
2685 case tnone:
2686 if (toktype == st_C_typedef)
2687 {
2688 if (typedefs)
2689 typdef = tkeyseen;
2690 fvextern = FALSE;
2691 fvdef = fvnone;
2692 return FALSE;
2693 }
2694 break;
2695 case tkeyseen:
2696 switch (toktype)
2697 {
2698 case st_none:
2699 case st_C_typespec:
2700 case st_C_class:
2701 case st_C_struct:
2702 case st_C_enum:
2703 typdef = ttypeseen;
2704 break;
2705 }
2706 break;
2707 case ttypeseen:
2708 if (structdef == snone && fvdef == fvnone)
2709 {
2710 fvdef = fvnameseen;
2711 return TRUE;
2712 }
2713 break;
2714 case tend:
2715 switch (toktype)
2716 {
2717 case st_C_typespec:
2718 case st_C_class:
2719 case st_C_struct:
2720 case st_C_enum:
2721 return FALSE;
2722 }
2723 return TRUE;
2724 }
2725
2726 /*
2727 * This structdef business is NOT invoked when we are ctags and the
2728 * file is plain C. This is because a struct tag may have the same
2729 * name as another tag, and this loses with ctags.
2730 */
2731 switch (toktype)
2732 {
2733 case st_C_javastruct:
2734 if (structdef == stagseen)
2735 structdef = scolonseen;
2736 return FALSE;
2737 case st_C_template:
2738 case st_C_class:
2739 if (cblev == 0
2740 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2741 && definedef == dnone && structdef == snone
2742 && typdef == tnone && fvdef == fvnone)
2743 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2744 if (toktype == st_C_template)
2745 break;
2746 /* FALLTHRU */
2747 case st_C_struct:
2748 case st_C_enum:
2749 if (parlev == 0
2750 && fvdef != vignore
2751 && (typdef == tkeyseen
2752 || (typedefs_or_cplusplus && structdef == snone)))
2753 {
2754 structdef = skeyseen;
2755 structtype = toktype;
2756 structcblev = cblev;
2757 }
2758 return FALSE;
2759 }
2760
2761 if (structdef == skeyseen)
2762 {
2763 structdef = stagseen;
2764 return TRUE;
2765 }
2766
2767 if (typdef != tnone)
2768 definedef = dnone;
2769
2770 /* Detect Objective C constructs. */
2771 switch (objdef)
2772 {
2773 case onone:
2774 switch (toktype)
2775 {
2776 case st_C_objprot:
2777 objdef = oprotocol;
2778 return FALSE;
2779 case st_C_objimpl:
2780 objdef = oimplementation;
2781 return FALSE;
2782 }
2783 break;
2784 case oimplementation:
2785 /* Save the class tag for functions or variables defined inside. */
2786 objtag = savenstr (str, len);
2787 objdef = oinbody;
2788 return FALSE;
2789 case oprotocol:
2790 /* Save the class tag for categories. */
2791 objtag = savenstr (str, len);
2792 objdef = otagseen;
2793 *is_func_or_var = TRUE;
2794 return TRUE;
2795 case oparenseen:
2796 objdef = ocatseen;
2797 *is_func_or_var = TRUE;
2798 return TRUE;
2799 case oinbody:
2800 break;
2801 case omethodsign:
2802 if (parlev == 0)
2803 {
2804 objdef = omethodtag;
2805 linebuffer_setlen (&token_name, len);
2806 strncpy (token_name.buffer, str, len);
2807 token_name.buffer[len] = '\0';
2808 return TRUE;
2809 }
2810 return FALSE;
2811 case omethodcolon:
2812 if (parlev == 0)
2813 objdef = omethodparm;
2814 return FALSE;
2815 case omethodparm:
2816 if (parlev == 0)
2817 {
2818 objdef = omethodtag;
2819 linebuffer_setlen (&token_name, token_name.len + len);
2820 strncat (token_name.buffer, str, len);
2821 return TRUE;
2822 }
2823 return FALSE;
2824 case oignore:
2825 if (toktype == st_C_objend)
2826 {
2827 /* Memory leakage here: the string pointed by objtag is
2828 never released, because many tests would be needed to
2829 avoid breaking on incorrect input code. The amount of
2830 memory leaked here is the sum of the lengths of the
2831 class tags.
2832 free (objtag); */
2833 objdef = onone;
2834 }
2835 return FALSE;
2836 }
2837
2838 /* A function, variable or enum constant? */
2839 switch (toktype)
2840 {
2841 case st_C_extern:
2842 fvextern = TRUE;
2843 /* FALLTHRU */
2844 case st_C_typespec:
2845 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2846 fvdef = fvnone; /* should be useless */
2847 return FALSE;
2848 case st_C_ignore:
2849 fvextern = FALSE;
2850 fvdef = vignore;
2851 return FALSE;
2852 case st_C_operator:
2853 fvdef = foperator;
2854 *is_func_or_var = TRUE;
2855 return TRUE;
2856 case st_none:
2857 if (constantypedefs
2858 && structdef == snone
2859 && structtype == st_C_enum && cblev > structcblev)
2860 return TRUE; /* enum constant */
2861 switch (fvdef)
2862 {
2863 case fdefunkey:
2864 if (cblev > 0)
2865 break;
2866 fvdef = fdefunname; /* GNU macro */
2867 *is_func_or_var = TRUE;
2868 return TRUE;
2869 case fvnone:
2870 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2871 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2872 {
2873 fvdef = vignore;
2874 return FALSE;
2875 }
2876 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2877 {
2878 fvdef = foperator;
2879 *is_func_or_var = TRUE;
2880 return TRUE;
2881 }
2882 if (cblev > 0 && !instruct)
2883 break;
2884 fvdef = fvnameseen; /* function or variable */
2885 *is_func_or_var = TRUE;
2886 return TRUE;
2887 }
2888 break;
2889 }
2890
2891 return FALSE;
2892 }
2893
2894 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read. By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
static struct
{
  long linepos; /* value of charno recorded when the line was read */
  linebuffer lb; /* text of the line */
} lbs[2];

/* The macros below refer to variables local to C_entries (curndx,
   newndx, lp, quotednl, inf, savetoken), so they can only be used
   where those names are in scope. */

#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Read the next input line into the current buffer: record its
   position, bump the line counter, advance charno by what readline
   returns, point lp at the start of the buffer, clear quotednl and
   mark the current buffer as the new one.  definedef is left alone. */
#define CNL_SAVE_DEFINEDEF() \
do { \
  curlinepos = charno; \
  lineno++; \
  linecharno = charno; \
  charno += readline (&curlb, inf); \
  lp = curlb.buffer; \
  quotednl = FALSE; \
  newndx = curndx; \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also restore the token saved in
   savetoken (if it is valid) and reset definedef to dnone. */
#define CNL() \
do { \
  CNL_SAVE_DEFINEDEF(); \
  if (savetoken.valid) \
    { \
      token = savetoken; \
      savetoken.valid = FALSE; \
    } \
  definedef = dnone; \
} while (0)
2935
2936
2937 static void
2938 make_C_tag (isfun)
2939 bool isfun;
2940 {
2941 /* This function should never be called when token.valid is FALSE, but
2942 we must protect against invalid input or internal errors. */
2943 if (DEBUG || token.valid)
2944 {
2945 if (traditional_tag_style)
2946 {
2947 /* This was the original code. Now we call new_pfnote instead,
2948 which uses the new method for naming tags (see new_pfnote). */
2949 char *name = NULL;
2950
2951 if (CTAGS || token.named)
2952 name = savestr (token_name.buffer);
2953 if (DEBUG && !token.valid)
2954 {
2955 if (token.named)
2956 name = concat (name, "##invalid##", "");
2957 else
2958 name = savestr ("##invalid##");
2959 }
2960 pfnote (name, isfun, token.line,
2961 token.offset+token.length+1, token.lineno, token.linepos);
2962 }
2963 else
2964 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2965 token.offset+token.length+1, token.lineno, token.linepos);
2966 token.valid = FALSE;
2967 }
2968 }
2969
2970
2971 /*
2972 * C_entries ()
2973 * This routine finds functions, variables, typedefs,
2974 * #define's, enum constants and struct/union/enum definitions in
2975 * C syntax and adds them to the list.
2976 */
2977 static void
2978 C_entries (c_ext, inf)
2979 int c_ext; /* extension of C */
2980 FILE *inf; /* input file */
2981 {
2982 register char c; /* latest char read; '\0' for end of line */
2983 register char *lp; /* pointer one beyond the character `c' */
2984 int curndx, newndx; /* indices for current and new lb */
2985 register int tokoff; /* offset in line of start of current token */
2986 register int toklen; /* length of current token */
2987 char *qualifier; /* string used to qualify names */
2988 int qlen; /* length of qualifier */
2989 int cblev; /* current curly brace level */
2990 int parlev; /* current parenthesis level */
2991 int typdefcblev; /* cblev where a typedef struct body begun */
2992 bool incomm, inquote, inchar, quotednl, midtoken;
2993 bool cplpl, cjava;
2994 bool yacc_rules; /* in the rules part of a yacc file */
2995 struct tok savetoken; /* token saved during preprocessor handling */
2996
2997
2998 initbuffer (&token_name);
2999 initbuffer (&lbs[0].lb);
3000 initbuffer (&lbs[1].lb);
3001 if (cstack.size == 0)
3002 {
3003 cstack.size = (DEBUG) ? 1 : 4;
3004 cstack.nl = 0;
3005 cstack.cname = xnew (cstack.size, char *);
3006 cstack.cblev = xnew (cstack.size, int);
3007 }
3008
3009 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3010 curndx = newndx = 0;
3011 lineno = 0;
3012 charno = 0;
3013 lp = curlb.buffer;
3014 *lp = 0;
3015
3016 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3017 structdef = snone; definedef = dnone; objdef = onone;
3018 yacc_rules = FALSE;
3019 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3020 token.valid = savetoken.valid = FALSE;
3021 cblev = 0;
3022 parlev = 0;
3023 cplpl = (c_ext & C_PLPL) == C_PLPL;
3024 cjava = (c_ext & C_JAVA) == C_JAVA;
3025 if (cjava)
3026 { qualifier = "."; qlen = 1; }
3027 else
3028 { qualifier = "::"; qlen = 2; }
3029
3030
3031 while (!feof (inf))
3032 {
3033 c = *lp++;
3034 if (c == '\\')
3035 {
3036 /* If we're at the end of the line, the next character is a
3037 '\0'; don't skip it, because it's the thing that tells us
3038 to read the next line. */
3039 if (*lp == '\0')
3040 {
3041 quotednl = TRUE;
3042 continue;
3043 }
3044 lp++;
3045 c = ' ';
3046 }
3047 else if (incomm)
3048 {
3049 switch (c)
3050 {
3051 case '*':
3052 if (*lp == '/')
3053 {
3054 c = *lp++;
3055 incomm = FALSE;
3056 }
3057 break;
3058 case '\0':
3059 /* Newlines inside comments do not end macro definitions in
3060 traditional cpp. */
3061 CNL_SAVE_DEFINEDEF ();
3062 break;
3063 }
3064 continue;
3065 }
3066 else if (inquote)
3067 {
3068 switch (c)
3069 {
3070 case '"':
3071 inquote = FALSE;
3072 break;
3073 case '\0':
3074 /* Newlines inside strings do not end macro definitions
3075 in traditional cpp, even though compilers don't
3076 usually accept them. */
3077 CNL_SAVE_DEFINEDEF ();
3078 break;
3079 }
3080 continue;
3081 }
3082 else if (inchar)
3083 {
3084 switch (c)
3085 {
3086 case '\0':
3087 /* Hmmm, something went wrong. */
3088 CNL ();
3089 /* FALLTHRU */
3090 case '\'':
3091 inchar = FALSE;
3092 break;
3093 }
3094 continue;
3095 }
3096 else
3097 switch (c)
3098 {
3099 case '"':
3100 inquote = TRUE;
3101 switch (fvdef)
3102 {
3103 case fdefunkey:
3104 case fstartlist:
3105 case finlist:
3106 case fignore:
3107 case vignore:
3108 break;
3109 default:
3110 fvextern = FALSE;
3111 fvdef = fvnone;
3112 }
3113 continue;
3114 case '\'':
3115 inchar = TRUE;
3116 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3117 {
3118 fvextern = FALSE;
3119 fvdef = fvnone;
3120 }
3121 continue;
3122 case '/':
3123 if (*lp == '*')
3124 {
3125 lp++;
3126 incomm = TRUE;
3127 continue;
3128 }
3129 else if (/* cplpl && */ *lp == '/')
3130 {
3131 c = '\0';
3132 break;
3133 }
3134 else
3135 break;
3136 case '%':
3137 if ((c_ext & YACC) && *lp == '%')
3138 {
3139 /* Entering or exiting rules section in yacc file. */
3140 lp++;
3141 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3142 typdef = tnone; structdef = snone;
3143 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3144 cblev = 0;
3145 yacc_rules = !yacc_rules;
3146 continue;
3147 }
3148 else
3149 break;
3150 case '#':
3151 if (definedef == dnone)
3152 {
3153 char *cp;
3154 bool cpptoken = TRUE;
3155
3156 /* Look back on this line. If all blanks, or nonblanks
3157 followed by an end of comment, this is a preprocessor
3158 token. */
3159 for (cp = newlb.buffer; cp < lp-1; cp++)
3160 if (!iswhite (*cp))
3161 {
3162 if (*cp == '*' && *(cp+1) == '/')
3163 {
3164 cp++;
3165 cpptoken = TRUE;
3166 }
3167 else
3168 cpptoken = FALSE;
3169 }
3170 if (cpptoken)
3171 definedef = dsharpseen;
3172 } /* if (definedef == dnone) */
3173
3174 continue;
3175 } /* switch (c) */
3176
3177
3178 /* Consider token only if some involved conditions are satisfied. */
3179 if (typdef != tignore
3180 && definedef != dignorerest
3181 && fvdef != finlist
3182 && structdef != sintemplate
3183 && (definedef != dnone
3184 || structdef != scolonseen))
3185 {
3186 if (midtoken)
3187 {
3188 if (endtoken (c))
3189 {
3190 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3191 {
3192 /*
3193 * This handles :: in the middle, but not at the
3194 * beginning of an identifier. Also, space-separated
3195 * :: is not recognised.
3196 */
3197 lp += 2;
3198 toklen += 2;
3199 c = lp[-1];
3200 goto still_in_token;
3201 }
3202 else
3203 {
3204 bool funorvar = FALSE;
3205
3206 if (yacc_rules
3207 || consider_token (newlb.buffer + tokoff, toklen, c,
3208 &c_ext, cblev, parlev, &funorvar))
3209 {
3210 if (fvdef == foperator)
3211 {
3212 char *oldlp = lp;
3213 lp = skip_spaces (lp-1);
3214 if (*lp != '\0')
3215 lp += 1;
3216 while (*lp != '\0'
3217 && !iswhite (*lp) && *lp != '(')
3218 lp += 1;
3219 c = *lp++;
3220 toklen += lp - oldlp;
3221 }
3222 token.named = FALSE;
3223 if ((c_ext & C_EXT) /* not pure C */
3224 && nestlev > 0 && definedef == dnone)
3225 /* in struct body */
3226 {
3227 write_classname (&token_name, qualifier);
3228 linebuffer_setlen (&token_name,
3229 token_name.len+qlen+toklen);
3230 strcat (token_name.buffer, qualifier);
3231 strncat (token_name.buffer,
3232 newlb.buffer + tokoff, toklen);
3233 token.named = TRUE;
3234 }
3235 else if (objdef == ocatseen)
3236 /* Objective C category */
3237 {
3238 int len = strlen (objtag) + 2 + toklen;
3239 linebuffer_setlen (&token_name, len);
3240 strcpy (token_name.buffer, objtag);
3241 strcat (token_name.buffer, "(");
3242 strncat (token_name.buffer,
3243 newlb.buffer + tokoff, toklen);
3244 strcat (token_name.buffer, ")");
3245 token.named = TRUE;
3246 }
3247 else if (objdef == omethodtag
3248 || objdef == omethodparm)
3249 /* Objective C method */
3250 {
3251 token.named = TRUE;
3252 }
3253 else if (fvdef == fdefunname)
3254 /* GNU DEFUN and similar macros */
3255 {
3256 bool defun = (newlb.buffer[tokoff] == 'F');
3257 int off = tokoff;
3258 int len = toklen;
3259
3260 /* Rewrite the tag so that emacs lisp DEFUNs
3261 can be found by their elisp name */
3262 if (defun)
3263 {
3264 off += 1;
3265 len -= 1;
3266 }
3267 len = toklen;
3268 linebuffer_setlen (&token_name, len);
3269 strncpy (token_name.buffer,
3270 newlb.buffer + off, len);
3271 token_name.buffer[len] = '\0';
3272 if (defun)
3273 while (--len >= 0)
3274 if (token_name.buffer[len] == '_')
3275 token_name.buffer[len] = '-';
3276 token.named = defun;
3277 }
3278 else
3279 {
3280 linebuffer_setlen (&token_name, toklen);
3281 strncpy (token_name.buffer,
3282 newlb.buffer + tokoff, toklen);
3283 token_name.buffer[toklen] = '\0';
3284 /* Name macros and members. */
3285 token.named = (structdef == stagseen
3286 || typdef == ttypeseen
3287 || typdef == tend
3288 || (funorvar
3289 && definedef == dignorerest)
3290 || (funorvar
3291 && definedef == dnone
3292 && structdef == snone
3293 && cblev > 0));
3294 }
3295 token.lineno = lineno;
3296 token.offset = tokoff;
3297 token.length = toklen;
3298 token.line = newlb.buffer;
3299 token.linepos = newlinepos;
3300 token.valid = TRUE;
3301
3302 if (definedef == dnone
3303 && (fvdef == fvnameseen
3304 || fvdef == foperator
3305 || structdef == stagseen
3306 || typdef == tend
3307 || typdef == ttypeseen
3308 || objdef != onone))
3309 {
3310 if (current_lb_is_new)
3311 switch_line_buffers ();
3312 }
3313 else if (definedef != dnone
3314 || fvdef == fdefunname
3315 || instruct)
3316 make_C_tag (funorvar);
3317 }
3318 midtoken = FALSE;
3319 }
3320 } /* if (endtoken (c)) */
3321 else if (intoken (c))
3322 still_in_token:
3323 {
3324 toklen++;
3325 continue;
3326 }
3327 } /* if (midtoken) */
3328 else if (begtoken (c))
3329 {
3330 switch (definedef)
3331 {
3332 case dnone:
3333 switch (fvdef)
3334 {
3335 case fstartlist:
3336 fvdef = finlist;
3337 continue;
3338 case flistseen:
3339 make_C_tag (TRUE); /* a function */
3340 fvdef = fignore;
3341 break;
3342 case fvnameseen:
3343 fvdef = fvnone;
3344 break;
3345 }
3346 if (structdef == stagseen && !cjava)
3347 {
3348 popclass_above (cblev);
3349 structdef = snone;
3350 }
3351 break;
3352 case dsharpseen:
3353 savetoken = token;
3354 }
3355 if (!yacc_rules || lp == newlb.buffer + 1)
3356 {
3357 tokoff = lp - 1 - newlb.buffer;
3358 toklen = 1;
3359 midtoken = TRUE;
3360 }
3361 continue;
3362 } /* if (begtoken) */
3363 } /* if must look at token */
3364
3365
3366 /* Detect end of line, colon, comma, semicolon and various braces
3367 after having handled a token.*/
3368 switch (c)
3369 {
3370 case ':':
3371 if (yacc_rules && token.offset == 0 && token.valid)
3372 {
3373 make_C_tag (FALSE); /* a yacc function */
3374 break;
3375 }
3376 if (definedef != dnone)
3377 break;
3378 switch (objdef)
3379 {
3380 case otagseen:
3381 objdef = oignore;
3382 make_C_tag (TRUE); /* an Objective C class */
3383 break;
3384 case omethodtag:
3385 case omethodparm:
3386 objdef = omethodcolon;
3387 linebuffer_setlen (&token_name, token_name.len + 1);
3388 strcat (token_name.buffer, ":");
3389 break;
3390 }
3391 if (structdef == stagseen)
3392 structdef = scolonseen;
3393 break;
3394 case ';':
3395 if (definedef != dnone)
3396 break;
3397 switch (typdef)
3398 {
3399 case tend:
3400 case ttypeseen:
3401 make_C_tag (FALSE); /* a typedef */
3402 typdef = tnone;
3403 fvdef = fvnone;
3404 break;
3405 case tnone:
3406 case tinbody:
3407 case tignore:
3408 switch (fvdef)
3409 {
3410 case fignore:
3411 if (typdef == tignore)
3412 fvdef = fvnone;
3413 break;
3414 case fvnameseen:
3415 if ((globals && cblev == 0 && (!fvextern || declarations))
3416 || (members && instruct))
3417 make_C_tag (FALSE); /* a variable */
3418 fvextern = FALSE;
3419 fvdef = fvnone;
3420 token.valid = FALSE;
3421 break;
3422 case flistseen:
3423 if ((declarations && typdef == tnone && !instruct)
3424 || (members && typdef != tignore && instruct))
3425 make_C_tag (TRUE); /* a function declaration */
3426 /* FALLTHRU */
3427 default:
3428 fvextern = FALSE;
3429 fvdef = fvnone;
3430 if (declarations
3431 && structdef == stagseen && (c_ext & C_PLPL))
3432 make_C_tag (FALSE); /* forward declaration */
3433 else
3434 /* The following instruction invalidates the token.
3435 Probably the token should be invalidated in all other
3436 cases where some state machine is reset prematurely. */
3437 token.valid = FALSE;
3438 } /* switch (fvdef) */
3439 /* FALLTHRU */
3440 default:
3441 if (!instruct)
3442 typdef = tnone;
3443 }
3444 if (structdef == stagseen)
3445 structdef = snone;
3446 break;
3447 case ',':
3448 if (definedef != dnone)
3449 break;
3450 switch (objdef)
3451 {
3452 case omethodtag:
3453 case omethodparm:
3454 make_C_tag (TRUE); /* an Objective C method */
3455 objdef = oinbody;
3456 break;
3457 }
3458 switch (fvdef)
3459 {
3460 case fdefunkey:
3461 case foperator:
3462 case fstartlist:
3463 case finlist:
3464 case fignore:
3465 case vignore:
3466 break;
3467 case fdefunname:
3468 fvdef = fignore;
3469 break;
3470 case fvnameseen: /* a variable */
3471 if ((globals && cblev == 0 && (!fvextern || declarations))
3472 || (members && instruct))
3473 make_C_tag (FALSE);
3474 break;
3475 case flistseen: /* a function */
3476 if ((declarations && typdef == tnone && !instruct)
3477 || (members && typdef != tignore && instruct))
3478 {
3479 make_C_tag (TRUE); /* a function declaration */
3480 fvdef = fvnameseen;
3481 }
3482 else if (!declarations)
3483 fvdef = fvnone;
3484 token.valid = FALSE;
3485 break;
3486 default:
3487 fvdef = fvnone;
3488 }
3489 if (structdef == stagseen)
3490 structdef = snone;
3491 break;
3492 case '[':
3493 if (definedef != dnone)
3494 break;
3495 if (structdef == stagseen)
3496 structdef = snone;
3497 switch (typdef)
3498 {
3499 case ttypeseen:
3500 case tend:
3501 typdef = tignore;
3502 make_C_tag (FALSE); /* a typedef */
3503 break;
3504 case tnone:
3505 case tinbody:
3506 switch (fvdef)
3507 {
3508 case foperator:
3509 case finlist:
3510 case fignore:
3511 case vignore:
3512 break;
3513 case fvnameseen:
3514 if ((members && cblev == 1)
3515 || (globals && cblev == 0
3516 && (!fvextern || declarations)))
3517 make_C_tag (FALSE); /* a variable */
3518 /* FALLTHRU */
3519 default:
3520 fvdef = fvnone;
3521 }
3522 break;
3523 }
3524 break;
3525 case '(':
3526 if (definedef != dnone)
3527 break;
3528 if (objdef == otagseen && parlev == 0)
3529 objdef = oparenseen;
3530 switch (fvdef)
3531 {
3532 case fvnameseen:
3533 if (typdef == ttypeseen
3534 && *lp != '*'
3535 && !instruct)
3536 {
3537 /* This handles constructs like:
3538 typedef void OperatorFun (int fun); */
3539 make_C_tag (FALSE);
3540 typdef = tignore;
3541 fvdef = fignore;
3542 break;
3543 }
3544 /* FALLTHRU */
3545 case foperator:
3546 fvdef = fstartlist;
3547 break;
3548 case flistseen:
3549 fvdef = finlist;
3550 break;
3551 }
3552 parlev++;
3553 break;
3554 case ')':
3555 if (definedef != dnone)
3556 break;
3557 if (objdef == ocatseen && parlev == 1)
3558 {
3559 make_C_tag (TRUE); /* an Objective C category */
3560 objdef = oignore;
3561 }
3562 if (--parlev == 0)
3563 {
3564 switch (fvdef)
3565 {
3566 case fstartlist:
3567 case finlist:
3568 fvdef = flistseen;
3569 break;
3570 }
3571 if (!instruct
3572 && (typdef == tend
3573 || typdef == ttypeseen))
3574 {
3575 typdef = tignore;
3576 make_C_tag (FALSE); /* a typedef */
3577 }
3578 }
3579 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3580 parlev = 0;
3581 break;
3582 case '{':
3583 if (definedef != dnone)
3584 break;
3585 if (typdef == ttypeseen)
3586 {
3587 /* Whenever typdef is set to tinbody (currently only
3588 here), typdefcblev should be set to cblev. */
3589 typdef = tinbody;
3590 typdefcblev = cblev;
3591 }
3592 switch (fvdef)
3593 {
3594 case flistseen:
3595 make_C_tag (TRUE); /* a function */
3596 /* FALLTHRU */
3597 case fignore:
3598 fvdef = fvnone;
3599 break;
3600 case fvnone:
3601 switch (objdef)
3602 {
3603 case otagseen:
3604 make_C_tag (TRUE); /* an Objective C class */
3605 objdef = oignore;
3606 break;
3607 case omethodtag:
3608 case omethodparm:
3609 make_C_tag (TRUE); /* an Objective C method */
3610 objdef = oinbody;
3611 break;
3612 default:
3613 /* Neutralize `extern "C" {' grot. */
3614 if (cblev == 0 && structdef == snone && nestlev == 0
3615 && typdef == tnone)
3616 cblev = -1;
3617 }
3618 }
3619 switch (structdef)
3620 {
3621 case skeyseen: /* unnamed struct */
3622 pushclass_above (cblev, NULL, 0);
3623 structdef = snone;
3624 break;
3625 case stagseen: /* named struct or enum */
3626 case scolonseen: /* a class */
3627 pushclass_above (cblev, token.line+token.offset, token.length);
3628 structdef = snone;
3629 make_C_tag (FALSE); /* a struct or enum */
3630 break;
3631 }
3632 cblev++;
3633 break;
3634 case '*':
3635 if (definedef != dnone)
3636 break;
3637 if (fvdef == fstartlist)
3638 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3639 break;
3640 case '}':
3641 if (definedef != dnone)
3642 break;
3643 if (!noindentypedefs && lp == newlb.buffer + 1)
3644 {
3645 cblev = 0; /* reset curly brace level if first column */
3646 parlev = 0; /* also reset paren level, just in case... */
3647 }
3648 else if (cblev > 0)
3649 cblev--;
3650 popclass_above (cblev);
3651 structdef = snone;
3652 /* Only if typdef == tinbody is typdefcblev significant. */
3653 if (typdef == tinbody && cblev <= typdefcblev)
3654 {
3655 assert (cblev == typdefcblev);
3656 typdef = tend;
3657 }
3658 break;
3659 case '=':
3660 if (definedef != dnone)
3661 break;
3662 switch (fvdef)
3663 {
3664 case foperator:
3665 case finlist:
3666 case fignore:
3667 case vignore:
3668 break;
3669 case fvnameseen:
3670 if ((members && cblev == 1)
3671 || (globals && cblev == 0 && (!fvextern || declarations)))
3672 make_C_tag (FALSE); /* a variable */
3673 /* FALLTHRU */
3674 default:
3675 fvdef = vignore;
3676 }
3677 break;
3678 case '<':
3679 if (cplpl && structdef == stagseen)
3680 {
3681 structdef = sintemplate;
3682 break;
3683 }
3684 goto resetfvdef;
3685 case '>':
3686 if (structdef == sintemplate)
3687 {
3688 structdef = stagseen;
3689 break;
3690 }
3691 goto resetfvdef;
3692 case '+':
3693 case '-':
3694 if (objdef == oinbody && cblev == 0)
3695 {
3696 objdef = omethodsign;
3697 break;
3698 }
3699 /* FALLTHRU */
3700 resetfvdef:
3701 case '#': case '~': case '&': case '%': case '/': case '|':
3702 case '^': case '!': case '.': case '?': case ']':
3703 if (definedef != dnone)
3704 break;
3705 /* These surely cannot follow a function tag in C. */
3706 switch (fvdef)
3707 {
3708 case foperator:
3709 case finlist:
3710 case fignore:
3711 case vignore:
3712 break;
3713 default:
3714 fvdef = fvnone;
3715 }
3716 break;
3717 case '\0':
3718 if (objdef == otagseen)
3719 {
3720 make_C_tag (TRUE); /* an Objective C class */
3721 objdef = oignore;
3722 }
3723 /* If a macro spans multiple lines don't reset its state. */
3724 if (quotednl)
3725 CNL_SAVE_DEFINEDEF ();
3726 else
3727 CNL ();
3728 break;
3729 } /* switch (c) */
3730
3731 } /* while not eof */
3732
3733 free (token_name.buffer);
3734 free (lbs[0].lb.buffer);
3735 free (lbs[1].lb.buffer);
3736 }
3737
3738 /*
3739 * Process either a C++ file or a C file depending on the setting
3740 * of a global flag.
3741 */
3742 static void
3743 default_C_entries (inf)
3744 FILE *inf;
3745 {
3746 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3747 }
3748
/* Tag INF as plain C: C_entries is called with no extension flags.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3756
3757 /* Always do C++. */
3758 static void
3759 Cplusplus_entries (inf)
3760 FILE *inf;
3761 {
3762 C_entries (C_PLPL, inf);
3763 }
3764
3765 /* Always do Java. */
3766 static void
3767 Cjava_entries (inf)
3768 FILE *inf;
3769 {
3770 C_entries (C_JAVA, inf);
3771 }
3772
3773 /* Always do C*. */
3774 static void
3775 Cstar_entries (inf)
3776 FILE *inf;
3777 {
3778 C_entries (C_STAR, inf);
3779 }
3780
3781 /* Always do Yacc. */
3782 static void
3783 Yacc_entries (inf)
3784 FILE *inf;
3785 {
3786 C_entries (YACC, inf);
3787 }
3788
3789 \f
/* Useful macros. */

/*
 * Loop header that reads FILE_POINTER one line at a time into
 * LINE_BUFFER.  Before each pass through the body it increments lineno,
 * saves the previous charno in linecharno, adds readline's return value
 * to charno and points CHAR_POINTER at the start of LINE_BUFFER's text.
 * The loop ends once feof (FILE_POINTER) is true.  The statement that
 * follows the macro is the loop body.
 *
 * CHAR_POINTER is taken from the LINE_BUFFER argument itself rather than
 * from the global `lb', so the macro also works with other buffers.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )

/*
 * True when CP points at KEYWORD and the character right after the
 * keyword satisfies notinname.  KEYWORD must be a string constant,
 * because its length is computed with sizeof.  On success CP is moved
 * past the keyword and the spaces that follow it; on failure CP is not
 * modified.  CP is evaluated several times and must be an lvalue.
 */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof(keyword)-1])       /* end of keyword */    \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* skip spaces */
3804
3805 /*
3806 * Read a file, but do no processing. This is used to do regexp
3807 * matching on files that have no language defined.
3808 */
3809 static void
3810 just_read_file (inf)
3811 FILE *inf;
3812 {
3813 register char *dummy;
3814
3815 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3816 continue;
3817 }
3818
3819 \f
3820 /* Fortran parsing */
3821
3822 static void F_takeprec __P((void));
3823 static void F_getit __P((FILE *));
3824
3825 static void
3826 F_takeprec ()
3827 {
3828 dbp = skip_spaces (dbp);
3829 if (*dbp != '*')
3830 return;
3831 dbp++;
3832 dbp = skip_spaces (dbp);
3833 if (strneq (dbp, "(*)", 3))
3834 {
3835 dbp += 3;
3836 return;
3837 }
3838 if (!ISDIGIT (*dbp))
3839 {
3840 --dbp; /* force failure */
3841 return;
3842 }
3843 do
3844 dbp++;
3845 while (ISDIGIT (*dbp));
3846 }
3847
3848 static void
3849 F_getit (inf)
3850 FILE *inf;
3851 {
3852 register char *cp;
3853
3854 dbp = skip_spaces (dbp);
3855 if (*dbp == '\0')
3856 {
3857 lineno++;
3858 linecharno = charno;
3859 charno += readline (&lb, inf);
3860 dbp = lb.buffer;
3861 if (dbp[5] != '&')
3862 return;
3863 dbp += 6;
3864 dbp = skip_spaces (dbp);
3865 }
3866 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3867 return;
3868 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3869 continue;
3870 pfnote (savenstr (dbp, cp-dbp), TRUE,
3871 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3872 }
3873
3874
3875 static void
3876 Fortran_functions (inf)
3877 FILE *inf;
3878 {
3879 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3880 {
3881 if (*dbp == '%')
3882 dbp++; /* Ratfor escape to fortran */
3883 dbp = skip_spaces (dbp);
3884 if (*dbp == '\0')
3885 continue;
3886 switch (lowcase (*dbp))
3887 {
3888 case 'i':
3889 if (nocase_tail ("integer"))
3890 F_takeprec ();
3891 break;
3892 case 'r':
3893 if (nocase_tail ("real"))
3894 F_takeprec ();
3895 break;
3896 case 'l':
3897 if (nocase_tail ("logical"))
3898 F_takeprec ();
3899 break;
3900 case 'c':
3901 if (nocase_tail ("complex") || nocase_tail ("character"))
3902 F_takeprec ();
3903 break;
3904 case 'd':
3905 if (nocase_tail ("double"))
3906 {
3907 dbp = skip_spaces (dbp);
3908 if (*dbp == '\0')
3909 continue;
3910 if (nocase_tail ("precision"))
3911 break;
3912 continue;
3913 }
3914 break;
3915 }
3916 dbp = skip_spaces (dbp);
3917 if (*dbp == '\0')
3918 continue;
3919 switch (lowcase (*dbp))
3920 {
3921 case 'f':
3922 if (nocase_tail ("function"))
3923 F_getit (inf);
3924 continue;
3925 case 's':
3926 if (nocase_tail ("subroutine"))
3927 F_getit (inf);
3928 continue;
3929 case 'e':
3930 if (nocase_tail ("entry"))
3931 F_getit (inf);
3932 continue;
3933 case 'b':
3934 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3935 {
3936 dbp = skip_spaces (dbp);
3937 if (*dbp == '\0') /* assume un-named */
3938 pfnote (savestr ("blockdata"), TRUE,
3939 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3940 else
3941 F_getit (inf); /* look for name */
3942 }
3943 continue;
3944 }
3945 }
3946 }
3947
3948 \f
3949 /*
3950 * Ada parsing
3951 * Original code by
3952 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3953 */
3954
3955 static void Ada_getit __P((FILE *, char *));
3956
3957 /* Once we are positioned after an "interesting" keyword, let's get
3958 the real tag value necessary. */
3959 static void
3960 Ada_getit (inf, name_qualifier)
3961 FILE *inf;
3962 char *name_qualifier;
3963 {
3964 register char *cp;
3965 char *name;
3966 char c;
3967
3968 while (!feof (inf))
3969 {
3970 dbp = skip_spaces (dbp);
3971 if (*dbp == '\0'
3972 || (dbp[0] == '-' && dbp[1] == '-'))
3973 {
3974 lineno++;
3975 linecharno = charno;
3976 charno += readline (&lb, inf);
3977 dbp = lb.buffer;
3978 }
3979 switch (lowcase(*dbp))
3980 {
3981 case 'b':
3982 if (nocase_tail ("body"))
3983 {
3984 /* Skipping body of procedure body or package body or ....
3985 resetting qualifier to body instead of spec. */
3986 name_qualifier = "/b";
3987 continue;
3988 }
3989 break;
3990 case 't':
3991 /* Skipping type of task type or protected type ... */
3992 if (nocase_tail ("type"))
3993 continue;
3994 break;
3995 }
3996 if (*dbp == '"')
3997 {
3998 dbp += 1;
3999 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4000 continue;
4001 }
4002 else
4003 {
4004 dbp = skip_spaces (dbp);
4005 for (cp = dbp;
4006 (*cp != '\0'
4007 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4008 cp++)
4009 continue;
4010 if (cp == dbp)
4011 return;
4012 }
4013 c = *cp;
4014 *cp = '\0';
4015 name = concat (dbp, name_qualifier, "");
4016 *cp = c;
4017 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4018 if (c == '"')
4019 dbp = cp + 1;
4020 return;
4021 }
4022 }
4023
4024 static void
4025 Ada_funcs (inf)
4026 FILE *inf;
4027 {
4028 bool inquote = FALSE;
4029
4030 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4031 {
4032 while (*dbp != '\0')
4033 {
4034 /* Skip a string i.e. "abcd". */
4035 if (inquote || (*dbp == '"'))
4036 {
4037 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4038 if (dbp != NULL)
4039 {
4040 inquote = FALSE;
4041 dbp += 1;
4042 continue; /* advance char */
4043 }
4044 else
4045 {
4046 inquote = TRUE;
4047 break; /* advance line */
4048 }
4049 }
4050
4051 /* Skip comments. */
4052 if (dbp[0] == '-' && dbp[1] == '-')
4053 break; /* advance line */
4054
4055 /* Skip character enclosed in single quote i.e. 'a'
4056 and skip single quote starting an attribute i.e. 'Image. */
4057 if (*dbp == '\'')
4058 {
4059 dbp++ ;
4060 if (*dbp != '\0')
4061 dbp++;
4062 continue;
4063 }
4064
4065 /* Search for beginning of a token. */
4066 if (!begtoken (*dbp))
4067 {
4068 dbp++;
4069 continue; /* advance char */
4070 }
4071
4072 /* We are at the beginning of a token. */
4073 switch (lowcase(*dbp))
4074 {
4075 case 'f':
4076 if (!packages_only && nocase_tail ("function"))
4077 Ada_getit (inf, "/f");
4078 else
4079 break; /* from switch */
4080 continue; /* advance char */
4081 case 'p':
4082 if (!packages_only && nocase_tail ("procedure"))
4083 Ada_getit (inf, "/p");
4084 else if (nocase_tail ("package"))
4085 Ada_getit (inf, "/s");
4086 else if (nocase_tail ("protected")) /* protected type */
4087 Ada_getit (inf, "/t");
4088 else
4089 break; /* from switch */
4090 continue; /* advance char */
4091 case 't':
4092 if (!packages_only && nocase_tail ("task"))
4093 Ada_getit (inf, "/k");
4094 else if (typedefs && !packages_only && nocase_tail ("type"))
4095 {
4096 Ada_getit (inf, "/t");
4097 while (*dbp != '\0')
4098 dbp += 1;
4099 }
4100 else
4101 break; /* from switch */
4102 continue; /* advance char */
4103 }
4104
4105 /* Look for the end of the token. */
4106 while (!endtoken (*dbp))
4107 dbp++;
4108
4109 } /* advance char */
4110 } /* advance line */
4111 }
4112
4113 \f
4114 /*
4115 * Unix and microcontroller assembly tag handling
4116 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4117 * Idea by Bob Weiner, Motorola Inc. (1994)
4118 */
4119 static void
4120 Asm_labels (inf)
4121 FILE *inf;
4122 {
4123 register char *cp;
4124
4125 LOOP_ON_INPUT_LINES (inf, lb, cp)
4126 {
4127 /* If first char is alphabetic or one of [_.$], test for colon
4128 following identifier. */
4129 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4130 {
4131 /* Read past label. */
4132 cp++;
4133 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4134 cp++;
4135 if (*cp == ':' || iswhite (*cp))
4136 {
4137 /* Found end of label, so copy it and add it to the table. */
4138 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
4139 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4140 }
4141 }
4142 }
4143 }
4144
4145 \f
4146 /*
4147 * Perl support
4148 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4149 * Perl variable names: /^(my|local).../
4150 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4151 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4152 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4153 */
4154 static void
4155 Perl_functions (inf)
4156 FILE *inf;
4157 {
4158 char *package = savestr ("main"); /* current package name */
4159 register char *cp;
4160
4161 LOOP_ON_INPUT_LINES (inf, lb, cp)
4162 {
4163 skip_spaces(cp);
4164
4165 if (LOOKING_AT (cp, "package"))
4166 {
4167 free (package);
4168 package = get_tag (cp);
4169 if (package == NULL) /* can't parse package name */
4170 package = savestr ("");
4171 else
4172 package = savestr(package); /* make a copy */
4173 }
4174 else if (LOOKING_AT (cp, "sub"))
4175 {
4176 char *name, *fullname, *pos;
4177 char *sp = cp;
4178
4179 while (!notinname (*cp))
4180 cp++;
4181 if (cp == sp)
4182 continue;
4183 name = savenstr (sp, cp-sp);
4184 if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4185 fullname = name;
4186 else
4187 fullname = concat (package, "::", name);
4188 pfnote (fullname, TRUE,
4189 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4190 if (name != fullname)
4191 free (name);
4192 }
4193 else if (globals /* only if tagging global vars is enabled */
4194 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4195 {
4196 /* After "my" or "local", but before any following paren or space. */
4197 char *varname = NULL;
4198
4199 if (*cp == '$' || *cp == '@' || *cp == '%')
4200 {
4201 char* varstart = ++cp;
4202 while (ISALNUM (*cp) || *cp == '_')
4203 cp++;
4204 varname = savenstr (varstart, cp-varstart);
4205 }
4206 else
4207 {
4208 /* Should be examining a variable list at this point;
4209 could insist on seeing an open parenthesis. */
4210 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4211 cp++;
4212 }
4213
4214 /* Perhaps I should back cp up one character, so the TAGS table
4215 doesn't mention (and so depend upon) the following char. */
4216 pfnote (varname, FALSE,
4217 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4218 }
4219 }
4220 }
4221
4222
4223 /*
4224 * Python support
4225 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4226 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4227 * More ideas by seb bacon <seb@jamkit.com> (2002)
4228 */
4229 static void
4230 Python_functions (inf)
4231 FILE *inf;
4232 {
4233 register char *cp;
4234
4235 LOOP_ON_INPUT_LINES (inf, lb, cp)
4236 {
4237 cp = skip_spaces (cp);
4238 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4239 {
4240 char *name = cp;
4241 while (!notinname (*cp) && *cp != ':')
4242 cp++;
4243 pfnote (savenstr (name, cp-name), TRUE,
4244 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4245 }
4246 }
4247 }
4248
4249 \f
4250 /*
4251 * PHP support
4252 * Look for:
4253 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4254 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4255 * - /^[ \t]*define\(\"[^\"]+/
4256 * Only with --members:
4257 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4258 * Idea by Diez B. Roggisch (2001)
4259 */
4260 static void
4261 PHP_functions (inf)
4262 FILE *inf;
4263 {
4264 register char *cp, *name;
4265 bool search_identifier = FALSE;
4266
4267 LOOP_ON_INPUT_LINES (inf, lb, cp)
4268 {
4269 cp = skip_spaces (cp);
4270 name = cp;
4271 if (search_identifier
4272 && *cp != '\0')
4273 {
4274 while (!notinname (*cp))
4275 cp++;
4276 pfnote (savenstr (name, cp-name), TRUE,
4277 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4278 search_identifier = FALSE;
4279 }
4280 else if (LOOKING_AT (cp, "function"))
4281 {
4282 if(*cp == '&')
4283 cp = skip_spaces (cp+1);
4284 if(*cp != '\0')
4285 {
4286 name = cp;
4287 while (!notinname (*cp))
4288 cp++;
4289 pfnote (savenstr (name, cp-name), TRUE,
4290 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4291 }
4292 else
4293 search_identifier = TRUE;
4294 }
4295 else if (LOOKING_AT (cp, "class"))
4296 {
4297 if (*cp != '\0')
4298 {
4299 name = cp;
4300 while (*cp != '\0' && !iswhite (*cp))
4301 cp++;
4302 pfnote (savenstr (name, cp-name), FALSE,
4303 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4304 }
4305 else
4306 search_identifier = TRUE;
4307 }
4308 else if (strneq (cp, "define", 6)
4309 && (cp = skip_spaces (cp+6))
4310 && *cp++ == '('
4311 && (*cp == '"' || *cp == '\''))
4312 {
4313 char quote = *cp++;
4314 name = cp;
4315 while (*cp != quote && *cp != '\0')
4316 cp++;
4317 pfnote (savenstr (name, cp-name), FALSE,
4318 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4319 }
4320 else if (members
4321 && LOOKING_AT (cp, "var")
4322 && *cp == '$')
4323 {
4324 name = cp;
4325 while (!notinname(*cp))
4326 cp++;
4327 pfnote (savenstr (name, cp-name), FALSE,
4328 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4329 }
4330 }
4331 }
4332
4333 \f
4334 /*
4335 * Cobol tag functions
4336 * We could look for anything that could be a paragraph name.
4337 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4338 * Idea by Corny de Souza (1993)
4339 */
4340 static void
4341 Cobol_paragraphs (inf)
4342 FILE *inf;
4343 {
4344 register char *bp, *ep;
4345
4346 LOOP_ON_INPUT_LINES (inf, lb, bp)
4347 {
4348 if (lb.len < 9)
4349 continue;
4350 bp += 8;
4351
4352 /* If eoln, compiler option or comment ignore whole line. */
4353 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4354 continue;
4355
4356 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4357 continue;
4358 if (*ep++ == '.')
4359 pfnote (savenstr (bp, ep-bp), TRUE,
4360 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4361 }
4362 }
4363
4364 \f
4365 /*
4366 * Makefile support
4367 * Idea by Assar Westerlund <assar@sics.se> (2001)
4368 */
4369 static void
4370 Makefile_targets (inf)
4371 FILE *inf;
4372 {
4373 register char *bp;
4374
4375 LOOP_ON_INPUT_LINES (inf, lb, bp)
4376 {
4377 if (*bp == '\t' || *bp == '#')
4378 continue;
4379 while (*bp != '\0' && *bp != '=' && *bp != ':')
4380 bp++;
4381 if (*bp == ':')
4382 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4383 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4384 }
4385 }
4386
4387 \f
4388 /*
4389 * Pascal parsing
4390 * Original code by Mosur K. Mohan (1989)
4391 *
4392 * Locates tags for procedures & functions. Doesn't do any type- or
4393 * var-definitions. It does look for the keyword "extern" or
4394 * "forward" immediately following the procedure statement; if found,
4395 * the tag is skipped.
4396 */
4397 static void
4398 Pascal_functions (inf)
4399 FILE *inf;
4400 {
4401 linebuffer tline; /* mostly copied from C_entries */
4402 long save_lcno;
4403 int save_lineno, save_len;
4404 char c, *cp, *namebuf;
4405
4406 bool /* each of these flags is TRUE iff: */
4407 incomment, /* point is inside a comment */
4408 inquote, /* point is inside '..' string */
4409 get_tagname, /* point is after PROCEDURE/FUNCTION
4410 keyword, so next item = potential tag */
4411 found_tag, /* point is after a potential tag */
4412 inparms, /* point is within parameter-list */
4413 verify_tag; /* point has passed the parm-list, so the
4414 next token will determine whether this
4415 is a FORWARD/EXTERN to be ignored, or
4416 whether it is a real tag */
4417
4418 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4419 namebuf = NULL; /* keep compiler quiet */
4420 lineno = 0;
4421 charno = 0;
4422 dbp = lb.buffer;
4423 *dbp = '\0';
4424 initbuffer (&tline);
4425
4426 incomment = inquote = FALSE;
4427 found_tag = FALSE; /* have a proc name; check if extern */
4428 get_tagname = FALSE; /* have found "procedure" keyword */
4429 inparms = FALSE; /* found '(' after "proc" */
4430 verify_tag = FALSE; /* check if "extern" is ahead */
4431
4432
4433 while (!feof (inf)) /* long main loop to get next char */
4434 {
4435 c = *dbp++;
4436 if (c == '\0') /* if end of line */
4437 {
4438 lineno++;
4439 linecharno = charno;
4440 charno += readline (&lb, inf);
4441 dbp = lb.buffer;
4442 if (*dbp == '\0')
4443 continue;
4444 if (!((found_tag && verify_tag)
4445 || get_tagname))
4446 c = *dbp++; /* only if don't need *dbp pointing
4447 to the beginning of the name of
4448 the procedure or function */
4449 }
4450 if (incomment)
4451 {
4452 if (c == '}') /* within { } comments */
4453 incomment = FALSE;
4454 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4455 {
4456 dbp++;
4457 incomment = FALSE;
4458 }
4459 continue;
4460 }
4461 else if (inquote)
4462 {
4463 if (c == '\'')
4464 inquote = FALSE;
4465 continue;
4466 }
4467 else
4468 switch (c)
4469 {
4470 case '\'':
4471 inquote = TRUE; /* found first quote */
4472 continue;
4473 case '{': /* found open { comment */
4474 incomment = TRUE;
4475 continue;
4476 case '(':
4477 if (*dbp == '*') /* found open (* comment */
4478 {
4479 incomment = TRUE;
4480 dbp++;
4481 }
4482 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4483 inparms = TRUE;
4484 continue;
4485 case ')': /* end of parms list */
4486 if (inparms)
4487 inparms = FALSE;
4488 continue;
4489 case ';':
4490 if (found_tag && !inparms) /* end of proc or fn stmt */
4491 {
4492 verify_tag = TRUE;
4493 break;
4494 }
4495 continue;
4496 }
4497 if (found_tag && verify_tag && (*dbp != ' '))
4498 {
4499 /* check if this is an "extern" declaration */
4500 if (*dbp == '\0')
4501 continue;
4502 if (lowcase (*dbp == 'e'))
4503 {
4504 if (nocase_tail ("extern")) /* superfluous, really! */
4505 {
4506 found_tag = FALSE;
4507 verify_tag = FALSE;
4508 }
4509 }
4510 else if (lowcase (*dbp) == 'f')
4511 {
4512 if (nocase_tail ("forward")) /* check for forward reference */
4513 {
4514 found_tag = FALSE;
4515 verify_tag = FALSE;
4516 }
4517 }
4518 if (found_tag && verify_tag) /* not external proc, so make tag */
4519 {
4520 found_tag = FALSE;
4521 verify_tag = FALSE;
4522 pfnote (namebuf, TRUE,
4523 tline.buffer, save_len, save_lineno, save_lcno);
4524 continue;
4525 }
4526 }
4527 if (get_tagname) /* grab name of proc or fn */
4528 {
4529 if (*dbp == '\0')
4530 continue;
4531
4532 /* save all values for later tagging */
4533 linebuffer_setlen (&tline, lb.len);
4534 strcpy (tline.buffer, lb.buffer);
4535 save_lineno = lineno;
4536 save_lcno = linecharno;
4537
4538 /* grab block name */
4539 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4540 continue;
4541 namebuf = savenstr (dbp, cp-dbp);
4542 dbp = cp; /* set dbp to e-o-token */
4543 save_len = dbp - lb.buffer + 1;
4544 get_tagname = FALSE;
4545 found_tag = TRUE;
4546 continue;
4547
4548 /* and proceed to check for "extern" */
4549 }
4550 else if (!incomment && !inquote && !found_tag)
4551 {
4552 /* check for proc/fn keywords */
4553 switch (lowcase (c))
4554 {
4555 case 'p':
4556 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4557 get_tagname = TRUE;
4558 continue;
4559 case 'f':
4560 if (nocase_tail ("unction"))
4561 get_tagname = TRUE;
4562 continue;
4563 }
4564 }
4565 } /* while not eof */
4566
4567 free (tline.buffer);
4568 }
4569
4570 \f
4571 /*
4572 * Lisp tag functions
4573 * look for (def or (DEF, quote or QUOTE
4574 */
4575
4576 static void L_getit __P((void));
4577
4578 static void
4579 L_getit ()
4580 {
4581 if (*dbp == '\'') /* Skip prefix quote */
4582 dbp++;
4583 else if (*dbp == '(')
4584 {
4585 dbp++;
4586 /* Try to skip "(quote " */
4587 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4588 /* Ok, then skip "(" before name in (defstruct (foo)) */
4589 dbp = skip_spaces (dbp);
4590 }
4591 get_tag (dbp);
4592 }
4593
4594 static void
4595 Lisp_functions (inf)
4596 FILE *inf;
4597 {
4598 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4599 {
4600 if (dbp[0] != '(')
4601 continue;
4602
4603 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4604 {
4605 dbp = skip_non_spaces (dbp);
4606 dbp = skip_spaces (dbp);
4607 L_getit ();
4608 }
4609 else
4610 {
4611 /* Check for (foo::defmumble name-defined ... */
4612 do
4613 dbp++;
4614 while (!notinname (*dbp) && *dbp != ':');
4615 if (*dbp == ':')
4616 {
4617 do
4618 dbp++;
4619 while (*dbp == ':');
4620
4621 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4622 {
4623 dbp = skip_non_spaces (dbp);
4624 dbp = skip_spaces (dbp);
4625 L_getit ();
4626 }
4627 }
4628 }
4629 }
4630 }
4631
4632 \f
4633 /*
4634 * Postscript tag functions
4635 * Just look for lines where the first character is '/'
4636 * Also look at "defineps" for PSWrap
4637 * Ideas by:
4638 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4639 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4640 */
4641 static void
4642 Postscript_functions (inf)
4643 FILE *inf;
4644 {
4645 register char *bp, *ep;
4646
4647 LOOP_ON_INPUT_LINES (inf, lb, bp)
4648 {
4649 if (bp[0] == '/')
4650 {
4651 for (ep = bp+1;
4652 *ep != '\0' && *ep != ' ' && *ep != '{';
4653 ep++)
4654 continue;
4655 pfnote (savenstr (bp, ep-bp), TRUE,
4656 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4657 }
4658 else if (LOOKING_AT (bp, "defineps"))
4659 get_tag (bp);
4660 }
4661 }
4662
4663 \f
4664 /*
4665 * Scheme tag functions
4666 * look for (def... xyzzy
4667 * (def... (xyzzy
4668 * (def ... ((...(xyzzy ....
4669 * (set! xyzzy
4670 * Original code by Ken Haase (1985?)
4671 */
4672
4673 static void
4674 Scheme_functions (inf)
4675 FILE *inf;
4676 {
4677 register char *bp;
4678
4679 LOOP_ON_INPUT_LINES (inf, lb, bp)
4680 {
4681 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4682 {
4683 bp = skip_non_spaces (bp+4);
4684 /* Skip over open parens and white space */
4685 while (notinname (*bp))
4686 bp++;
4687 get_tag (bp);
4688 }
4689 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4690 get_tag (bp);
4691 }
4692 }
4693
4694 \f
4695 /* Find tags in TeX and LaTeX input files. */
4696
4697 /* TEX_toktab is a table of TeX control sequences that define tags.
4698 Each TEX_tabent records one such control sequence.
4699 CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* control sequence name, as it appears
                                   between colons in the token list */
  int len;                      /* strlen (name); an entry with len 0
                                   (and name NULL) ends the table */
};
4705
4706 static struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4707
4708 /* Default set of control sequences to put into TEX_toktab.
4709 The value of environment var TEXTAGS is prepended to this. */
4710
4711 static char *TEX_defenv = "\
4712 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4713 :part:appendix:entry:index";
4714
4715 static void TEX_mode __P((FILE *));
4716 static struct TEX_tabent *TEX_decode_env __P((char *, char *));
4717 static int TEX_Token __P((char *));
4718
4719 static char TEX_esc = '\\';
4720 static char TEX_opgrp = '{';
4721 static char TEX_clgrp = '}';
4722
4723 /*
4724 * TeX/LaTeX scanning loop.
4725 */
4726 static void
4727 TeX_commands (inf)
4728 FILE *inf;
4729 {
4730 char *cp, *lasthit;
4731 register int i;
4732
4733 /* Select either \ or ! as escape character. */
4734 TEX_mode (inf);
4735
4736 /* Initialize token table once from environment. */
4737 if (!TEX_toktab)
4738 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4739
4740 LOOP_ON_INPUT_LINES (inf, lb, cp)
4741 {
4742 lasthit = cp;
4743 /* Look at each esc in line. */
4744 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4745 {
4746 if (*++cp == '\0')
4747 break;
4748 linecharno += cp - lasthit;
4749 lasthit = cp;
4750 i = TEX_Token (lasthit);
4751 if (i >= 0)
4752 {
4753 register char *p;
4754 for (lasthit += TEX_toktab[i].len;
4755 *lasthit == TEX_esc || *lasthit == TEX_opgrp;
4756 lasthit++)
4757 continue;
4758 for (p = lasthit;
4759 !iswhite (*p) && *p != TEX_opgrp && *p != TEX_clgrp;
4760 p++)
4761 continue;
4762 pfnote (savenstr (lasthit, p-lasthit), TRUE,
4763 lb.buffer, lb.len, lineno, linecharno);
4764 break; /* We only tag a line once */
4765 }
4766 }
4767 }
4768 }
4769
4770 #define TEX_LESC '\\'
4771 #define TEX_SESC '!'
4772 #define TEX_cmt '%'
4773
4774 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4775 chars accordingly. */
4776 static void
4777 TEX_mode (inf)
4778 FILE *inf;
4779 {
4780 int c;
4781
4782 while ((c = getc (inf)) != EOF)
4783 {
4784 /* Skip to next line if we hit the TeX comment char. */
4785 if (c == TEX_cmt)
4786 while (c != '\n')
4787 c = getc (inf);
4788 else if (c == TEX_LESC || c == TEX_SESC )
4789 break;
4790 }
4791
4792 if (c == TEX_LESC)
4793 {
4794 TEX_esc = TEX_LESC;
4795 TEX_opgrp = '{';
4796 TEX_clgrp = '}';
4797 }
4798 else
4799 {
4800 TEX_esc = TEX_SESC;
4801 TEX_opgrp = '<';
4802 TEX_clgrp = '>';
4803 }
4804 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4805 No attempt is made to correct the situation. */
4806 rewind (inf);
4807 }
4808
4809 /* Read environment and prepend it to the default string.
4810 Build token table. */
4811 static struct TEX_tabent *
4812 TEX_decode_env (evarname, defenv)
4813 char *evarname;
4814 char *defenv;
4815 {
4816 register char *env, *p;
4817
4818 struct TEX_tabent *tab;
4819 int size, i;
4820
4821 /* Append default string to environment. */
4822 env = getenv (evarname);
4823 if (!env)
4824 env = defenv;
4825 else
4826 {
4827 char *oldenv = env;
4828 env = concat (oldenv, defenv, "");
4829 }
4830
4831 /* Allocate a token table */
4832 for (size = 1, p = env; p;)
4833 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4834 size++;
4835 /* Add 1 to leave room for null terminator. */
4836 tab = xnew (size + 1, struct TEX_tabent);
4837
4838 /* Unpack environment string into token table. Be careful about */
4839 /* zero-length strings (leading ':', "::" and trailing ':') */
4840 for (i = 0; *env;)
4841 {
4842 p = etags_strchr (env, ':');
4843 if (!p) /* End of environment string. */
4844 p = env + strlen (env);
4845 if (p - env > 0)
4846 { /* Only non-zero strings. */
4847 tab[i].name = savenstr (env, p - env);
4848 tab[i].len = strlen (tab[i].name);
4849 i++;
4850 }
4851 if (*p)
4852 env = p + 1;
4853 else
4854 {
4855 tab[i].name = NULL; /* Mark end of table. */
4856 tab[i].len = 0;
4857 break;
4858 }
4859 }
4860 return tab;
4861 }
4862
4863 /* If the text at CP matches one of the tag-defining TeX command names,
4864 return the pointer to the first occurrence of that command in TEX_toktab.
4865 Otherwise return -1.
4866 Keep the capital `T' in `token' for dumb truncating compilers
4867 (this distinguishes it from `TEX_toktab' */
4868 static int
4869 TEX_Token (cp)
4870 char *cp;
4871 {
4872 int i;
4873
4874 for (i = 0; TEX_toktab[i].len > 0; i++)
4875 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4876 return i;
4877 return -1;
4878 }
4879
4880 \f
4881 /* Texinfo support. Dave Love, Mar. 2000. */
4882 static void
4883 Texinfo_nodes (inf)
4884 FILE * inf;
4885 {
4886 char *cp, *start;
4887 LOOP_ON_INPUT_LINES (inf, lb, cp)
4888 if (LOOKING_AT (cp, "@node"))
4889 {
4890 start = cp;
4891 while (*cp != '\0' && *cp != ',')
4892 cp++;
4893 pfnote (savenstr (start, cp - start), TRUE,
4894 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4895 }
4896 }
4897
4898 \f
4899 /*
4900 * Prolog support
4901 *
4902 * Assumes that the predicate or rule starts at column 0.
4903 * Only the first clause of a predicate or rule is added.
4904 * Original code by Sunichirou Sugou (1989)
4905 * Rewritten by Anders Lindgren (1996)
4906 */
4907 static int prolog_pr __P((char *, char *));
4908 static void prolog_skip_comment __P((linebuffer *, FILE *));
4909 static int prolog_atom __P((char *, int));
4910
4911 static void
4912 Prolog_functions (inf)
4913 FILE *inf;
4914 {
4915 char *cp, *last;
4916 int len;
4917 int allocated;
4918
4919 allocated = 0;
4920 len = 0;
4921 last = NULL;
4922
4923 LOOP_ON_INPUT_LINES (inf, lb, cp)
4924 {
4925 if (cp[0] == '\0') /* Empty line */
4926 continue;
4927 else if (iswhite (cp[0])) /* Not a predicate */
4928 continue;
4929 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4930 prolog_skip_comment (&lb, inf);
4931 else if ((len = prolog_pr (cp, last)) > 0)
4932 {
4933 /* Predicate or rule. Store the function name so that we
4934 only generate a tag for the first clause. */
4935 if (last == NULL)
4936 last = xnew(len + 1, char);
4937 else if (len + 1 > allocated)
4938 xrnew (last, len + 1, char);
4939 allocated = len + 1;
4940 strncpy (last, cp, len);
4941 last[len] = '\0';
4942 }
4943 }
4944 }
4945
4946
4947 static void
4948 prolog_skip_comment (plb, inf)
4949 linebuffer *plb;
4950 FILE *inf;
4951 {
4952 char *cp;
4953
4954 do
4955 {
4956 for (cp = plb->buffer; *cp != '\0'; cp++)
4957 if (cp[0] == '*' && cp[1] == '/')
4958 return;
4959 lineno++;
4960 linecharno += readline (plb, inf);
4961 }
4962 while (!feof(inf));
4963 }
4964
4965 /*
4966 * A predicate or rule definition is added if it matches:
4967 * <beginning of line><Prolog Atom><whitespace>(
4968 * or <beginning of line><Prolog Atom><whitespace>:-
4969 *
4970 * It is added to the tags database if it doesn't match the
4971 * name of the previous clause header.
4972 *
4973 * Return the size of the name of the predicate or rule, or 0 if no
4974 * header was found.
4975 */
4976 static int
4977 prolog_pr (s, last)
4978 char *s;
4979 char *last; /* Name of last clause. */
4980 {
4981 int pos;
4982 int len;
4983
4984 pos = prolog_atom (s, 0);
4985 if (pos < 1)
4986 return 0;
4987
4988 len = pos;
4989 pos = skip_spaces (s + pos) - s;
4990
4991 if ((s[pos] == '.'
4992 || (s[pos] == '(' && (pos += 1))
4993 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
4994 && (last == NULL /* save only the first clause */
4995 || len != strlen (last)
4996 || !strneq (s, last, len)))
4997 {
4998 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4999 return len;
5000 }
5001 else
5002 return 0;
5003 }
5004
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores included), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* that was the closing quote */
        pos++;                  /* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (s[pos+1] == '\0')
          return -1;
        pos += 2;
        break;

      default:
        pos++;
        break;
      }
}
5063
5064 \f
5065 /*
5066 * Support for Erlang
5067 *
5068 * Generates tags for functions, defines, and records.
5069 * Assumes that Erlang functions start at column 0.
5070 * Original code by Anders Lindgren (1996)
5071 */
5072 static int erlang_func __P((char *, char *));
5073 static void erlang_attribute __P((char *));
5074 static int erlang_atom __P((char *, int));
5075
5076 static void
5077 Erlang_functions (inf)
5078 FILE *inf;
5079 {
5080 char *cp, *last;
5081 int len;
5082 int allocated;
5083
5084 allocated = 0;
5085 len = 0;
5086 last = NULL;
5087
5088 LOOP_ON_INPUT_LINES (inf, lb, cp)
5089 {
5090 if (cp[0] == '\0') /* Empty line */
5091 continue;
5092 else if (iswhite (cp[0])) /* Not function nor attribute */
5093 continue;
5094 else if (cp[0] == '%') /* comment */
5095 continue;
5096 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5097 continue;
5098 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5099 {
5100 erlang_attribute (cp);
5101 last = NULL;
5102 }
5103 else if ((len = erlang_func (cp, last)) > 0)
5104 {
5105 /*
5106 * Function. Store the function name so that we only
5107 * generates a tag for the first clause.
5108 */
5109 if (last == NULL)
5110 last = xnew (len + 1, char);
5111 else if (len + 1 > allocated)
5112 xrnew (last, len + 1, char);
5113 allocated = len + 1;
5114 strncpy (last, cp, len);
5115 last[len] = '\0';
5116 }
5117 }
5118 }
5119
5120
5121 /*
5122 * A function definition is added if it matches:
5123 * <beginning of line><Erlang Atom><whitespace>(
5124 *
5125 * It is added to the tags database if it doesn't match the
5126 * name of the previous clause header.
5127 *
5128 * Return the size of the name of the function, or 0 if no function
5129 * was found.
5130 */
5131 static int
5132 erlang_func (s, last)
5133 char *s;
5134 char *last; /* Name of last clause. */
5135 {
5136 int pos;
5137 int len;
5138
5139 pos = erlang_atom (s, 0);
5140 if (pos < 1)
5141 return 0;
5142
5143 len = pos;
5144 pos = skip_spaces (s + pos) - s;
5145
5146 /* Save only the first clause. */
5147 if (s[pos++] == '('
5148 && (last == NULL
5149 || len != (int)strlen (last)
5150 || !strneq (s, last, len)))
5151 {
5152 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5153 return len;
5154 }
5155
5156 return 0;
5157 }
5158
5159
5160 /*
5161 * Handle attributes. Currently, tags are generated for defines
5162 * and records.
5163 *
5164 * They are on the form:
5165 * -define(foo, bar).
5166 * -define(Foo(M, N), M+N).
5167 * -record(graph, {vtab = notable, cyclic = true}).
5168 */
5169 static void
5170 erlang_attribute (s)
5171 char *s;
5172 {
5173 int pos;
5174 int len;
5175
5176 if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
5177 {
5178 if (s[pos++] == '(')
5179 {
5180 pos = skip_spaces (s + pos) - s;
5181 len = erlang_atom (s, pos);
5182 if (len != 0)
5183 pfnote (savenstr (& s[pos], len), TRUE,
5184 s, pos + len, lineno, linecharno);
5185 }
5186 }
5187 return;
5188 }
5189
5190
/*
 * Consume an Erlang atom (or variable) that starts at S[POS]: either a
 * letter or underscore followed by alphanumerics and underscores, or a
 * single-quoted string in which backslash quotes the next character.
 * Return the number of bytes consumed, or -1 if there was an error.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISALPHA (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: advance to the closing quote. */
  for (pos++; s[pos] != '\''; )
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (s[pos+1] == '\0')
          return -1;
        pos += 2;
        break;

      default:
        pos++;
        break;
      }

  /* Count the closing quote as well. */
  return pos + 1 - start;
}
5240
5241 \f
5242 #ifdef ETAGS_REGEXPS
5243
5244 static char *scan_separators __P((char *));
5245 static void analyse_regex __P((char *, bool));
5246 static void add_regex __P((char *, bool, language *));
5247 static char *substitute __P((char *, char *, struct re_registers *));
5248
/* NAME is a string like "/blah/": its first character is the
   separator.  Copy the text between the separators down to the start
   of NAME, so that NAME becomes "blah", and null terminate it.  The
   scan stops at the first unquoted separator or at the end of the
   string.  A backslash quotes the next character: "\t" becomes a Tab,
   a quoted separator becomes the separator itself, and any other
   quoted character is kept together with its backslash.
   Return a pointer to the terminating separator, or to the final null
   when there is none.  Works in place. */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;
  char *src;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character; a backslash at the very end
             of the string is dropped. */
          src++;
          if (*src == '\0')
            break;
          if (*src == 't')
            *dst++ = '\t';
          else if (*src == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = *src;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5291
5292 /* Look at the argument of --regex or --no-regex and do the right
5293 thing. Same for each line of a regexp file. */
5294 static void
5295 analyse_regex (regex_arg, ignore_case)
5296 char *regex_arg;
5297 bool ignore_case;
5298 {
5299 if (regex_arg == NULL)
5300 {
5301 free_patterns (); /* --no-regex: remove existing regexps */
5302 return;
5303 }
5304
5305 /* A real --regexp option or a line in a regexp file. */
5306 switch (regex_arg[0])
5307 {
5308 /* Comments in regexp file or null arg to --regex. */
5309 case '\0':
5310 case ' ':
5311 case '\t':
5312 break;
5313
5314 /* Read a regex file. This is recursive and may result in a
5315 loop, which will stop when the file descriptors are exhausted. */
5316 case '@':
5317 {
5318 FILE *regexfp;
5319 linebuffer regexbuf;
5320 char *regexfile = regex_arg + 1;
5321
5322 /* regexfile is a file containing regexps, one per line. */
5323 regexfp = fopen (regexfile, "r");
5324 if (regexfp == NULL)
5325 {
5326 pfatal (regexfile);
5327 return;
5328 }
5329 initbuffer (&regexbuf);
5330 while (readline_internal (&regexbuf, regexfp) > 0)
5331 analyse_regex (regexbuf.buffer, ignore_case);
5332 free (regexbuf.buffer);
5333 fclose (regexfp);
5334 }
5335 break;
5336
5337 /* Regexp to be used for a specific language only. */
5338 case '{':
5339 {
5340 language *lang;
5341 char *lang_name = regex_arg + 1;
5342 char *cp;
5343
5344 for (cp = lang_name; *cp != '}'; cp++)
5345 if (*cp == '\0')
5346 {
5347 error ("unterminated language name in regex: %s", regex_arg);
5348 return;
5349 }
5350 *cp = '\0';
5351 lang = get_language_from_langname (lang_name);
5352 if (lang == NULL)
5353 return;
5354 add_regex (cp + 1, ignore_case, lang);
5355 }
5356 break;
5357
5358 /* Regexp to be used for any language. */
5359 default:
5360 add_regex (regex_arg, ignore_case, NULL);
5361 break;
5362 }
5363 }
5364
5365 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5366 expression, into a real regular expression by compiling it. */
5367 static void
5368 add_regex (regexp_pattern, ignore_case, lang)
5369 char *regexp_pattern;
5370 bool ignore_case;
5371 language *lang;
5372 {
5373 static struct re_pattern_buffer zeropattern;
5374 char *name;
5375 const char *err;
5376 struct re_pattern_buffer *patbuf;
5377 pattern *pp;
5378
5379
5380 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5381 {
5382 error ("%s: unterminated regexp", regexp_pattern);
5383 return;
5384 }
5385 name = scan_separators (regexp_pattern);
5386 if (regexp_pattern[0] == '\0')
5387 {
5388 error ("null regexp", (char *)NULL);
5389 return;
5390 }
5391 (void) scan_separators (name);
5392
5393 patbuf = xnew (1, struct re_pattern_buffer);
5394 *patbuf = zeropattern;
5395 if (ignore_case)
5396 patbuf->translate = lc_trans; /* translation table to fold case */
5397
5398 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5399 if (err != NULL)
5400 {
5401 error ("%s while compiling pattern", err);
5402 return;
5403 }
5404
5405 pp = p_head;
5406 p_head = xnew (1, pattern);
5407 p_head->regex = savestr (regexp_pattern);
5408 p_head->p_next = pp;
5409 p_head->lang = lang;
5410 p_head->pat = patbuf;
5411 p_head->name_pattern = savestr (name);
5412 p_head->error_signaled = FALSE;
5413 p_head->ignore_case = ignore_case;
5414 }
5415
5416 /*
5417 * Do the substitutions indicated by the regular expression and
5418 * arguments.
5419 */
5420 static char *
5421 substitute (in, out, regs)
5422 char *in, *out;
5423 struct re_registers *regs;
5424 {
5425 char *result, *t;
5426 int size, dig, diglen;
5427
5428 result = NULL;
5429 size = strlen (out);
5430
5431 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5432 if (out[size - 1] == '\\')
5433 fatal ("pattern error in \"%s\"", out);
5434 for (t = etags_strchr (out, '\\');
5435 t != NULL;
5436 t = etags_strchr (t + 2, '\\'))
5437 if (ISDIGIT (t[1]))
5438 {
5439 dig = t[1] - '0';
5440 diglen = regs->end[dig] - regs->start[dig];
5441 size += diglen - 2;
5442 }
5443 else
5444 size -= 1;
5445
5446 /* Allocate space and do the substitutions. */
5447 result = xnew (size + 1, char);
5448
5449 for (t = result; *out != '\0'; out++)
5450 if (*out == '\\' && ISDIGIT (*++out))
5451 {
5452 dig = *out - '0';
5453 diglen = regs->end[dig] - regs->start[dig];
5454 strncpy (t, in + regs->start[dig], diglen);
5455 t += diglen;
5456 }
5457 else
5458 *t++ = *out;
5459 *t = '\0';
5460
5461 assert (t <= result + size && t - result == (int)strlen (result));
5462
5463 return result;
5464 }
5465
5466 /* Deallocate all patterns. */
5467 static void
5468 free_patterns ()
5469 {
5470 pattern *pp;
5471 while (p_head != NULL)
5472 {
5473 pp = p_head->p_next;
5474 free (p_head->regex);
5475 free (p_head->name_pattern);
5476 free (p_head);
5477 p_head = pp;
5478 }
5479 return;
5480 }
5481 #endif /* ETAGS_REGEXPS */
5482
5483 \f
5484 static bool
5485 nocase_tail (cp)
5486 char *cp;
5487 {
5488 register int len = 0;
5489
5490 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5491 cp++, len++;
5492 if (*cp == '\0' && !intoken (dbp[len]))
5493 {
5494 dbp += len;
5495 return TRUE;
5496 }
5497 return FALSE;
5498 }
5499
5500 static char *
5501 get_tag (bp)
5502 register char *bp;
5503 {
5504 register char *cp, *name;
5505
5506 if (*bp == '\0')
5507 return NULL;
5508 /* Go till you get to white space or a syntactic break */
5509 for (cp = bp + 1; !notinname (*cp); cp++)
5510 continue;
5511 name = savenstr (bp, cp-bp);
5512 pfnote (name, TRUE,
5513 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5514 return name;
5515 }
5516
5517 /* Initialize a linebuffer for use */
5518 static void
5519 initbuffer (lbp)
5520 linebuffer *lbp;
5521 {
5522 lbp->size = (DEBUG) ? 3 : 200;
5523 lbp->buffer = xnew (lbp->size, char);
5524 lbp->buffer[0] = '\0';
5525 lbp->len = 0;
5526 }
5527
5528 /*
5529 * Read a line of text from `stream' into `lbp', excluding the
5530 * newline or CR-NL, if any. Return the number of characters read from
5531 * `stream', which is the length of the line including the newline.
5532 *
5533 * On DOS or Windows we do not count the CR character, if any, before the
5534 * NL, in the returned length; this mirrors the behavior of emacs on those
5535 * platforms (for text files, it translates CR-NL to NL as it reads in the
5536 * file).
5537 */
5538 static long
5539 readline_internal (lbp, stream)
5540 linebuffer *lbp;
5541 register FILE *stream;
5542 {
5543 char *buffer = lbp->buffer;
5544 register char *p = lbp->buffer;
5545 register char *pend;
5546 int chars_deleted;
5547
5548 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5549
5550 while (1)
5551 {
5552 register int c = getc (stream);
5553 if (p == pend)
5554 {
5555 /* We're at the end of linebuffer: expand it. */
5556 lbp->size *= 2;
5557 xrnew (buffer, lbp->size, char);
5558 p += buffer - lbp->buffer;
5559 pend = buffer + lbp->size;
5560 lbp->buffer = buffer;
5561 }
5562 if (c == EOF)
5563 {
5564 *p = '\0';
5565 chars_deleted = 0;
5566 break;
5567 }
5568 if (c == '\n')
5569 {
5570 if (p > buffer && p[-1] == '\r')
5571 {
5572 p -= 1;
5573 #ifdef DOS_NT
5574 /* Assume CRLF->LF translation will be performed by Emacs
5575 when loading this file, so CRs won't appear in the buffer.
5576 It would be cleaner to compensate within Emacs;
5577 however, Emacs does not know how many CRs were deleted
5578 before any given point in the file. */
5579 chars_deleted = 1;
5580 #else
5581 chars_deleted = 2;
5582 #endif
5583 }
5584 else
5585 {
5586 chars_deleted = 1;
5587 }
5588 *p = '\0';
5589 break;
5590 }
5591 *p++ = c;
5592 }
5593 lbp->len = p - buffer;
5594
5595 return lbp->len + chars_deleted;
5596 }
5597
/*
 * Like readline_internal, above, but in addition try to match the
 * input line against relevant regular expressions.
 *
 * Unless no_line_directive is set, a line of the form
 *	#line NUMBER "FILE"
 * is not returned to the caller: it is used to update lineno and
 * curfdp, and the next line is read in its place.  While the static
 * flag discard_until_line_directive is set, ordinary lines are dropped
 * as well, until the next #line directive or the end of the file.
 *
 * Return what readline_internal returned for the line left in LBP.
 *
 * NOTE(review): skipped and discarded lines are consumed by calling
 * this function recursively, one level per line; whether that becomes
 * a loop depends on the compiler.
 */
static long
readline (lbp, stream)
     linebuffer *lbp;
     FILE *stream;
{
  /* Read new line. */
  long result = readline_internal (lbp, stream);

  /* Honour #line directives. */
  if (!no_line_directive)
    {
      /* Set when the file named by a #line directive has a description
         of its own already; cleared by the next directive or at EOF. */
      static bool discard_until_line_directive;

      /* Check whether this is a #line directive. */
      if (result > 12 && strneq (lbp->buffer, "#line ", 6))
        {
          int start, lno;

          if (DEBUG) start = 0; /* shut up the compiler */
          /* START receives, through %n, the offset of the first
             character after the opening double quote. */
          if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
            {
              char *endp = lbp->buffer + start;

              assert (start > 0);
              /* Find the closing double quote, skipping any that is
                 preceded by a backslash. */
              while ((endp = etags_strchr (endp, '"')) != NULL
                     && endp[-1] == '\\')
                endp++;
              if (endp != NULL)
                /* Ok, this is a real #line directive.  Let's deal with it. */
                {
                  char *taggedabsname;  /* absolute name of original file */
                  char *taggedfname;    /* name of original file as given */
                  char *name;           /* temp var */

                  discard_until_line_directive = FALSE; /* found it */
                  /* Isolate the file name inside the line buffer. */
                  name = lbp->buffer + start;
                  *endp = '\0';
                  canonicalize_filename (name); /* for DOS */
                  taggedabsname = absolute_filename (name, curfdp->infabsdir);
                  if (filename_is_absolute (name)
                      || filename_is_absolute (curfdp->infname))
                    taggedfname = savestr (taggedabsname);
                  else
                    taggedfname = relative_filename (taggedabsname,tagfiledir);

                  if (streq (curfdp->taggedfname, taggedfname))
                    /* The #line directive is only a line number change.  We
                       deal with this afterwards. */
                    free (taggedfname);
                  else
                    /* The tags following this #line directive should be
                       attributed to taggedfname.  In order to do this, set
                       curfdp accordingly. */
                    {
                      fdesc *fdp; /* file description pointer */

                      /* Go look for a file description already set up for the
                         file indicated in the #line directive.  If there is
                         one, use it from now until the next #line
                         directive. */
                      for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                        if (streq (fdp->infname, curfdp->infname)
                            && streq (fdp->taggedfname, taggedfname))
                          /* If we remove the second test above (after the &&)
                             then all entries pertaining to the same file are
                             coalesced in the tags file.  If we use it, then
                             entries pertaining to the same file but generated
                             from different files (via #line directives) will
                             go into separate sections in the tags file.  These
                             alternatives look equivalent.  The first one
                             destroys some apparently useless information. */
                          {
                            curfdp = fdp;
                            free (taggedfname);
                            break;
                          }
                      /* Else, if we already tagged the real file, skip all
                         input lines until the next #line directive. */
                      if (fdp == NULL) /* not found */
                        for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                          if (streq (fdp->infabsname, taggedabsname))
                            {
                              discard_until_line_directive = TRUE;
                              free (taggedfname);
                              break;
                            }
                      /* Else create a new file description and use that from
                         now on, until the next #line directive.  The new
                         description takes ownership of taggedfname. */
                      if (fdp == NULL) /* not found */
                        {
                          fdp = fdhead;
                          fdhead = xnew (1, fdesc);
                          *fdhead = *curfdp; /* copy curr. file description */
                          fdhead->next = fdp;
                          fdhead->infname = savestr (curfdp->infname);
                          fdhead->infabsname = savestr (curfdp->infabsname);
                          fdhead->infabsdir = savestr (curfdp->infabsdir);
                          fdhead->taggedfname = taggedfname;
                          fdhead->usecharno = FALSE;
                          curfdp = fdhead;
                        }
                    }
                  free (taggedabsname);
                  lineno = lno;
                  /* The directive itself is not handed to the caller:
                     read the line that follows it. */
                  return readline (lbp, stream);
                } /* if a real #line directive */
            } /* if #line is followed by a number */
        } /* if line begins with "#line " */

      /* If we are here, no #line directive was found. */
      if (discard_until_line_directive)
        {
          if (result > 0)
            /* Do a tail recursion on ourselves, thus discarding the contents
               of the line buffer. */
            return readline (lbp, stream);
          /* End of file. */
          discard_until_line_directive = FALSE;
          return 0;
        }
    } /* if #line directives should be considered */

#ifdef ETAGS_REGEXPS
  {
    int match;
    pattern *pp;

    /* Match against relevant patterns. */
    if (lbp->len > 0)
      for (pp = p_head; pp != NULL; pp = pp->p_next)
        {
          /* Only use generic regexps or those for the current language.
             NOTE(review): the language is taken from fdhead, not from
             curfdp -- confirm that the two always agree here. */
          if (pp->lang != NULL && pp->lang != fdhead->lang)
            continue;

          match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
          switch (match)
            {
            case -2:
              /* Some error.  Report it only once for each pattern. */
              if (!pp->error_signaled)
                {
                  error ("error while matching \"%s\"", pp->regex);
                  pp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* Match occurred.  Construct a tag; MATCH is passed to
                 pfnote as the length of the tagged text. */
              if (pp->name_pattern[0] != '\0')
                {
                  /* Make a named tag. */
                  char *name = substitute (lbp->buffer,
                                           pp->name_pattern, &pp->regs);
                  if (name != NULL)
                    pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
                }
              else
                {
                  /* Make an unnamed tag. */
                  pfnote ((char *)NULL, TRUE,
                          lbp->buffer, match, lineno, linecharno);
                }
              break;
            }
        }
  }
#endif /* ETAGS_REGEXPS */

  return result;
}
5775
5776 \f
/*
 * Return a newly allocated copy of the whole string CP; the space
 * holds strlen(cp)+1 characters and comes from savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5787
5788 /*
5789 * Return a pointer to a space of size LEN+1 allocated with xnew where
5790 * the string CP has been copied for at most the first LEN characters.
5791 */
5792 static char *
5793 savenstr (cp, len)
5794 char *cp;
5795 int len;
5796 {
5797 register char *dp;
5798
5799 dp = xnew (len + 1, char);
5800 strncpy (dp, cp, len);
5801 dp[len] = '\0';
5802 return dp;
5803 }
5804
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null is part of the
 * string searched, so looking for '\0' finds the end of sp.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
5826
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null is part of the
 * string searched, so looking for '\0' finds the end of sp.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
5845
5846 /*
5847 * Return TRUE if the two strings are equal, ignoring case for alphabetic
5848 * characters.
5849 *
5850 * Analogous to BSD's strcasecmp, included for portability.
5851 */
5852 static bool
5853 strcaseeq (s1, s2)
5854 register const char *s1;
5855 register const char *s2;
5856 {
5857 while (*s1 != '\0'
5858 && (ISALPHA (*s1) && ISALPHA (*s2)
5859 ? lowcase (*s1) == lowcase (*s2)
5860 : *s1 == *s2))
5861 s1++, s2++;
5862
5863 return (*s1 == *s2);
5864 }
5865
/* Skip spaces: return a pointer to the first character at or after CP
   for which iswhite is false. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5875
/* Skip non spaces: return a pointer to the first iswhite character at
   or after CP, or to the end of the string if there is none. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
5885
5886 /* Print error message and exit. */
5887 void
5888 fatal (s1, s2)
5889 char *s1, *s2;
5890 {
5891 error (s1, s2);
5892 exit (BAD);
5893 }
5894
5895 static void
5896 pfatal (s1)
5897 char *s1;
5898 {
5899 perror (s1);
5900 exit (BAD);
5901 }
5902
5903 static void
5904 suggest_asking_for_help ()
5905 {
5906 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5907 progname,
5908 #ifdef LONG_OPTIONS
5909 "--help"
5910 #else
5911 "-h"
5912 #endif
5913 );
5914 exit (BAD);
5915 }
5916
5917 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5918 static void
5919 error (s1, s2)
5920 const char *s1, *s2;
5921 {
5922 fprintf (stderr, "%s: ", progname);
5923 fprintf (stderr, s1, s2);
5924 fprintf (stderr, "\n");
5925 }
5926
5927 /* Return a newly-allocated string whose contents
5928 concatenate those of s1, s2, s3. */
5929 static char *
5930 concat (s1, s2, s3)
5931 char *s1, *s2, *s3;
5932 {
5933 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5934 char *result = xnew (len1 + len2 + len3 + 1, char);
5935
5936 strcpy (result, s1);
5937 strcpy (result + len1, s2);
5938 strcpy (result + len1 + len2, s3);
5939 result[len1 + len2 + len3] = '\0';
5940
5941 return result;
5942 }
5943
5944 \f
5945 /* Does the same work as the system V getcwd, but does not need to
5946 guess the buffer size in advance. */
5947 static char *
5948 etags_getcwd ()
5949 {
5950 #ifdef HAVE_GETCWD
5951 int bufsize = 200;
5952 char *path = xnew (bufsize, char);
5953
5954 while (getcwd (path, bufsize) == NULL)
5955 {
5956 if (errno != ERANGE)
5957 pfatal ("getcwd");
5958 bufsize *= 2;
5959 free (path);
5960 path = xnew (bufsize, char);
5961 }
5962
5963 canonicalize_filename (path);
5964 return path;
5965
5966 #else /* not HAVE_GETCWD */
5967 #if MSDOS
5968
5969 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5970
5971 getwd (path);
5972
5973 for (p = path; *p != '\0'; p++)
5974 if (*p == '\\')
5975 *p = '/';
5976 else
5977 *p = lowcase (*p);
5978
5979 return strdup (path);
5980 #else /* not MSDOS */
5981 linebuffer path;
5982 FILE *pipe;
5983
5984 initbuffer (&path);
5985 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5986 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5987 pfatal ("pwd");
5988 pclose (pipe);
5989
5990 return path.buffer;
5991 #endif /* not MSDOS */
5992 #endif /* not HAVE_GETCWD */
5993 }
5994
5995 /* Return a newly allocated string containing the file name of FILE
5996 relative to the absolute directory DIR (which should end with a slash). */
5997 static char *
5998 relative_filename (file, dir)
5999 char *file, *dir;
6000 {
6001 char *fp, *dp, *afn, *res;
6002 int i;
6003
6004 /* Find the common root of file and dir (with a trailing slash). */
6005 afn = absolute_filename (file, cwd);
6006 fp = afn;
6007 dp = dir;
6008 while (*fp++ == *dp++)
6009 continue;
6010 fp--, dp--; /* back to the first differing char */
6011 #ifdef DOS_NT
6012 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6013 return afn;
6014 #endif
6015 do /* look at the equal chars until '/' */
6016 fp--, dp--;
6017 while (*fp != '/');
6018
6019 /* Build a sequence of "../" strings for the resulting relative file name. */
6020 i = 0;
6021 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6022 i += 1;
6023 res = xnew (3*i + strlen (fp + 1) + 1, char);
6024 res[0] = '\0';
6025 while (i-- > 0)
6026 strcat (res, "../");
6027
6028 /* Add the file name relative to the common root of file and dir. */
6029 strcat (res, fp + 1);
6030 free (afn);
6031
6032 return res;
6033 }
6034
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  The "/dirname/.."
   and "/." components are removed from the result; if nothing is left,
   "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy would be
                 undefined here: use memmove, copying the NUL as well.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the regions overlap here too.  */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the result is the root directory.
         Release the now empty string instead of leaking it.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6095
/* Return a newly allocated string holding the absolute name of the
   directory in which FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place.  When FILE contains no
   slash, the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the call,
     so that only the directory part is made absolute; then put the
     overwritten character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6117
/* Tell whether FN is an absolute file name, that is, whether it begins
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   FN must already have been through canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6130
/* Put the file name FN into canonical form, in place.  Under DOS_NT
   this upper-cases a lower case drive letter and turns every backslash
   into a slash; on other systems nothing is done.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* A lower case letter followed by a colon is a drive letter.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Replace each backslash with a slash.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* Nothing to do.  */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6149
6150 /* Set the minimum size of a string contained in a linebuffer. */
6151 static void
6152 linebuffer_setlen (lbp, toksize)
6153 linebuffer *lbp;
6154 int toksize;
6155 {
6156 while (lbp->size <= toksize)
6157 {
6158 lbp->size *= 2;
6159 xrnew (lbp->buffer, lbp->size, char);
6160 }
6161 lbp->len = toksize;
6162 }
6163
6164 /* Like malloc but get fatal error if memory is exhausted. */
6165 static PTR
6166 xmalloc (size)
6167 unsigned int size;
6168 {
6169 PTR result = (PTR) malloc (size);
6170 if (result == NULL)
6171 fatal ("virtual memory exhausted", (char *)NULL);
6172 return result;
6173 }
6174
6175 static PTR
6176 xrealloc (ptr, size)
6177 char *ptr;
6178 unsigned int size;
6179 {
6180 PTR result = (PTR) realloc (ptr, size);
6181 if (result == NULL)
6182 fatal ("virtual memory exhausted", (char *)NULL);
6183 return result;
6184 }
6185
6186 /*
6187 * Local Variables:
6188 * c-indentation-style: gnu
6189 * indent-tabs-mode: t
6190 * tab-width: 8
6191 * fill-column: 79
6192 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
6193 * End:
6194 */