(main): Use `sort -o TAGFILE TAGFILE' instead of `sort TAGFILE -o
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (ideas by Mykola Dzyuba).
32 *
33 * Francesco Potortì <pot@gnu.org> has maintained it since 1993.
34 */
35
36 char pot_etags_version[] = "@(#) pot revision number is 15.2";
37
38 #define TRUE 1
39 #define FALSE 0
40
41 #ifdef DEBUG
42 # undef DEBUG
43 # define DEBUG TRUE
44 #else
45 # define DEBUG FALSE
46 # define NDEBUG /* disable assert */
47 #endif
48
49 #ifdef HAVE_CONFIG_H
50 # include <config.h>
51 /* On some systems, Emacs defines static as nothing for the sake
52 of unexec. We don't want that here since we don't use unexec. */
53 # undef static
54 # define ETAGS_REGEXPS /* use the regexp features */
55 # define LONG_OPTIONS /* accept long options */
56 # ifndef PTR /* for Xemacs */
57 # define PTR void *
58 # endif
59 # ifndef __P /* for Xemacs */
60 # define __P(args) args
61 # endif
62 #else
63 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
64 # define __P(args) args /* use prototypes */
65 # define PTR void * /* for generic pointers */
66 # else
67 # define __P(args) () /* no prototypes */
68 # define const /* remove const for old compilers' sake */
69 # define PTR long * /* don't use void* */
70 # endif
71 #endif /* !HAVE_CONFIG_H */
72
73 #ifndef _GNU_SOURCE
74 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
75 #endif
76
77 /* WIN32_NATIVE is for Xemacs.
78 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
79 #ifdef WIN32_NATIVE
80 # undef MSDOS
81 # undef WINDOWSNT
82 # define WINDOWSNT
83 #endif /* WIN32_NATIVE */
84
85 #ifdef MSDOS
86 # undef MSDOS
87 # define MSDOS TRUE
88 # include <fcntl.h>
89 # include <sys/param.h>
90 # include <io.h>
91 # ifndef HAVE_CONFIG_H
92 # define DOS_NT
93 # include <sys/config.h>
94 # endif
95 #else
96 # define MSDOS FALSE
97 #endif /* MSDOS */
98
99 #ifdef WINDOWSNT
100 # include <stdlib.h>
101 # include <fcntl.h>
102 # include <string.h>
103 # include <direct.h>
104 # include <io.h>
105 # define MAXPATHLEN _MAX_PATH
106 # undef HAVE_NTGUI
107 # undef DOS_NT
108 # define DOS_NT
109 # ifndef HAVE_GETCWD
110 # define HAVE_GETCWD
111 # endif /* undef HAVE_GETCWD */
112 #else /* !WINDOWSNT */
113 # ifdef STDC_HEADERS
114 # include <stdlib.h>
115 # include <string.h>
116 # else
117 extern char *getenv ();
118 # endif
119 #endif /* !WINDOWSNT */
120
121 #ifdef HAVE_UNISTD_H
122 # include <unistd.h>
123 #else
124 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
125 extern char *getcwd (char *buf, size_t size);
126 # endif
127 #endif /* HAVE_UNISTD_H */
128
129 #include <stdio.h>
130 #include <ctype.h>
131 #include <errno.h>
132 #ifndef errno
133 extern int errno;
134 #endif
135 #include <sys/types.h>
136 #include <sys/stat.h>
137
138 #include <assert.h>
139 #ifdef NDEBUG
140 # undef assert /* some systems have a buggy assert.h */
141 # define assert(x) ((void) 0)
142 #endif
143
144 #if !defined (S_ISREG) && defined (S_IFREG)
145 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
146 #endif
147
148 #ifdef LONG_OPTIONS
149 # include <getopt.h>
150 #else
151 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
152 extern char *optarg;
153 extern int optind, opterr;
154 #endif /* LONG_OPTIONS */
155
156 #ifdef ETAGS_REGEXPS
157 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
158 # ifdef __CYGWIN__ /* compiling on Cygwin */
159 !!! NOTICE !!!
160 the regex.h distributed with Cygwin is not compatible with etags, alas!
161 If you want regular expression support, you should delete this notice and
162 arrange to use the GNU regex.h and regex.c.
163 # endif
164 # endif
165 # include <regex.h>
166 #endif /* ETAGS_REGEXPS */
167
168 /* Define CTAGS to make the program "ctags" compatible with the usual one.
169 Leave it undefined to make the program "etags", which makes emacs-style
170 tag tables and tags typedefs, #defines and struct/union/enum by default. */
171 #ifdef CTAGS
172 # undef CTAGS
173 # define CTAGS TRUE
174 #else
175 # define CTAGS FALSE
176 #endif
177
178 /* Exit codes for success and failure. */
179 #ifdef VMS
180 # define GOOD 1
181 # define BAD 0
182 #else
183 # define GOOD 0
184 # define BAD 1
185 #endif
186
/* String comparison helpers.
   streq (S, T) is nonzero when S and T are equal; strneq (S, T, N) is
   nonzero when their first N characters are equal.
   Both arguments must be non-NULL because strcmp and strncmp read
   through both pointers, so the assertion requires both (it used to
   fire only when both were NULL at once).  The check is compiled out
   when NDEBUG is defined. */
#define streq(s,t) (assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strneq(s,t,n) (assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
189
/* Character classification helpers.  CHAR masks its argument into the
   range 0..CHARS-1, so the result is always a valid index into the
   CHARS-sized lookup tables used by the macros below, whatever the
   sign of the value passed in. */
190 #define CHARS 256 /* 2^8: number of distinct values CHAR can yield */
191 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
192 #define iswhite(c) (_wht[CHAR(c)]) /* c is white */
193 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
194 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
195 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
196 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */
197
198 #define ISALNUM(c) isalnum (CHAR(c))
199 #define ISALPHA(c) isalpha (CHAR(c))
200 #define ISDIGIT(c) isdigit (CHAR(c))
201 #define ISLOWER(c) islower (CHAR(c))
202
203 #define lowcase(c) tolower (CHAR(c))
204 #define upcase(c) toupper (CHAR(c))
205
206
207 /*
208 * xnew, xrnew -- allocate, reallocate storage
209 *
210 * SYNOPSIS: Type *xnew (int n, Type);
211 * void xrnew (OldPointer, int n, Type);
212 */
213 #if DEBUG
214 # include "chkmalloc.h"
215 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
216 (n) * sizeof (Type)))
217 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
218 (char *) (op), (n) * sizeof (Type)))
219 #else
220 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
221 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
222 (char *) (op), (n) * sizeof (Type)))
223 #endif
224
225 #define bool int
226
227 typedef void Lang_function __P((FILE *));
228
/* One supported compression format: the file name suffix that
   identifies it and the shell command used to read such files. */
229 typedef struct
230 {
231 char *suffix; /* file name suffix, without the dot (e.g. "gz") */
232 char *command; /* Takes one arg and decompresses to stdout */
233 } compressor;
234
/* Description of one language known to the program: its name, the
   function that tags files written in it, and the lists used to
   recognise such files.  Each list is either NULL or a NULL-terminated
   array of strings. */
235 typedef struct
236 {
237 char *name; /* language name, as printed by print_language_names */
238 Lang_function *function; /* tagging function; NULL in the "auto" entry */
239 char **filenames; /* whole file names belonging to this language */
240 char **suffixes; /* file name suffixes, without the leading dot */
241 char **interpreters; /* interpreter names (see the #! note in the help text) */
242 } language;
243
244 typedef struct node_st
245 { /* sorting structure */
246 char *name; /* function or type name */
247 char *file; /* file name */
248 bool is_func; /* use pattern or line no */
249 bool been_warned; /* set if noticed dup */
250 int lno; /* line number tag is on */
251 long cno; /* character number line starts on */
252 char *pat; /* search pattern */
253 struct node_st *left, *right; /* left and right sons */
254 } node;
255
256 /*
257 * A `linebuffer' is a structure which holds a line of text.
258 * `readline_internal' reads a line from a stream into a linebuffer
259 * and works regardless of the length of the line.
260 * SIZE is the size of BUFFER, LEN is the length of the string in
261 * BUFFER after readline reads it.
262 */
263 typedef struct
264 {
265 long size;
266 int len;
267 char *buffer;
268 } linebuffer;
269
270 /* Many compilers barf on this:
271 Lang_function Ada_funcs;
272 so let's write it this way */
273 static void Ada_funcs __P((FILE *));
274 static void Asm_labels __P((FILE *));
275 static void C_entries __P((int c_ext, FILE *));
276 static void default_C_entries __P((FILE *));
277 static void plain_C_entries __P((FILE *));
278 static void Cjava_entries __P((FILE *));
279 static void Cobol_paragraphs __P((FILE *));
280 static void Cplusplus_entries __P((FILE *));
281 static void Cstar_entries __P((FILE *));
282 static void Erlang_functions __P((FILE *));
283 static void Fortran_functions __P((FILE *));
284 static void Yacc_entries __P((FILE *));
285 static void Lisp_functions __P((FILE *));
286 static void Makefile_targets __P((FILE *));
287 static void Pascal_functions __P((FILE *));
288 static void Perl_functions __P((FILE *));
289 static void PHP_functions __P((FILE *));
290 static void Postscript_functions __P((FILE *));
291 static void Prolog_functions __P((FILE *));
292 static void Python_functions __P((FILE *));
293 static void Scheme_functions __P((FILE *));
294 static void TeX_commands __P((FILE *));
295 static void Texinfo_nodes __P((FILE *));
296 static void just_read_file __P((FILE *));
297
298 static void print_language_names __P((void));
299 static void print_version __P((void));
300 static void print_help __P((void));
301 int main __P((int, char **));
302
303 static compressor *get_compressor_from_suffix __P((char *, char **));
304 static language *get_language_from_langname __P((const char *));
305 static language *get_language_from_interpreter __P((char *));
306 static language *get_language_from_filename __P((char *));
307 static long readline __P((linebuffer *, FILE *));
308 static long readline_internal __P((linebuffer *, FILE *));
309 static bool nocase_tail __P((char *));
310 static char *get_tag __P((char *));
311
312 #ifdef ETAGS_REGEXPS
313 static void analyse_regex __P((char *, bool));
314 static void add_regex __P((char *, bool, language *));
315 static void free_patterns __P((void));
316 #endif /* ETAGS_REGEXPS */
317 static void error __P((const char *, const char *));
318 static void suggest_asking_for_help __P((void));
319 void fatal __P((char *, char *));
320 static void pfatal __P((char *));
321 static void add_node __P((node *, node **));
322
323 static void init __P((void));
324 static void initbuffer __P((linebuffer *));
325 static void find_entries __P((char *, FILE *));
326 static void free_tree __P((node *));
327 static void pfnote __P((char *, bool, char *, int, int, long));
328 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
329 static void process_file __P((char *));
330 static void put_entries __P((node *));
331
332 static char *concat __P((char *, char *, char *));
333 static char *skip_spaces __P((char *));
334 static char *skip_non_spaces __P((char *));
335 static char *savenstr __P((char *, int));
336 static char *savestr __P((char *));
337 static char *etags_strchr __P((const char *, int));
338 static char *etags_strrchr __P((const char *, int));
339 static char *etags_getcwd __P((void));
340 static char *relative_filename __P((char *, char *));
341 static char *absolute_filename __P((char *, char *));
342 static char *absolute_dirname __P((char *, char *));
343 static bool filename_is_absolute __P((char *f));
344 static void canonicalize_filename __P((char *));
345 static void linebuffer_setlen __P((linebuffer *, int));
346 static PTR xmalloc __P((unsigned int));
347 static PTR xrealloc __P((char *, unsigned int));
348
349 \f
350 static char searchar = '/'; /* use /.../ searches */
351
352 static char *tagfile; /* output file */
353 static char *progname; /* name this program was invoked with */
354 static char *cwd; /* current working directory */
355 static char *tagfiledir; /* directory of tagfile */
356 static FILE *tagf; /* ioptr for tags file */
357
358 static char *curfile; /* current input uncompressed file name */
359 static char *curfiledir; /* absolute dir of curfile */
360 static char *curtagfname; /* current file name to write in tagfile */
361 static language *curlang; /* current language */
362
363 static int lineno; /* line number of current line */
364 static long charno; /* current character number */
365 static long linecharno; /* charno of start of current line */
366 static char *dbp; /* pointer to start of current tag */
367 static bool nocharno; /* only use line number when making tag */
368 static const int invalidcharno = -1;
369
370 static node *head; /* the head of the binary tree of tags */
371
372 static linebuffer lb; /* the current line */
373
374 /* boolean "functions" (see init) */
375 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
376 static char
377 /* white chars */
378 *white = " \f\t\n\r\v",
379 /* not in a name */
380 *nonam = " \f\t\n\r()=,;",
381 /* token ending chars */
382 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
383 /* token starting chars */
384 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
385 /* valid in-token chars */
386 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
387
388 static bool append_to_tagfile; /* -a: append to tags */
389 /* The following four default to TRUE for etags, but to FALSE for ctags. */
390 static bool typedefs; /* -t: create tags for C and Ada typedefs */
391 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
392 /* 0 struct/enum/union decls, and C++ */
393 /* member functions. */
394 static bool constantypedefs; /* -d: create tags for C #define, enum */
395 /* constants and variables. */
396 /* -D: opposite of -d. Default under ctags. */
397 static bool declarations; /* --declarations: tag them and extern in C&Co*/
398 static bool globals; /* create tags for global variables */
399 static bool no_line_directive; /* ignore #line directives */
400 static bool members; /* create tags for C member variables */
401 static bool update; /* -u: update tags */
402 static bool vgrind_style; /* -v: create vgrind style index output */
403 static bool no_warnings; /* -w: suppress warnings */
404 static bool cxref_style; /* -x: create cxref style output */
405 static bool cplusplus; /* .[hc] means C++, not C */
406 static bool noindentypedefs; /* -I: ignore indentation in C */
407 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
408
409 #ifdef LONG_OPTIONS
410 static struct option longopts[] =
411 {
412 { "packages-only", no_argument, &packages_only, TRUE },
413 { "append", no_argument, NULL, 'a' },
414 { "backward-search", no_argument, NULL, 'B' },
415 { "c++", no_argument, NULL, 'C' },
416 { "cxref", no_argument, NULL, 'x' },
417 { "defines", no_argument, NULL, 'd' },
418 { "declarations", no_argument, &declarations, TRUE },
419 { "no-defines", no_argument, NULL, 'D' },
420 { "globals", no_argument, &globals, TRUE },
421 { "no-globals", no_argument, &globals, FALSE },
422 { "no-line-directive", no_argument, &no_line_directive, TRUE },
423 { "help", no_argument, NULL, 'h' },
424 { "help", no_argument, NULL, 'H' },
425 { "ignore-indentation", no_argument, NULL, 'I' },
426 { "include", required_argument, NULL, 'i' },
427 { "language", required_argument, NULL, 'l' },
428 { "members", no_argument, &members, TRUE },
429 { "no-members", no_argument, &members, FALSE },
430 { "no-warn", no_argument, NULL, 'w' },
431 { "output", required_argument, NULL, 'o' },
432 #ifdef ETAGS_REGEXPS
433 { "regex", required_argument, NULL, 'r' },
434 { "no-regex", no_argument, NULL, 'R' },
435 { "ignore-case-regex", required_argument, NULL, 'c' },
436 #endif /* ETAGS_REGEXPS */
437 { "typedefs", no_argument, NULL, 't' },
438 { "typedefs-and-c++", no_argument, NULL, 'T' },
439 { "update", no_argument, NULL, 'u' },
440 { "version", no_argument, NULL, 'V' },
441 { "vgrind", no_argument, NULL, 'v' },
442 { NULL }
443 };
444 #endif /* LONG_OPTIONS */
445
446 #ifdef ETAGS_REGEXPS
447 /* Structure defining a regular expression. Elements are
448 the compiled pattern, and the name string. */
449 typedef struct pattern
450 {
451 struct pattern *p_next;
452 language *lang;
453 char *regex;
454 struct re_pattern_buffer *pat;
455 struct re_registers regs;
456 char *name_pattern;
457 bool error_signaled;
458 } pattern;
459
460 /* List of all regexps. */
461 static pattern *p_head = NULL;
462
463 /* How many characters in the character set. (From regex.c.) */
464 #define CHAR_SET_SIZE 256
465 /* Translation table for case-insensitive matching. */
466 static char lc_trans[CHAR_SET_SIZE];
467 #endif /* ETAGS_REGEXPS */
468
469 static compressor compressors[] =
470 {
471 { "z", "gzip -d -c"},
472 { "Z", "gzip -d -c"},
473 { "gz", "gzip -d -c"},
474 { "GZ", "gzip -d -c"},
475 { "bz2", "bzip2 -d -c" },
476 { NULL }
477 };
478
479 /*
480 * Language stuff.
481 */
482
483 /* Non-NULL if language fixed. */
484 static language *forced_lang = NULL;
485
486 /* Ada code */
487 static char *Ada_suffixes [] =
488 { "ads", "adb", "ada", NULL };
489
490 /* Assembly code */
491 static char *Asm_suffixes [] =
492 { "a", /* Unix assembler */
493 "asm", /* Microcontroller assembly */
494 "def", /* BSO/Tasking definition includes */
495 "inc", /* Microcontroller include files */
496 "ins", /* Microcontroller include files */
497 "s", "sa", /* Unix assembler */
498 "S", /* cpp-processed Unix assembler */
499 "src", /* BSO/Tasking C compiler output */
500 NULL
501 };
502
503 /* Note that .c and .h can be considered C++, if the --c++ flag was
504 given, or if the `class' keyword is met inside the file.
505 That is why default_C_entries is called for these. */
506 static char *default_C_suffixes [] =
507 { "c", "h", NULL };
508
509 static char *Cplusplus_suffixes [] =
510 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
511 "M", /* Objective C++ */
512 "pdb", /* Postscript with C syntax */
513 NULL };
514
515 static char *Cjava_suffixes [] =
516 { "java", NULL };
517
518 static char *Cobol_suffixes [] =
519 { "COB", "cob", NULL };
520
521 static char *Cstar_suffixes [] =
522 { "cs", "hs", NULL };
523
524 static char *Erlang_suffixes [] =
525 { "erl", "hrl", NULL };
526
527 static char *Fortran_suffixes [] =
528 { "F", "f", "f90", "for", NULL };
529
530 static char *Lisp_suffixes [] =
531 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
532
533 static char *Makefile_filenames [] =
534 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
535
536 static char *Pascal_suffixes [] =
537 { "p", "pas", NULL };
538
539 static char *Perl_suffixes [] =
540 { "pl", "pm", NULL };
541
542 static char *Perl_interpreters [] =
543 { "perl", "@PERL@", NULL };
544
545 static char *PHP_suffixes [] =
546 { "php", "php3", "php4", NULL };
547
548 static char *plain_C_suffixes [] =
549 { "lm", /* Objective lex file */
550 "m", /* Objective C file */
551 "pc", /* Pro*C file */
552 NULL };
553
554 static char *Postscript_suffixes [] =
555 { "ps", "psw", NULL }; /* .psw is for PSWrap */
556
557 static char *Prolog_suffixes [] =
558 { "prolog", NULL };
559
560 static char *Python_suffixes [] =
561 { "py", NULL };
562
563 /* Can't do the `SCM' or `scm' prefix with a version number. */
564 static char *Scheme_suffixes [] =
565 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
566
567 static char *TeX_suffixes [] =
568 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
569
570 static char *Texinfo_suffixes [] =
571 { "texi", "texinfo", "txi", NULL };
572
573 static char *Yacc_suffixes [] =
574 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
575
576 /*
577 * Table of languages.
578 *
579 * It is ok for a given function to be listed under more than one
580 * name. I just didn't.
581 */
582
583 static language lang_names [] =
584 {
585 { "ada", Ada_funcs, NULL, Ada_suffixes, NULL },
586 { "asm", Asm_labels, NULL, Asm_suffixes, NULL },
587 { "c", default_C_entries, NULL, default_C_suffixes, NULL },
588 { "c++", Cplusplus_entries, NULL, Cplusplus_suffixes, NULL },
589 { "c*", Cstar_entries, NULL, Cstar_suffixes, NULL },
590 { "cobol", Cobol_paragraphs, NULL, Cobol_suffixes, NULL },
591 { "erlang", Erlang_functions, NULL, Erlang_suffixes, NULL },
592 { "fortran", Fortran_functions, NULL, Fortran_suffixes, NULL },
593 { "java", Cjava_entries, NULL, Cjava_suffixes, NULL },
594 { "lisp", Lisp_functions, NULL, Lisp_suffixes, NULL },
595 { "makefile", Makefile_targets, Makefile_filenames, NULL, NULL },
596 { "pascal", Pascal_functions, NULL, Pascal_suffixes, NULL },
597 { "perl", Perl_functions, NULL, Perl_suffixes, Perl_interpreters },
598 { "php", PHP_functions, NULL, PHP_suffixes, NULL },
599 { "postscript", Postscript_functions, NULL, Postscript_suffixes, NULL },
600 { "proc", plain_C_entries, NULL, plain_C_suffixes, NULL },
601 { "prolog", Prolog_functions, NULL, Prolog_suffixes, NULL },
602 { "python", Python_functions, NULL, Python_suffixes, NULL },
603 { "scheme", Scheme_functions, NULL, Scheme_suffixes, NULL },
604 { "tex", TeX_commands, NULL, TeX_suffixes, NULL },
605 { "texinfo", Texinfo_nodes, NULL, Texinfo_suffixes, NULL },
606 { "yacc", Yacc_entries, NULL, Yacc_suffixes, NULL },
607 { "auto", NULL }, /* default guessing scheme */
608 { "none", just_read_file }, /* regexp matching only */
609 { NULL, NULL } /* end of list */
610 };
611
612 \f
613 static void
614 print_language_names ()
615 {
616 language *lang;
617 char **name, **ext;
618
619 puts ("\nThese are the currently supported languages, along with the\n\
620 default file names and dot suffixes:");
621 for (lang = lang_names; lang->name != NULL; lang++)
622 {
623 printf (" %-*s", 10, lang->name);
624 if (lang->filenames != NULL)
625 for (name = lang->filenames; *name != NULL; name++)
626 printf (" %s", *name);
627 if (lang->suffixes != NULL)
628 for (ext = lang->suffixes; *ext != NULL; ext++)
629 printf (" .%s", *ext);
630 puts ("");
631 }
632 puts ("Where `auto' means use default language for files based on file\n\
633 name suffix, and `none' means only do regexp processing on files.\n\
634 If no language is specified and no matching suffix is found,\n\
635 the first line of the file is read for a sharp-bang (#!) sequence\n\
636 followed by the name of an interpreter. If no such sequence is found,\n\
637 Fortran is tried first; if no tags are found, C is tried next.\n\
638 When parsing any C file, a \"class\" keyword switches to C++.\n\
639 Compressed files are supported using gzip and bzip2.");
640 }
641
642 #ifndef EMACS_NAME
643 # define EMACS_NAME "GNU Emacs"
644 #endif
645 #ifndef VERSION
646 # define VERSION "21"
647 #endif
648 static void
649 print_version ()
650 {
651 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
652 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
653 puts ("This program is distributed under the same terms as Emacs");
654
655 exit (GOOD);
656 }
657
658 static void
659 print_help ()
660 {
661 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
662 \n\
663 These are the options accepted by %s.\n", progname, progname);
664 #ifdef LONG_OPTIONS
665 puts ("You may use unambiguous abbreviations for the long option names.");
666 #else
667 puts ("Long option names do not work with this executable, as it is not\n\
668 linked with GNU getopt.");
669 #endif /* LONG_OPTIONS */
670 puts ("A - as file name means read names from stdin (one per line).");
671 if (!CTAGS)
672 printf (" Absolute names are stored in the output file as they are.\n\
673 Relative ones are stored relative to the output file's directory.");
674 puts ("\n");
675
676 puts ("-a, --append\n\
677 Append tag entries to existing tags file.");
678
679 puts ("--packages-only\n\
680 For Ada files, only generate tags for packages .");
681
682 if (CTAGS)
683 puts ("-B, --backward-search\n\
684 Write the search commands for the tag entries using '?', the\n\
685 backward-search command instead of '/', the forward-search command.");
686
687 /* This option is mostly obsolete, because etags can now automatically
688 detect C++. Retained for backward compatibility and for debugging and
689 experimentation. In principle, we could want to tag as C++ even
690 before any "class" keyword.
691 puts ("-C, --c++\n\
692 Treat files whose name suffix defaults to C language as C++ files.");
693 */
694
695 puts ("--declarations\n\
696 In C and derived languages, create tags for function declarations,");
697 if (CTAGS)
698 puts ("\tand create tags for extern variables if --globals is used.");
699 else
700 puts
701 ("\tand create tags for extern variables unless --no-globals is used.");
702
703 if (CTAGS)
704 puts ("-d, --defines\n\
705 Create tag entries for C #define constants and enum constants, too.");
706 else
707 puts ("-D, --no-defines\n\
708 Don't create tag entries for C #define constants and enum constants.\n\
709 This makes the tags file smaller.");
710
711 if (!CTAGS)
712 {
713 puts ("-i FILE, --include=FILE\n\
714 Include a note in tag file indicating that, when searching for\n\
715 a tag, one should also consult the tags file FILE after\n\
716 checking the current file.");
717 puts ("-l LANG, --language=LANG\n\
718 Force the following files to be considered as written in the\n\
719 named language up to the next --language=LANG option.");
720 }
721
722 if (CTAGS)
723 puts ("--globals\n\
724 Create tag entries for global variables in some languages.");
725 else
726 puts ("--no-globals\n\
727 Do not create tag entries for global variables in some\n\
728 languages. This makes the tags file smaller.");
729 puts ("--members\n\
730 Create tag entries for member variables in C and derived languages.");
731
732 #ifdef ETAGS_REGEXPS
733 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
734 Make a tag for each line matching pattern REGEXP in the following\n\
735 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
736 regexfile is a file containing one REGEXP per line.\n\
737 REGEXP is anchored (as if preceded by ^).\n\
738 The form /REGEXP/NAME/ creates a named tag.\n\
739 For example Tcl named tags can be created with:\n\
740 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
741 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
742 Like -r, --regex but ignore case when matching expressions.");
743 puts ("-R, --no-regex\n\
744 Don't create tags from regexps for the following files.");
745 #endif /* ETAGS_REGEXPS */
746 puts ("-o FILE, --output=FILE\n\
747 Write the tags to FILE.");
748 puts ("-I, --ignore-indentation\n\
749 Don't rely on indentation quite as much as normal. Currently,\n\
750 this means not to assume that a closing brace in the first\n\
751 column is the final brace of a function or structure\n\
752 definition in C and C++.");
753
754 if (CTAGS)
755 {
756 puts ("-t, --typedefs\n\
757 Generate tag entries for C and Ada typedefs.");
758 puts ("-T, --typedefs-and-c++\n\
759 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
760 and C++ member functions.");
761 puts ("-u, --update\n\
762 Update the tag entries for the given files, leaving tag\n\
763 entries for other files in place. Currently, this is\n\
764 implemented by deleting the existing entries for the given\n\
765 files and then rewriting the new entries at the end of the\n\
766 tags file. It is often faster to simply rebuild the entire\n\
767 tag file than to use this.");
768 puts ("-v, --vgrind\n\
769 Generates an index of items intended for human consumption,\n\
770 similar to the output of vgrind. The index is sorted, and\n\
771 gives the page number of each item.");
772 puts ("-w, --no-warn\n\
773 Suppress warning messages about entries defined in multiple\n\
774 files.");
775 puts ("-x, --cxref\n\
776 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
777 The output uses line numbers instead of page numbers, but\n\
778 beyond that the differences are cosmetic; try both to see\n\
779 which you like.");
780 }
781
782 puts ("-V, --version\n\
783 Print the version of the program.\n\
784 -h, --help\n\
785 Print this help message.");
786
787 print_language_names ();
788
789 puts ("");
790 puts ("Report bugs to bug-gnu-emacs@gnu.org");
791
792 exit (GOOD);
793 }
794
795 \f
/* Kinds of entries stored in the argument buffer that main fills while
   scanning the command line. */
796 enum argument_type
797 {
798 at_language, /* presumably a language specification -- confirm in main */
799 at_regexp, /* presumably a regexp specification -- confirm in main */
800 at_filename, /* a file name seen on the command line */
801 at_icregexp /* presumably a regexp matched ignoring case -- confirm in main */
802 };
803
804 /* This structure helps us allow mixing of --lang and file names. */
805 typedef struct
806 {
807 enum argument_type arg_type;
808 char *what;
809 language *lang; /* language of the regexp */
810 } argument;
811
812 #ifdef VMS /* VMS specific functions */
813
814 #define EOS '\0'
815
816 /* This is a BUG! ANY arbitrary limit is a BUG!
817 Won't someone please fix this? */
818 #define MAX_FILE_SPEC_LEN 255
819 typedef struct {
820 short curlen;
821 char body[MAX_FILE_SPEC_LEN + 1];
822 } vspec;
823
824 /*
825 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
826 returning in each successive call the next file name matching the input
827 spec. The function expects that each in_spec passed
828 to it will be processed to completion; in particular, up to and
829 including the call following that in which the last matching name
830 is returned, the function ignores the value of in_spec, and will
831 only start processing a new spec with the following call.
832 If an error occurs, on return out_spec contains the value
833 of in_spec when the error occurred.
834
835 With each successive file name returned in out_spec, the
836 function's return value is one. When there are no more matching
837 names the function returns zero. If on the first call no file
838 matches in_spec, or there is any other error, -1 is returned.
839 */
840
841 #include <rmsdef.h>
842 #include <descrip.h>
843 #define OUTSIZE MAX_FILE_SPEC_LEN
844 static short
845 fn_exp (out, in)
846 vspec *out;
847 char *in;
848 {
849 static long context = 0;
850 static struct dsc$descriptor_s o;
851 static struct dsc$descriptor_s i;
852 static bool pass1 = TRUE;
853 long status;
854 short retval;
855
856 if (pass1)
857 {
858 pass1 = FALSE;
859 o.dsc$a_pointer = (char *) out;
860 o.dsc$w_length = (short)OUTSIZE;
861 i.dsc$a_pointer = in;
862 i.dsc$w_length = (short)strlen(in);
863 i.dsc$b_dtype = DSC$K_DTYPE_T;
864 i.dsc$b_class = DSC$K_CLASS_S;
865 o.dsc$b_dtype = DSC$K_DTYPE_VT;
866 o.dsc$b_class = DSC$K_CLASS_VS;
867 }
868 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
869 {
870 out->body[out->curlen] = EOS;
871 return 1;
872 }
873 else if (status == RMS$_NMF)
874 retval = 0;
875 else
876 {
877 strcpy(out->body, in);
878 retval = -1;
879 }
880 lib$find_file_end(&context);
881 pass1 = TRUE;
882 return retval;
883 }
884
885 /*
886 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
887 name of each file specified by the provided arg expanding wildcards.
888 */
889 static char *
890 gfnames (arg, p_error)
891 char *arg;
892 bool *p_error;
893 {
894 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
895
896 switch (fn_exp (&filename, arg))
897 {
898 case 1:
899 *p_error = FALSE;
900 return filename.body;
901 case 0:
902 *p_error = FALSE;
903 return NULL;
904 default:
905 *p_error = TRUE;
906 return filename.body;
907 }
908 }
909
910 #ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library's system: CMD is not executed.  Reports
   through error that the function is unavailable and returns -1, so a
   caller that inspects the result sees a definite failure value rather
   than an indeterminate one. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
916 #endif
917
918 #define VERSION_DELIM ';'
919 char *massage_name (s)
920 char *s;
921 {
922 char *start = s;
923
924 for ( ; *s; s++)
925 if (*s == VERSION_DELIM)
926 {
927 *s = EOS;
928 break;
929 }
930 else
931 *s = lowcase (*s);
932 return start;
933 }
934 #endif /* VMS */
935
936 \f
937 int
938 main (argc, argv)
939 int argc;
940 char *argv[];
941 {
942 int i;
943 unsigned int nincluded_files;
944 char **included_files;
945 char *this_file;
946 argument *argbuffer;
947 int current_arg, file_count;
948 linebuffer filename_lb;
949 #ifdef VMS
950 bool got_err;
951 #endif
952
953 #ifdef DOS_NT
954 _fmode = O_BINARY; /* all of files are treated as binary files */
955 #endif /* DOS_NT */
956
957 progname = argv[0];
958 nincluded_files = 0;
959 included_files = xnew (argc, char *);
960 current_arg = 0;
961 file_count = 0;
962
963 /* Allocate enough no matter what happens. Overkill, but each one
964 is small. */
965 argbuffer = xnew (argc, argument);
966
967 #ifdef ETAGS_REGEXPS
968 /* Set syntax for regular expression routines. */
969 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
970 /* Translation table for case-insensitive search. */
971 for (i = 0; i < CHAR_SET_SIZE; i++)
972 lc_trans[i] = lowcase (i);
973 #endif /* ETAGS_REGEXPS */
974
975 /*
976 * If etags, always find typedefs and structure tags. Why not?
977 * Also default to find macro constants, enum constants and
978 * global variables.
979 */
980 if (!CTAGS)
981 {
982 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
983 globals = TRUE;
984 declarations = FALSE;
985 members = FALSE;
986 }
987
988 while (1)
989 {
990 int opt;
991 char *optstring;
992
993 #ifdef ETAGS_REGEXPS
994 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
995 #else
996 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
997 #endif /* ETAGS_REGEXPS */
998
999 #ifndef LONG_OPTIONS
1000 optstring = optstring + 1;
1001 #endif /* LONG_OPTIONS */
1002
1003 opt = getopt_long (argc, argv, optstring, longopts, 0);
1004 if (opt == EOF)
1005 break;
1006
1007 switch (opt)
1008 {
1009 case 0:
1010 /* If getopt returns 0, then it has already processed a
1011 long-named option. We should do nothing. */
1012 break;
1013
1014 case 1:
1015 /* This means that a file name has been seen. Record it. */
1016 argbuffer[current_arg].arg_type = at_filename;
1017 argbuffer[current_arg].what = optarg;
1018 ++current_arg;
1019 ++file_count;
1020 break;
1021
1022 /* Common options. */
1023 case 'a': append_to_tagfile = TRUE; break;
1024 case 'C': cplusplus = TRUE; break;
1025 case 'd': constantypedefs = TRUE; break;
1026 case 'D': constantypedefs = FALSE; break;
1027 case 'f': /* for compatibility with old makefiles */
1028 case 'o':
1029 if (tagfile)
1030 {
1031 error ("-o option may only be given once.", (char *)NULL);
1032 suggest_asking_for_help ();
1033 }
1034 tagfile = optarg;
1035 break;
1036 case 'I':
1037 case 'S': /* for backward compatibility */
1038 noindentypedefs = TRUE;
1039 break;
1040 case 'l':
1041 {
1042 language *lang = get_language_from_langname (optarg);
1043 if (lang != NULL)
1044 {
1045 argbuffer[current_arg].lang = lang;
1046 argbuffer[current_arg].arg_type = at_language;
1047 ++current_arg;
1048 }
1049 }
1050 break;
1051 case 'r':
1052 argbuffer[current_arg].arg_type = at_regexp;
1053 argbuffer[current_arg].what = optarg;
1054 ++current_arg;
1055 break;
1056 case 'R':
1057 argbuffer[current_arg].arg_type = at_regexp;
1058 argbuffer[current_arg].what = NULL;
1059 ++current_arg;
1060 break;
1061 case 'c':
1062 argbuffer[current_arg].arg_type = at_icregexp;
1063 argbuffer[current_arg].what = optarg;
1064 ++current_arg;
1065 break;
1066 case 'V':
1067 print_version ();
1068 break;
1069 case 'h':
1070 case 'H':
1071 print_help ();
1072 break;
1073 case 't':
1074 typedefs = TRUE;
1075 break;
1076 case 'T':
1077 typedefs = typedefs_or_cplusplus = TRUE;
1078 break;
1079 /* Etags options */
1080 case 'i':
1081 included_files[nincluded_files++] = optarg;
1082 break;
1083 /* Ctags options. */
1084 case 'B': searchar = '?'; break;
1085 case 'u': update = TRUE; break;
1086 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1087 case 'x': cxref_style = TRUE; break;
1088 case 'w': no_warnings = TRUE; break;
1089 default:
1090 suggest_asking_for_help ();
1091 }
1092 }
1093
1094 for (; optind < argc; ++optind)
1095 {
1096 argbuffer[current_arg].arg_type = at_filename;
1097 argbuffer[current_arg].what = argv[optind];
1098 ++current_arg;
1099 ++file_count;
1100 }
1101
1102 if (nincluded_files == 0 && file_count == 0)
1103 {
1104 error ("no input files specified.", (char *)NULL);
1105 suggest_asking_for_help ();
1106 }
1107
1108 if (tagfile == NULL)
1109 tagfile = CTAGS ? "tags" : "TAGS";
1110 cwd = etags_getcwd (); /* the current working directory */
1111 if (cwd[strlen (cwd) - 1] != '/')
1112 {
1113 char *oldcwd = cwd;
1114 cwd = concat (oldcwd, "/", "");
1115 free (oldcwd);
1116 }
1117 if (streq (tagfile, "-"))
1118 tagfiledir = cwd;
1119 else
1120 tagfiledir = absolute_dirname (tagfile, cwd);
1121
1122 init (); /* set up boolean "functions" */
1123
1124 initbuffer (&lb);
1125 initbuffer (&filename_lb);
1126
1127 if (!CTAGS)
1128 {
1129 if (streq (tagfile, "-"))
1130 {
1131 tagf = stdout;
1132 #ifdef DOS_NT
1133 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1134 doesn't take effect until after `stdout' is already open). */
1135 if (!isatty (fileno (stdout)))
1136 setmode (fileno (stdout), O_BINARY);
1137 #endif /* DOS_NT */
1138 }
1139 else
1140 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1141 if (tagf == NULL)
1142 pfatal (tagfile);
1143 }
1144
1145 /*
1146 * Loop through files finding functions.
1147 */
1148 for (i = 0; i < current_arg; ++i)
1149 {
1150 switch (argbuffer[i].arg_type)
1151 {
1152 case at_language:
1153 forced_lang = argbuffer[i].lang;
1154 break;
1155 #ifdef ETAGS_REGEXPS
1156 case at_regexp:
1157 analyse_regex (argbuffer[i].what, FALSE);
1158 break;
1159 case at_icregexp:
1160 analyse_regex (argbuffer[i].what, TRUE);
1161 break;
1162 #endif
1163 case at_filename:
1164 #ifdef VMS
1165 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1166 {
1167 if (got_err)
1168 {
1169 error ("can't find file %s\n", this_file);
1170 argc--, argv++;
1171 }
1172 else
1173 {
1174 this_file = massage_name (this_file);
1175 }
1176 #else
1177 this_file = argbuffer[i].what;
1178 #endif
1179 /* Input file named "-" means read file names from stdin
1180 (one per line) and use them. */
1181 if (streq (this_file, "-"))
1182 while (readline_internal (&filename_lb, stdin) > 0)
1183 process_file (filename_lb.buffer);
1184 else
1185 process_file (this_file);
1186 #ifdef VMS
1187 }
1188 #endif
1189 break;
1190 }
1191 }
1192
1193 #ifdef ETAGS_REGEXPS
1194 free_patterns ();
1195 #endif /* ETAGS_REGEXPS */
1196
1197 if (!CTAGS || cxref_style)
1198 {
1199 put_entries (head);
1200 free_tree (head);
1201 head = NULL;
1202 if (!CTAGS)
1203 while (nincluded_files-- > 0)
1204 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1205
1206 if (fclose (tagf) == EOF)
1207 pfatal (tagfile);
1208 exit (GOOD);
1209 }
1210
1211 if (update)
1212 {
1213 char cmd[BUFSIZ];
1214 for (i = 0; i < current_arg; ++i)
1215 {
1216 if (argbuffer[i].arg_type != at_filename)
1217 continue;
1218 sprintf (cmd,
1219 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1220 tagfile, argbuffer[i].what, tagfile);
1221 if (system (cmd) != GOOD)
1222 fatal ("failed to execute shell command", (char *)NULL);
1223 }
1224 append_to_tagfile = TRUE;
1225 }
1226
1227 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1228 if (tagf == NULL)
1229 pfatal (tagfile);
1230 put_entries (head);
1231 free_tree (head);
1232 head = NULL;
1233 if (fclose (tagf) == EOF)
1234 pfatal (tagfile);
1235
1236 if (update)
1237 {
1238 char cmd[BUFSIZ];
1239 sprintf (cmd, "sort -o %s %s", tagfile, tagfile);
1240 exit (system (cmd));
1241 }
1242 return GOOD;
1243 }
1244
1245
1246
1247 /*
1248 * Return a compressor given the file name. If EXTPTR is non-zero,
1249 * return a pointer into FILE where the compressor-specific
1250 * extension begins. If no compressor is found, NULL is returned
1251 * and EXTPTR is not significant.
1252 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1253 */
1254 static compressor *
1255 get_compressor_from_suffix (file, extptr)
1256 char *file;
1257 char **extptr;
1258 {
1259 compressor *compr;
1260 char *slash, *suffix;
1261
1262 /* This relies on FN to be after canonicalize_filename,
1263 so we don't need to consider backslashes on DOS_NT. */
1264 slash = etags_strrchr (file, '/');
1265 suffix = etags_strrchr (file, '.');
1266 if (suffix == NULL || suffix < slash)
1267 return NULL;
1268 if (extptr != NULL)
1269 *extptr = suffix;
1270 suffix += 1;
1271 /* Let those poor souls who live with DOS 8+3 file name limits get
1272 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1273 Only the first do loop is run if not MSDOS */
1274 do
1275 {
1276 for (compr = compressors; compr->suffix != NULL; compr++)
1277 if (streq (compr->suffix, suffix))
1278 return compr;
1279 if (!MSDOS)
1280 break; /* do it only once: not really a loop */
1281 if (extptr != NULL)
1282 *extptr = ++suffix;
1283 } while (*suffix != '\0');
1284 return NULL;
1285 }
1286
1287
1288
1289 /*
1290 * Return a language given the name.
1291 */
1292 static language *
1293 get_language_from_langname (name)
1294 const char *name;
1295 {
1296 language *lang;
1297
1298 if (name == NULL)
1299 error ("empty language name", (char *)NULL);
1300 else
1301 {
1302 for (lang = lang_names; lang->name != NULL; lang++)
1303 if (streq (name, lang->name))
1304 return lang;
1305 error ("unknown language \"%s\"", name);
1306 }
1307
1308 return NULL;
1309 }
1310
1311
1312 /*
1313 * Return a language given the interpreter name.
1314 */
1315 static language *
1316 get_language_from_interpreter (interpreter)
1317 char *interpreter;
1318 {
1319 language *lang;
1320 char **iname;
1321
1322 if (interpreter == NULL)
1323 return NULL;
1324 for (lang = lang_names; lang->name != NULL; lang++)
1325 if (lang->interpreters != NULL)
1326 for (iname = lang->interpreters; *iname != NULL; iname++)
1327 if (streq (*iname, interpreter))
1328 return lang;
1329
1330 return NULL;
1331 }
1332
1333
1334
1335 /*
1336 * Return a language given the file name.
1337 */
1338 static language *
1339 get_language_from_filename (file)
1340 char *file;
1341 {
1342 language *lang;
1343 char **name, **ext, *suffix;
1344
1345 /* Try whole file name first. */
1346 for (lang = lang_names; lang->name != NULL; lang++)
1347 if (lang->filenames != NULL)
1348 for (name = lang->filenames; *name != NULL; name++)
1349 if (streq (*name, file))
1350 return lang;
1351
1352 /* If not found, try suffix after last dot. */
1353 suffix = etags_strrchr (file, '.');
1354 if (suffix == NULL)
1355 return NULL;
1356 suffix += 1;
1357 for (lang = lang_names; lang->name != NULL; lang++)
1358 if (lang->suffixes != NULL)
1359 for (ext = lang->suffixes; *ext != NULL; ext++)
1360 if (streq (*ext, suffix))
1361 return lang;
1362 return NULL;
1363 }
1364
1365
1366
/*
 * This routine is called on each file argument.
 *
 * It canonicalizes FILE in place, refuses to tag the tags file itself,
 * splits the name into its compressed and uncompressed forms, skips
 * names whose uncompressed form was already handled, finds a file that
 * actually exists (trying the other form, or every known compressor
 * suffix), opens it -- through the compressor command and popen when
 * the compressed form is the one found -- and passes the stream to
 * find_entries.  Problems with a single file are reported and the file
 * is skipped; only a failing close is fatal.
 */
static void
process_file (file)
     char *file;
{
  struct stat stat_buf;
  FILE *inf;
  compressor *compr;
  char *compressed_name, *uncompressed_name;
  char *ext, *real_name;
  int retval;


  canonicalize_filename (file);
  if (streq (file, tagfile) && !streq (tagfile, "-"))
    {
      error ("skipping inclusion of %s in self.", file);
      return;
    }
  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
    {
      /* No known compressor suffix: FILE is the uncompressed name. */
      compressed_name = NULL;
      real_name = uncompressed_name = savestr (file);
    }
  else
    {
      /* FILE is the compressed name; strip the suffix found at EXT. */
      real_name = compressed_name = savestr (file);
      uncompressed_name = savenstr (file, ext - file);
    }

  /* If the canonicalized uncompressed name has already been dealt with,
     skip it silently, else add it to the list. */
  {
    typedef struct processed_file
    {
      char *filename;
      struct processed_file *next;
    } processed_file;
    /* List of names seen so far; it persists across calls. */
    static processed_file *pf_head = NULL;
    register processed_file *fnp;

    for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
      if (streq (uncompressed_name, fnp->filename))
	goto exit;
    /* Not seen yet: push a copy of the name on the front of the list. */
    fnp = pf_head;
    pf_head = xnew (1, struct processed_file);
    pf_head->filename = savestr (uncompressed_name);
    pf_head->next = fnp;
  }

  if (stat (real_name, &stat_buf) != 0)
    {
      /* Reset real_name and try with a different name. */
      real_name = NULL;
      if (compressed_name != NULL) /* try with the given suffix */
	{
	  if (stat (uncompressed_name, &stat_buf) == 0)
	    real_name = uncompressed_name;
	}
      else			/* try all possible suffixes */
	{
	  for (compr = compressors; compr->suffix != NULL; compr++)
	    {
	      compressed_name = concat (file, ".", compr->suffix);
	      if (stat (compressed_name, &stat_buf) != 0)
		{
		  if (MSDOS)
		    {
		      /* Shorten the candidate one character at a time,
			 starting by dropping the dot, and retry. */
		      char *suf = compressed_name + strlen (file);
		      size_t suflen = strlen (compr->suffix) + 1;
		      for ( ; suf[1]; suf++, suflen--)
			{
			  memmove (suf, suf + 1, suflen);
			  if (stat (compressed_name, &stat_buf) == 0)
			    {
			      real_name = compressed_name;
			      break;
			    }
			}
		      if (real_name != NULL)
			break;
		    } /* MSDOS */
		  free (compressed_name);
		  compressed_name = NULL;
		}
	      else
		{
		  real_name = compressed_name;
		  break;
		}
	    }
	}
      if (real_name == NULL)
	{
	  perror (file);
	  goto exit;
	}
    } /* try with a different name */

  if (!S_ISREG (stat_buf.st_mode))
    {
      error ("skipping %s: it is not a regular file.", real_name);
      goto exit;
    }
  /* Pointer identity tells which form was found: the compressed one
     is read through the compressor command, anything else directly. */
  if (real_name == compressed_name)
    {
      char *cmd = concat (compr->command, " ", real_name);
      inf = (FILE *) popen (cmd, "r");
      free (cmd);
    }
  else
    inf = fopen (real_name, "r");
  if (inf == NULL)
    {
      perror (real_name);
      goto exit;
    }

  /* Tags are always recorded under the uncompressed name. */
  curfile = uncompressed_name;
  curfiledir = absolute_dirname (curfile, cwd);
  if (filename_is_absolute (curfile))
    {
      /* file is an absolute file name.  Canonicalize it. */
      curtagfname = absolute_filename (curfile, NULL);
    }
  else
    {
      /* file is a file name relative to cwd.  Make it relative
	 to the directory of the tags file. */
      curtagfname = relative_filename (curfile, tagfiledir);
    }
  nocharno = FALSE;		/* use char position when making tags */
  find_entries (curfile, inf);

  free (curfiledir);
  /* Close the stream the same way it was opened. */
  if (real_name == compressed_name)
    retval = pclose (inf);
  else
    retval = fclose (inf);
  if (retval < 0)
    pfatal (file);

  /* Common cleanup for every path taken after the names were saved. */
 exit:
  if (compressed_name) free(compressed_name);
  if (uncompressed_name) free(uncompressed_name);
  return;
}
1516
1517 /*
1518 * This routine sets up the boolean pseudo-functions which work
1519 * by setting boolean flags dependent upon the corresponding character.
1520 * Every char which is NOT in that string is not a white char. Therefore,
1521 * all of the array "_wht" is set to FALSE, and then the elements
1522 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1523 * of a char is TRUE if it is the string "white", else FALSE.
1524 */
1525 static void
1526 init ()
1527 {
1528 register char *sp;
1529 register int i;
1530
1531 for (i = 0; i < CHARS; i++)
1532 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1533 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1534 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1535 notinname('\0') = notinname('\n');
1536 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1537 begtoken('\0') = begtoken('\n');
1538 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1539 intoken('\0') = intoken('\n');
1540 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1541 endtoken('\0') = endtoken('\n');
1542 }
1543
1544 /*
1545 * This routine opens the specified file and calls the function
1546 * which finds the function and type definitions.
1547 */
1548 static node *last_node = NULL;
1549
1550 static void
1551 find_entries (file, inf)
1552 char *file;
1553 FILE *inf;
1554 {
1555 char *cp;
1556 language *lang;
1557 node *old_last_node;
1558
1559 /* If user specified a language, use it. */
1560 lang = forced_lang;
1561 if (lang != NULL && lang->function != NULL)
1562 {
1563 curlang = lang;
1564 lang->function (inf);
1565 return;
1566 }
1567
1568 /* Try to guess the language given the file name. */
1569 lang = get_language_from_filename (file);
1570 if (lang != NULL && lang->function != NULL)
1571 {
1572 curlang = lang;
1573 lang->function (inf);
1574 return;
1575 }
1576
1577 /* Look for sharp-bang as the first two characters. */
1578 if (readline_internal (&lb, inf) > 0
1579 && lb.len >= 2
1580 && lb.buffer[0] == '#'
1581 && lb.buffer[1] == '!')
1582 {
1583 char *lp;
1584
1585 /* Set lp to point at the first char after the last slash in the
1586 line or, if no slashes, at the first nonblank. Then set cp to
1587 the first successive blank and terminate the string. */
1588 lp = etags_strrchr (lb.buffer+2, '/');
1589 if (lp != NULL)
1590 lp += 1;
1591 else
1592 lp = skip_spaces (lb.buffer + 2);
1593 cp = skip_non_spaces (lp);
1594 *cp = '\0';
1595
1596 if (strlen (lp) > 0)
1597 {
1598 lang = get_language_from_interpreter (lp);
1599 if (lang != NULL && lang->function != NULL)
1600 {
1601 curlang = lang;
1602 lang->function (inf);
1603 return;
1604 }
1605 }
1606 }
1607 /* We rewind here, even if inf may be a pipe. We fail if the
1608 length of the first line is longer than the pipe block size,
1609 which is unlikely. */
1610 rewind (inf);
1611
1612 /* Try Fortran. */
1613 old_last_node = last_node;
1614 curlang = get_language_from_langname ("fortran");
1615 Fortran_functions (inf);
1616
1617 /* No Fortran entries found. Try C. */
1618 if (old_last_node == last_node)
1619 {
1620 /* We do not tag if rewind fails.
1621 Only the file name will be recorded in the tags file. */
1622 rewind (inf);
1623 curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1624 default_C_entries (inf);
1625 }
1626 return;
1627 }
1628
1629 \f
1630 /* Record a tag. */
1631 static void
1632 pfnote (name, is_func, linestart, linelen, lno, cno)
1633 char *name; /* tag name, or NULL if unnamed */
1634 bool is_func; /* tag is a function */
1635 char *linestart; /* start of the line where tag is */
1636 int linelen; /* length of the line where tag is */
1637 int lno; /* line number */
1638 long cno; /* character number */
1639 {
1640 register node *np;
1641
1642 if (CTAGS && name == NULL)
1643 return;
1644
1645 np = xnew (1, node);
1646
1647 /* If ctags mode, change name "main" to M<thisfilename>. */
1648 if (CTAGS && !cxref_style && streq (name, "main"))
1649 {
1650 register char *fp = etags_strrchr (curtagfname, '/');
1651 np->name = concat ("M", fp == NULL ? curtagfname : fp + 1, "");
1652 fp = etags_strrchr (np->name, '.');
1653 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1654 fp[0] = '\0';
1655 }
1656 else
1657 np->name = name;
1658 np->been_warned = FALSE;
1659 np->file = curtagfname;
1660 np->is_func = is_func;
1661 np->lno = lno;
1662 if (nocharno)
1663 np->cno = invalidcharno;
1664 else
1665 /* Our char numbers are 0-base, because of C language tradition?
1666 ctags compatibility? old versions compatibility? I don't know.
1667 Anyway, since emacs's are 1-base we expect etags.el to take care
1668 of the difference. If we wanted to have 1-based numbers, we would
1669 uncomment the +1 below. */
1670 np->cno = cno /* + 1 */ ;
1671 np->left = np->right = NULL;
1672 if (CTAGS && !cxref_style)
1673 {
1674 if (strlen (linestart) < 50)
1675 np->pat = concat (linestart, "$", "");
1676 else
1677 np->pat = savenstr (linestart, 50);
1678 }
1679 else
1680 np->pat = savenstr (linestart, linelen);
1681
1682 add_node (np, &head);
1683 }
1684
1685 /*
1686 * TAGS format specification
1687 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1688 *
1689 * pfnote should emit the optimized form [unnamed tag] only if:
1690 * 1. name does not contain any of the characters " \t\r\n(),;";
1691 * 2. linestart contains name as either a rightmost, or rightmost but
1692 * one character, substring;
1693 * 3. the character, if any, immediately before name in linestart must
1694 * be one of the characters " \t(),;";
1695 * 4. the character, if any, immediately after name in linestart must
1696 * also be one of the characters " \t(),;".
1697 *
1698 * The real implementation uses the notinname() macro, which recognises
1699 * characters slightly different from " \t\r\n(),;". See the variable
1700 * `nonam'.
1701 */
1702 #define traditional_tag_style TRUE
1703 static void
1704 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1705 char *name; /* tag name, or NULL if unnamed */
1706 int namelen; /* tag length */
1707 bool is_func; /* tag is a function */
1708 char *linestart; /* start of the line where tag is */
1709 int linelen; /* length of the line where tag is */
1710 int lno; /* line number */
1711 long cno; /* character number */
1712 {
1713 register char *cp;
1714 bool named;
1715
1716 named = TRUE;
1717 if (!CTAGS)
1718 {
1719 for (cp = name; !notinname (*cp); cp++)
1720 continue;
1721 if (*cp == '\0') /* rule #1 */
1722 {
1723 cp = linestart + linelen - namelen;
1724 if (notinname (linestart[linelen-1]))
1725 cp -= 1; /* rule #4 */
1726 if (cp >= linestart /* rule #2 */
1727 && (cp == linestart
1728 || notinname (cp[-1])) /* rule #3 */
1729 && strneq (name, cp, namelen)) /* rule #2 */
1730 named = FALSE; /* use unnamed tag */
1731 }
1732 }
1733
1734 if (named)
1735 name = savenstr (name, namelen);
1736 else
1737 name = NULL;
1738 pfnote (name, is_func, linestart, linelen, lno, cno);
1739 }
1740
1741 /*
1742 * free_tree ()
1743 * recurse on left children, iterate on right children.
1744 */
1745 static void
1746 free_tree (np)
1747 register node *np;
1748 {
1749 while (np)
1750 {
1751 register node *node_right = np->right;
1752 free_tree (np->left);
1753 if (np->name != NULL)
1754 free (np->name);
1755 free (np->pat);
1756 free (np);
1757 np = node_right;
1758 }
1759 }
1760
1761 /*
1762 * add_node ()
1763 * Adds a node to the tree of nodes. In etags mode, sort by file
1764 * name. In ctags mode, sort by tag name. Make no attempt at
1765 * balancing.
1766 *
1767 * add_node is the only function allowed to add nodes, so it can
1768 * maintain state.
1769 */
1770 static void
1771 add_node (np, cur_node_p)
1772 node *np, **cur_node_p;
1773 {
1774 register int dif;
1775 register node *cur_node = *cur_node_p;
1776
1777 if (cur_node == NULL)
1778 {
1779 *cur_node_p = np;
1780 last_node = np;
1781 return;
1782 }
1783
1784 if (!CTAGS)
1785 {
1786 /* Etags Mode */
1787 assert (last_node != NULL);
1788 /* For each file name, tags are in a linked sublist on the right
1789 pointer. The first tags of different files are a linked list
1790 on the left pointer. last_node points to the end of the last
1791 used sublist. */
1792 if (last_node->file == np->file)
1793 {
1794 /* Let's use the same sublist as the last added node. */
1795 last_node->right = np;
1796 last_node = np;
1797 }
1798 else if (streq (cur_node->file, np->file))
1799 {
1800 /* Scanning the list we found the head of a sublist which is
1801 good for us. Let's scan this sublist. */
1802 add_node (np, &cur_node->right);
1803 }
1804 else
1805 /* The head of this sublist is not good for us. Let's try the
1806 next one. */
1807 add_node (np, &cur_node->left);
1808 }
1809 else
1810 {
1811 /* Ctags Mode */
1812 dif = strcmp (np->name, cur_node->name);
1813
1814 /*
1815 * If this tag name matches an existing one, then
1816 * do not add the node, but maybe print a warning.
1817 */
1818 if (!dif)
1819 {
1820 if (streq (np->file, cur_node->file))
1821 {
1822 if (!no_warnings)
1823 {
1824 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1825 np->file, lineno, np->name);
1826 fprintf (stderr, "Second entry ignored\n");
1827 }
1828 }
1829 else if (!cur_node->been_warned && !no_warnings)
1830 {
1831 fprintf
1832 (stderr,
1833 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1834 np->file, cur_node->file, np->name);
1835 cur_node->been_warned = TRUE;
1836 }
1837 return;
1838 }
1839
1840 /* Actually add the node */
1841 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1842 }
1843 }
1844
1845 \f
1846 static int total_size_of_entries __P((node *));
1847 static int number_len __P((long));
1848
/* Number of decimal digits needed to print NUM.  The sign is not
   counted: zero and negative values give 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
1859
1860 /*
1861 * Return total number of characters that put_entries will output for
1862 * the nodes in the linked list at the right of the specified node.
1863 * This count is irrelevant with etags.el since emacs 19.34 at least,
1864 * but is still supplied for backward compatibility.
1865 */
1866 static int
1867 total_size_of_entries (np)
1868 register node *np;
1869 {
1870 register int total = 0;
1871
1872 for (; np != NULL; np = np->right)
1873 {
1874 total += strlen (np->pat) + 1; /* pat\177 */
1875 if (np->name != NULL)
1876 total += strlen (np->name) + 1; /* name\001 */
1877 total += number_len ((long) np->lno) + 1; /* lno, */
1878 if (np->cno != invalidcharno) /* cno */
1879 total += number_len (np->cno);
1880 total += 1; /* newline */
1881 }
1882
1883 return total;
1884 }
1885
1886 static void
1887 put_entries (np)
1888 register node *np;
1889 {
1890 register char *sp;
1891 static char *file = NULL;
1892
1893 if (np == NULL)
1894 return;
1895
1896 /* Output subentries that precede this one */
1897 if (CTAGS)
1898 put_entries (np->left);
1899
1900 /* Output this entry */
1901 if (!CTAGS)
1902 {
1903 /* Etags mode */
1904 if (file != np->file
1905 && (file == NULL || !streq (file, np->file)))
1906 {
1907 file = np->file;
1908 fprintf (tagf, "\f\n%s,%d\n",
1909 file, total_size_of_entries (np));
1910 }
1911 fputs (np->pat, tagf);
1912 fputc ('\177', tagf);
1913 if (np->name != NULL)
1914 {
1915 fputs (np->name, tagf);
1916 fputc ('\001', tagf);
1917 }
1918 fprintf (tagf, "%d,", np->lno);
1919 if (np->cno == invalidcharno)
1920 fputc ('\n', tagf);
1921 else
1922 fprintf (tagf, "%ld\n", np->cno);
1923 }
1924 else
1925 {
1926 /* Ctags mode */
1927 if (np->name == NULL)
1928 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1929
1930 if (cxref_style)
1931 {
1932 if (vgrind_style)
1933 fprintf (stdout, "%s %s %d\n",
1934 np->name, np->file, (np->lno + 63) / 64);
1935 else
1936 fprintf (stdout, "%-16s %3d %-16s %s\n",
1937 np->name, np->lno, np->file, np->pat);
1938 }
1939 else
1940 {
1941 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1942
1943 if (np->is_func)
1944 { /* a function */
1945 putc (searchar, tagf);
1946 putc ('^', tagf);
1947
1948 for (sp = np->pat; *sp; sp++)
1949 {
1950 if (*sp == '\\' || *sp == searchar)
1951 putc ('\\', tagf);
1952 putc (*sp, tagf);
1953 }
1954 putc (searchar, tagf);
1955 }
1956 else
1957 { /* a typedef; text pattern inadequate */
1958 fprintf (tagf, "%d", np->lno);
1959 }
1960 putc ('\n', tagf);
1961 }
1962 }
1963
1964
1965 /* Output subentries that follow this one */
1966 put_entries (np->right);
1967 if (!CTAGS)
1968 put_entries (np->left);
1969 }
1970
1971 \f
1972 /* C extensions. */
1973 #define C_EXT 0x00fff /* C extensions */
1974 #define C_PLAIN 0x00000 /* C */
1975 #define C_PLPL 0x00001 /* C++ */
1976 #define C_STAR 0x00003 /* C* */
1977 #define C_JAVA 0x00005 /* JAVA */
1978 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
1979 #define YACC 0x10000 /* yacc file */
1980
/*
 * The C symbol tables.
 *
 * Kinds of keyword known to the C-family parser.  The keyword-to-kind
 * mapping is the gperf input listed in the comment further down.
 */
enum sym_type
{
  st_none,
  /* `@interface' and `@protocol'; `@implementation'; `@end'. */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* DEFUN, SYSCALL, ENTRY and PSEUDO. */
  st_C_gnumacro,
  /* if, for, while, switch, return, import, package and friend. */
  st_C_ignore,
  /* `extends' and `implements'. */
  st_C_javastruct,
  /* `operator'. */
  st_C_operator,
  /* `class'; `template'. */
  st_C_class, st_C_template,
  /* st_C_struct also stands for interface, namespace, domain and union;
     st_C_typespec for the type specifiers and qualifiers. */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};
1995
1996 static unsigned int hash __P((const char *, unsigned int));
1997 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
1998 static enum sym_type C_symtype __P((char *, int, int));
1999
2000 /* Feed stuff between (but not including) %[ and %] lines to:
2001 gperf -c -k 1,3 -o -p -r -t
2002 %[
2003 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2004 %%
2005 if, 0, st_C_ignore
2006 for, 0, st_C_ignore
2007 while, 0, st_C_ignore
2008 switch, 0, st_C_ignore
2009 return, 0, st_C_ignore
2010 @interface, 0, st_C_objprot
2011 @protocol, 0, st_C_objprot
2012 @implementation,0, st_C_objimpl
2013 @end, 0, st_C_objend
2014 import, C_JAVA, st_C_ignore
2015 package, C_JAVA, st_C_ignore
2016 friend, C_PLPL, st_C_ignore
2017 extends, C_JAVA, st_C_javastruct
2018 implements, C_JAVA, st_C_javastruct
2019 interface, C_JAVA, st_C_struct
2020 class, 0, st_C_class
2021 namespace, C_PLPL, st_C_struct
2022 domain, C_STAR, st_C_struct
2023 union, 0, st_C_struct
2024 struct, 0, st_C_struct
2025 extern, 0, st_C_extern
2026 enum, 0, st_C_enum
2027 typedef, 0, st_C_typedef
2028 define, 0, st_C_define
2029 operator, C_PLPL, st_C_operator
2030 template, 0, st_C_template
2031 bool, C_PLPL, st_C_typespec
2032 long, 0, st_C_typespec
2033 short, 0, st_C_typespec
2034 int, 0, st_C_typespec
2035 char, 0, st_C_typespec
2036 float, 0, st_C_typespec
2037 double, 0, st_C_typespec
2038 signed, 0, st_C_typespec
2039 unsigned, 0, st_C_typespec
2040 auto, 0, st_C_typespec
2041 void, 0, st_C_typespec
2042 static, 0, st_C_typespec
2043 const, 0, st_C_typespec
2044 volatile, 0, st_C_typespec
2045 explicit, C_PLPL, st_C_typespec
2046 mutable, C_PLPL, st_C_typespec
2047 typename, C_PLPL, st_C_typespec
2048 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2049 DEFUN, 0, st_C_gnumacro
2050 SYSCALL, 0, st_C_gnumacro
2051 ENTRY, 0, st_C_gnumacro
2052 PSEUDO, 0, st_C_gnumacro
2053 # These are defined inside C functions, so currently they are not met.
2054 # EXFUN used in glibc, DEFVAR_* in emacs.
2055 #EXFUN, 0, st_C_gnumacro
2056 #DEFVAR_, 0, st_C_gnumacro
2057 %]
2058 and replace lines between %< and %> with its output,
2059 then make in_word_set and C_stab_entry static. */
2060 /*%<*/
2061 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2062 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
/* One keyword of the table: its NAME, the C_* value it is listed with
   in the gperf input (0 for most keywords), and its symbol TYPE. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2064
2065 #define TOTAL_KEYWORDS 47
2066 #define MIN_WORD_LENGTH 2
2067 #define MAX_WORD_LENGTH 15
2068 #define MIN_HASH_VALUE 18
2069 #define MAX_HASH_VALUE 138
2070 /* maximum key range = 121, duplicates = 0 */
2071
/*
 * Keyword hash (gperf output, keyed on characters 1 and 3): LEN plus
 * the association value of the first character of STR, plus that of
 * the third character unless LEN is 1 or 2.
 */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  /* The third character takes part for every length but 1 and 2. */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];
  return hval;
}
2123
2124 #ifdef __GNUC__
2125 __inline
2126 #endif
2127 static struct C_stab_entry *
2128 in_word_set (str, len)
2129 register const char *str;
2130 register unsigned int len;
2131 {
2132 static struct C_stab_entry wordlist[] =
2133 {
2134 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2135 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2136 {"if", 0, st_C_ignore},
2137 {""}, {""}, {""}, {""},
2138 {"int", 0, st_C_typespec},
2139 {""}, {""},
2140 {"void", 0, st_C_typespec},
2141 {""}, {""},
2142 {"interface", C_JAVA, st_C_struct},
2143 {""},
2144 {"SYSCALL", 0, st_C_gnumacro},
2145 {""},
2146 {"return", 0, st_C_ignore},
2147 {""}, {""}, {""}, {""}, {""}, {""}, {""},
2148 {"while", 0, st_C_ignore},
2149 {"auto", 0, st_C_typespec},
2150 {""}, {""}, {""}, {""}, {""}, {""},
2151 {"float", 0, st_C_typespec},
2152 {"typedef", 0, st_C_typedef},
2153 {"typename", C_PLPL, st_C_typespec},
2154 {""}, {""}, {""},
2155 {"friend", C_PLPL, st_C_ignore},
2156 {"volatile", 0, st_C_typespec},
2157 {""}, {""},
2158 {"for", 0, st_C_ignore},
2159 {"const", 0, st_C_typespec},
2160 {"import", C_JAVA, st_C_ignore},
2161 {""},
2162 {"define", 0, st_C_define},
2163 {"long", 0, st_C_typespec},
2164 {"implements", C_JAVA, st_C_javastruct},
2165 {"signed", 0, st_C_typespec},
2166 {""},
2167 {"extern", 0, st_C_extern},
2168 {"extends", C_JAVA, st_C_javastruct},
2169 {""},
2170 {"mutable", C_PLPL, st_C_typespec},
2171 {"template", 0, st_C_template},
2172 {"short", 0, st_C_typespec},
2173 {"bool", C_PLPL, st_C_typespec},
2174 {"char", 0, st_C_typespec},
2175 {"class", 0, st_C_class},
2176 {"operator", C_PLPL, st_C_operator},
2177 {""},
2178 {"switch", 0, st_C_ignore},
2179 {""},
2180 {"ENTRY", 0, st_C_gnumacro},
2181 {""},
2182 {"package", C_JAVA, st_C_ignore},
2183 {"union", 0, st_C_struct},
2184 {"@end", 0, st_C_objend},
2185 {"struct", 0, st_C_struct},
2186 {"namespace", C_PLPL, st_C_struct},
2187 {""}, {""},
2188 {"domain", C_STAR, st_C_struct},
2189 {"@interface", 0, st_C_objprot},
2190 {"PSEUDO", 0, st_C_gnumacro},
2191 {"double", 0, st_C_typespec},
2192 {""},
2193 {"@protocol", 0, st_C_objprot},
2194 {""},
2195 {"static", 0, st_C_typespec},
2196 {""}, {""},
2197 {"DEFUN", 0, st_C_gnumacro},
2198 {""}, {""}, {""}, {""},
2199 {"explicit", C_PLPL, st_C_typespec},
2200 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2201 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2202 {""},
2203 {"enum", 0, st_C_enum},
2204 {""}, {""},
2205 {"unsigned", 0, st_C_typespec},
2206 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2207 {"@implementation",0, st_C_objimpl}
2208 };
2209
2210 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2211 {
2212 register int key = hash (str, len);
2213
2214 if (key <= MAX_HASH_VALUE && key >= 0)
2215 {
2216 register const char *s = wordlist[key].name;
2217
2218 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2219 return &wordlist[key];
2220 }
2221 }
2222 return 0;
2223 }
2224 /*%>*/
2225
2226 static enum sym_type
2227 C_symtype (str, len, c_ext)
2228 char *str;
2229 int len;
2230 int c_ext;
2231 {
2232 register struct C_stab_entry *se = in_word_set (str, len);
2233
2234 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2235 return st_none;
2236 return se->type;
2237 }
2238
2239 \f
/*
 * A simple finite automaton recognizes C functions and variables.
 * Its current state is kept in fvdef.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
}
fvdef;

/* func or var: the `extern' keyword has been seen.  */
static bool fvextern;
2259
/*
 * A simple finite automaton recognizes typedefs.
 * Its current state is kept in typdef.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
}
typdef;
2273
/*
 * Another simple finite automaton recognizes struct-like constructs
 * (enum, struct and union).  Its current state is kept in structdef.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if cblev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  sintemplate,			/* inside template (ignore) */
  scolonseen			/* colon seen after struct-like tag */
}
structdef;
2288
/*
 * Name of the current Objective C class; meaningful only while
 * objdef is different from onone.
 */
static char *objtag = "<uninited>";
2293
/*
 * One more little state machine, this one for preprocessor lines.
 * Its current state is kept in definedef.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
}
definedef;
2304
/*
 * State machine for Objective C protocols and implementations.
 * Its current state is kept in objdef.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
}
objdef;
2324
2325
2326 /*
2327 * Use this structure to keep info about the token read, and how it
2328 * should be tagged. Used by the make_C_tag function to build a tag.
2329 */
2330 static struct tok
2331 {
2332 bool valid;
2333 bool named;
2334 int offset;
2335 int length;
2336 int lineno;
2337 long linepos;
2338 char *line;
2339 } token; /* latest token read */
2340 static linebuffer token_name; /* its name */
2341
2342 /*
2343 * Variables and functions for dealing with nested structures.
2344 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2345 */
2346 static void pushclass_above __P((int, char *, int));
2347 static void popclass_above __P((int));
2348 static void write_classname __P((linebuffer *, char *qualifier));
2349
2350 static struct {
2351 char **cname; /* nested class names */
2352 int *cblev; /* nested class curly brace level */
2353 int nl; /* class nesting level (elements used) */
2354 int size; /* length of the array */
2355 } cstack; /* stack for nested declaration tags */
2356 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2357 #define nestlev (cstack.nl)
2358 /* After struct keyword or in struct body, not inside an nested function. */
2359 #define instruct (structdef == snone && nestlev > 0 \
2360 && cblev == cstack.cblev[nestlev-1] + 1)
2361
2362 static void
2363 pushclass_above (cblev, str, len)
2364 int cblev;
2365 char *str;
2366 int len;
2367 {
2368 int nl;
2369
2370 popclass_above (cblev);
2371 nl = cstack.nl;
2372 if (nl >= cstack.size)
2373 {
2374 int size = cstack.size *= 2;
2375 xrnew (cstack.cname, size, char *);
2376 xrnew (cstack.cblev, size, int);
2377 }
2378 assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2379 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2380 cstack.cblev[nl] = cblev;
2381 cstack.nl = nl + 1;
2382 }
2383
2384 static void
2385 popclass_above (cblev)
2386 int cblev;
2387 {
2388 int nl;
2389
2390 for (nl = cstack.nl - 1;
2391 nl >= 0 && cstack.cblev[nl] >= cblev;
2392 nl--)
2393 {
2394 if (cstack.cname[nl] != NULL)
2395 free (cstack.cname[nl]);
2396 cstack.nl = nl;
2397 }
2398 }
2399
2400 static void
2401 write_classname (cn, qualifier)
2402 linebuffer *cn;
2403 char *qualifier;
2404 {
2405 int i, len;
2406 int qlen = strlen (qualifier);
2407
2408 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2409 {
2410 len = 0;
2411 cn->len = 0;
2412 cn->buffer[0] = '\0';
2413 }
2414 else
2415 {
2416 len = strlen (cstack.cname[0]);
2417 linebuffer_setlen (cn, len);
2418 strcpy (cn->buffer, cstack.cname[0]);
2419 }
2420 for (i = 1; i < cstack.nl; i++)
2421 {
2422 char *s;
2423 int slen;
2424
2425 s = cstack.cname[i];
2426 if (s == NULL)
2427 continue;
2428 slen = strlen (s);
2429 len += slen + qlen;
2430 linebuffer_setlen (cn, len);
2431 strncat (cn->buffer, qualifier, qlen);
2432 strncat (cn->buffer, s, slen);
2433 }
2434 }
2435
2436 \f
2437 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2438 static void make_C_tag __P((bool));
2439
2440 /*
2441 * consider_token ()
2442 * checks to see if the current token is at the start of a
2443 * function or variable, or corresponds to a typedef, or
2444 * is a struct/union/enum tag, or #define, or an enum constant.
2445 *
2446 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2447 * with args. C_EXTP points to which language we are looking at.
2448 *
2449 * Globals
2450 * fvdef IN OUT
2451 * structdef IN OUT
2452 * definedef IN OUT
2453 * typdef IN OUT
2454 * objdef IN OUT
2455 */
2456
2457 static bool
2458 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2459 register char *str; /* IN: token pointer */
2460 register int len; /* IN: token length */
2461 register int c; /* IN: first char after the token */
2462 int *c_extp; /* IN, OUT: C extensions mask */
2463 int cblev; /* IN: curly brace level */
2464 int parlev; /* IN: parenthesis level */
2465 bool *is_func_or_var; /* OUT: function or variable found */
2466 {
2467 /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2468 structtype is the type of the preceding struct-like keyword, and
2469 structcblev is the curly brace level where it has been seen. */
2470 static enum sym_type structtype;
2471 static int structcblev;
2472 static enum sym_type toktype;
2473
2474
2475 toktype = C_symtype (str, len, *c_extp);
2476
2477 /*
2478 * Advance the definedef state machine.
2479 */
2480 switch (definedef)
2481 {
2482 case dnone:
2483 /* We're not on a preprocessor line. */
2484 if (toktype == st_C_gnumacro)
2485 {
2486 fvdef = fdefunkey;
2487 return FALSE;
2488 }
2489 break;
2490 case dsharpseen:
2491 if (toktype == st_C_define)
2492 {
2493 definedef = ddefineseen;
2494 }
2495 else
2496 {
2497 definedef = dignorerest;
2498 }
2499 return FALSE;
2500 case ddefineseen:
2501 /*
2502 * Make a tag for any macro, unless it is a constant
2503 * and constantypedefs is FALSE.
2504 */
2505 definedef = dignorerest;
2506 *is_func_or_var = (c == '(');
2507 if (!*is_func_or_var && !constantypedefs)
2508 return FALSE;
2509 else
2510 return TRUE;
2511 case dignorerest:
2512 return FALSE;
2513 default:
2514 error ("internal error: definedef value.", (char *)NULL);
2515 }
2516
2517 /*
2518 * Now typedefs
2519 */
2520 switch (typdef)
2521 {
2522 case tnone:
2523 if (toktype == st_C_typedef)
2524 {
2525 if (typedefs)
2526 typdef = tkeyseen;
2527 fvextern = FALSE;
2528 fvdef = fvnone;
2529 return FALSE;
2530 }
2531 break;
2532 case tkeyseen:
2533 switch (toktype)
2534 {
2535 case st_none:
2536 case st_C_typespec:
2537 case st_C_class:
2538 case st_C_struct:
2539 case st_C_enum:
2540 typdef = ttypeseen;
2541 break;
2542 }
2543 break;
2544 case ttypeseen:
2545 if (structdef == snone && fvdef == fvnone)
2546 {
2547 fvdef = fvnameseen;
2548 return TRUE;
2549 }
2550 break;
2551 case tend:
2552 switch (toktype)
2553 {
2554 case st_C_typespec:
2555 case st_C_class:
2556 case st_C_struct:
2557 case st_C_enum:
2558 return FALSE;
2559 }
2560 return TRUE;
2561 }
2562
2563 /*
2564 * This structdef business is NOT invoked when we are ctags and the
2565 * file is plain C. This is because a struct tag may have the same
2566 * name as another tag, and this loses with ctags.
2567 */
2568 switch (toktype)
2569 {
2570 case st_C_javastruct:
2571 if (structdef == stagseen)
2572 structdef = scolonseen;
2573 return FALSE;
2574 case st_C_template:
2575 case st_C_class:
2576 if (cblev == 0
2577 && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2578 && definedef == dnone && structdef == snone
2579 && typdef == tnone && fvdef == fvnone)
2580 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2581 if (toktype == st_C_template)
2582 break;
2583 /* FALLTHRU */
2584 case st_C_struct:
2585 case st_C_enum:
2586 if (parlev == 0
2587 && fvdef != vignore
2588 && (typdef == tkeyseen
2589 || (typedefs_or_cplusplus && structdef == snone)))
2590 {
2591 structdef = skeyseen;
2592 structtype = toktype;
2593 structcblev = cblev;
2594 }
2595 return FALSE;
2596 }
2597
2598 if (structdef == skeyseen)
2599 {
2600 structdef = stagseen;
2601 return TRUE;
2602 }
2603
2604 if (typdef != tnone)
2605 definedef = dnone;
2606
2607 /* Detect Objective C constructs. */
2608 switch (objdef)
2609 {
2610 case onone:
2611 switch (toktype)
2612 {
2613 case st_C_objprot:
2614 objdef = oprotocol;
2615 return FALSE;
2616 case st_C_objimpl:
2617 objdef = oimplementation;
2618 return FALSE;
2619 }
2620 break;
2621 case oimplementation:
2622 /* Save the class tag for functions or variables defined inside. */
2623 objtag = savenstr (str, len);
2624 objdef = oinbody;
2625 return FALSE;
2626 case oprotocol:
2627 /* Save the class tag for categories. */
2628 objtag = savenstr (str, len);
2629 objdef = otagseen;
2630 *is_func_or_var = TRUE;
2631 return TRUE;
2632 case oparenseen:
2633 objdef = ocatseen;
2634 *is_func_or_var = TRUE;
2635 return TRUE;
2636 case oinbody:
2637 break;
2638 case omethodsign:
2639 if (parlev == 0)
2640 {
2641 objdef = omethodtag;
2642 linebuffer_setlen (&token_name, len);
2643 strncpy (token_name.buffer, str, len);
2644 token_name.buffer[len] = '\0';
2645 return TRUE;
2646 }
2647 return FALSE;
2648 case omethodcolon:
2649 if (parlev == 0)
2650 objdef = omethodparm;
2651 return FALSE;
2652 case omethodparm:
2653 if (parlev == 0)
2654 {
2655 objdef = omethodtag;
2656 linebuffer_setlen (&token_name, token_name.len + len);
2657 strncat (token_name.buffer, str, len);
2658 return TRUE;
2659 }
2660 return FALSE;
2661 case oignore:
2662 if (toktype == st_C_objend)
2663 {
2664 /* Memory leakage here: the string pointed by objtag is
2665 never released, because many tests would be needed to
2666 avoid breaking on incorrect input code. The amount of
2667 memory leaked here is the sum of the lengths of the
2668 class tags.
2669 free (objtag); */
2670 objdef = onone;
2671 }
2672 return FALSE;
2673 }
2674
2675 /* A function, variable or enum constant? */
2676 switch (toktype)
2677 {
2678 case st_C_extern:
2679 fvextern = TRUE;
2680 /* FALLTHRU */
2681 case st_C_typespec:
2682 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2683 fvdef = fvnone; /* should be useless */
2684 return FALSE;
2685 case st_C_ignore:
2686 fvextern = FALSE;
2687 fvdef = vignore;
2688 return FALSE;
2689 case st_C_operator:
2690 fvdef = foperator;
2691 *is_func_or_var = TRUE;
2692 return TRUE;
2693 case st_none:
2694 if (constantypedefs
2695 && structdef == snone
2696 && structtype == st_C_enum && cblev > structcblev)
2697 return TRUE; /* enum constant */
2698 switch (fvdef)
2699 {
2700 case fdefunkey:
2701 if (cblev > 0)
2702 break;
2703 fvdef = fdefunname; /* GNU macro */
2704 *is_func_or_var = TRUE;
2705 return TRUE;
2706 case fvnone:
2707 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2708 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2709 {
2710 fvdef = vignore;
2711 return FALSE;
2712 }
2713 if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2714 {
2715 fvdef = foperator;
2716 *is_func_or_var = TRUE;
2717 return TRUE;
2718 }
2719 if (cblev > 0 && !instruct)
2720 break;
2721 fvdef = fvnameseen; /* function or variable */
2722 *is_func_or_var = TRUE;
2723 return TRUE;
2724 }
2725 break;
2726 }
2727
2728 return FALSE;
2729 }
2730
2731 \f
2732 /*
2733 * C_entries often keeps pointers to tokens or lines which are older than
2734 * the line currently read. By keeping two line buffers, and switching
2735 * them at end of line, it is possible to use those pointers.
2736 */
2737 static struct
2738 {
2739 long linepos;
2740 linebuffer lb;
2741 } lbs[2];
2742
2743 #define current_lb_is_new (newndx == curndx)
2744 #define switch_line_buffers() (curndx = 1 - curndx)
2745
2746 #define curlb (lbs[curndx].lb)
2747 #define newlb (lbs[newndx].lb)
2748 #define curlinepos (lbs[curndx].linepos)
2749 #define newlinepos (lbs[newndx].linepos)
2750
2751 #define CNL_SAVE_DEFINEDEF() \
2752 do { \
2753 curlinepos = charno; \
2754 lineno++; \
2755 linecharno = charno; \
2756 charno += readline (&curlb, inf); \
2757 lp = curlb.buffer; \
2758 quotednl = FALSE; \
2759 newndx = curndx; \
2760 } while (0)
2761
2762 #define CNL() \
2763 do { \
2764 CNL_SAVE_DEFINEDEF(); \
2765 if (savetoken.valid) \
2766 { \
2767 token = savetoken; \
2768 savetoken.valid = FALSE; \
2769 } \
2770 definedef = dnone; \
2771 } while (0)
2772
2773
2774 static void
2775 make_C_tag (isfun)
2776 bool isfun;
2777 {
2778 /* This function should never be called when token.valid is FALSE, but
2779 we must protect against invalid input or internal errors. */
2780 if (DEBUG || token.valid)
2781 {
2782 if (traditional_tag_style)
2783 {
2784 /* This was the original code. Now we call new_pfnote instead,
2785 which uses the new method for naming tags (see new_pfnote). */
2786 char *name = NULL;
2787
2788 if (CTAGS || token.named)
2789 name = savestr (token_name.buffer);
2790 if (DEBUG && !token.valid)
2791 {
2792 if (token.named)
2793 name = concat (name, "##invalid##", "");
2794 else
2795 name = savestr ("##invalid##");
2796 }
2797 pfnote (name, isfun, token.line,
2798 token.offset+token.length+1, token.lineno, token.linepos);
2799 }
2800 else
2801 new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
2802 token.offset+token.length+1, token.lineno, token.linepos);
2803 token.valid = FALSE;
2804 }
2805 }
2806
2807
2808 /*
2809 * C_entries ()
2810 * This routine finds functions, variables, typedefs,
2811 * #define's, enum constants and struct/union/enum definitions in
2812 * C syntax and adds them to the list.
2813 */
2814 static void
2815 C_entries (c_ext, inf)
2816 int c_ext; /* extension of C */
2817 FILE *inf; /* input file */
2818 {
2819 register char c; /* latest char read; '\0' for end of line */
2820 register char *lp; /* pointer one beyond the character `c' */
2821 int curndx, newndx; /* indices for current and new lb */
2822 register int tokoff; /* offset in line of start of current token */
2823 register int toklen; /* length of current token */
2824 char *qualifier; /* string used to qualify names */
2825 int qlen; /* length of qualifier */
2826 int cblev; /* current curly brace level */
2827 int parlev; /* current parenthesis level */
2828 int typdefcblev; /* cblev where a typedef struct body begun */
2829 bool incomm, inquote, inchar, quotednl, midtoken;
2830 bool cplpl, cjava;
2831 bool yacc_rules; /* in the rules part of a yacc file */
2832 struct tok savetoken; /* token saved during preprocessor handling */
2833
2834
2835 initbuffer (&token_name);
2836 initbuffer (&lbs[0].lb);
2837 initbuffer (&lbs[1].lb);
2838 if (cstack.size == 0)
2839 {
2840 cstack.size = (DEBUG) ? 1 : 4;
2841 cstack.nl = 0;
2842 cstack.cname = xnew (cstack.size, char *);
2843 cstack.cblev = xnew (cstack.size, int);
2844 }
2845
2846 tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
2847 curndx = newndx = 0;
2848 lineno = 0;
2849 charno = 0;
2850 lp = curlb.buffer;
2851 *lp = 0;
2852
2853 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2854 structdef = snone; definedef = dnone; objdef = onone;
2855 yacc_rules = FALSE;
2856 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2857 token.valid = savetoken.valid = FALSE;
2858 cblev = 0;
2859 parlev = 0;
2860 cplpl = (c_ext & C_PLPL) == C_PLPL;
2861 cjava = (c_ext & C_JAVA) == C_JAVA;
2862 if (cjava)
2863 { qualifier = "."; qlen = 1; }
2864 else
2865 { qualifier = "::"; qlen = 2; }
2866
2867
2868 while (!feof (inf))
2869 {
2870 c = *lp++;
2871 if (c == '\\')
2872 {
2873 /* If we're at the end of the line, the next character is a
2874 '\0'; don't skip it, because it's the thing that tells us
2875 to read the next line. */
2876 if (*lp == '\0')
2877 {
2878 quotednl = TRUE;
2879 continue;
2880 }
2881 lp++;
2882 c = ' ';
2883 }
2884 else if (incomm)
2885 {
2886 switch (c)
2887 {
2888 case '*':
2889 if (*lp == '/')
2890 {
2891 c = *lp++;
2892 incomm = FALSE;
2893 }
2894 break;
2895 case '\0':
2896 /* Newlines inside comments do not end macro definitions in
2897 traditional cpp. */
2898 CNL_SAVE_DEFINEDEF ();
2899 break;
2900 }
2901 continue;
2902 }
2903 else if (inquote)
2904 {
2905 switch (c)
2906 {
2907 case '"':
2908 inquote = FALSE;
2909 break;
2910 case '\0':
2911 /* Newlines inside strings do not end macro definitions
2912 in traditional cpp, even though compilers don't
2913 usually accept them. */
2914 CNL_SAVE_DEFINEDEF ();
2915 break;
2916 }
2917 continue;
2918 }
2919 else if (inchar)
2920 {
2921 switch (c)
2922 {
2923 case '\0':
2924 /* Hmmm, something went wrong. */
2925 CNL ();
2926 /* FALLTHRU */
2927 case '\'':
2928 inchar = FALSE;
2929 break;
2930 }
2931 continue;
2932 }
2933 else
2934 switch (c)
2935 {
2936 case '"':
2937 inquote = TRUE;
2938 switch (fvdef)
2939 {
2940 case fdefunkey:
2941 case fstartlist:
2942 case finlist:
2943 case fignore:
2944 case vignore:
2945 break;
2946 default:
2947 fvextern = FALSE;
2948 fvdef = fvnone;
2949 }
2950 continue;
2951 case '\'':
2952 inchar = TRUE;
2953 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2954 {
2955 fvextern = FALSE;
2956 fvdef = fvnone;
2957 }
2958 continue;
2959 case '/':
2960 if (*lp == '*')
2961 {
2962 lp++;
2963 incomm = TRUE;
2964 continue;
2965 }
2966 else if (/* cplpl && */ *lp == '/')
2967 {
2968 c = '\0';
2969 break;
2970 }
2971 else
2972 break;
2973 case '%':
2974 if ((c_ext & YACC) && *lp == '%')
2975 {
2976 /* Entering or exiting rules section in yacc file. */
2977 lp++;
2978 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2979 typdef = tnone; structdef = snone;
2980 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2981 cblev = 0;
2982 yacc_rules = !yacc_rules;
2983 continue;
2984 }
2985 else
2986 break;
2987 case '#':
2988 if (definedef == dnone)
2989 {
2990 char *cp;
2991 bool cpptoken = TRUE;
2992
2993 /* Look back on this line. If all blanks, or nonblanks
2994 followed by an end of comment, this is a preprocessor
2995 token. */
2996 for (cp = newlb.buffer; cp < lp-1; cp++)
2997 if (!iswhite (*cp))
2998 {
2999 if (*cp == '*' && *(cp+1) == '/')
3000 {
3001 cp++;
3002 cpptoken = TRUE;
3003 }
3004 else
3005 cpptoken = FALSE;
3006 }
3007 if (cpptoken)
3008 definedef = dsharpseen;
3009 } /* if (definedef == dnone) */
3010
3011 continue;
3012 } /* switch (c) */
3013
3014
3015 /* Consider token only if some involved conditions are satisfied. */
3016 if (typdef != tignore
3017 && definedef != dignorerest
3018 && fvdef != finlist
3019 && structdef != sintemplate
3020 && (definedef != dnone
3021 || structdef != scolonseen))
3022 {
3023 if (midtoken)
3024 {
3025 if (endtoken (c))
3026 {
3027 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3028 {
3029 /*
3030 * This handles :: in the middle, but not at the
3031 * beginning of an identifier. Also, space-separated
3032 * :: is not recognised.
3033 */
3034 lp += 2;
3035 toklen += 2;
3036 c = lp[-1];
3037 goto still_in_token;
3038 }
3039 else
3040 {
3041 bool funorvar = FALSE;
3042
3043 if (yacc_rules
3044 || consider_token (newlb.buffer + tokoff, toklen, c,
3045 &c_ext, cblev, parlev, &funorvar))
3046 {
3047 if (fvdef == foperator)
3048 {
3049 char *oldlp = lp;
3050 lp = skip_spaces (lp-1);
3051 if (*lp != '\0')
3052 lp += 1;
3053 while (*lp != '\0'
3054 && !iswhite (*lp) && *lp != '(')
3055 lp += 1;
3056 c = *lp++;
3057 toklen += lp - oldlp;
3058 }
3059 token.named = FALSE;
3060 if ((c_ext & C_EXT) /* not pure C */
3061 && nestlev > 0 && definedef == dnone)
3062 /* in struct body */
3063 {
3064 write_classname (&token_name, qualifier);
3065 linebuffer_setlen (&token_name,
3066 token_name.len+qlen+toklen);
3067 strcat (token_name.buffer, qualifier);
3068 strncat (token_name.buffer,
3069 newlb.buffer + tokoff, toklen);
3070 token.named = TRUE;
3071 }
3072 else if (objdef == ocatseen)
3073 /* Objective C category */
3074 {
3075 int len = strlen (objtag) + 2 + toklen;
3076 linebuffer_setlen (&token_name, len);
3077 strcpy (token_name.buffer, objtag);
3078 strcat (token_name.buffer, "(");
3079 strncat (token_name.buffer,
3080 newlb.buffer + tokoff, toklen);
3081 strcat (token_name.buffer, ")");
3082 token.named = TRUE;
3083 }
3084 else if (objdef == omethodtag
3085 || objdef == omethodparm)
3086 /* Objective C method */
3087 {
3088 token.named = TRUE;
3089 }
3090 else if (fvdef == fdefunname)
3091 /* GNU DEFUN and similar macros */
3092 {
3093 bool defun = (newlb.buffer[tokoff] == 'F');
3094 int off = tokoff;
3095 int len = toklen;
3096
3097 /* Rewrite the tag so that emacs lisp DEFUNs
3098 can be found by their elisp name */
3099 if (defun)
3100 {
3101 off += 1;
3102 len -= 1;
3103 }
3104 len = toklen;
3105 linebuffer_setlen (&token_name, len);
3106 strncpy (token_name.buffer,
3107 newlb.buffer + off, len);
3108 token_name.buffer[len] = '\0';
3109 if (defun)
3110 while (--len >= 0)
3111 if (token_name.buffer[len] == '_')
3112 token_name.buffer[len] = '-';
3113 token.named = defun;
3114 }
3115 else
3116 {
3117 linebuffer_setlen (&token_name, toklen);
3118 strncpy (token_name.buffer,
3119 newlb.buffer + tokoff, toklen);
3120 token_name.buffer[toklen] = '\0';
3121 /* Name macros and members. */
3122 token.named = (structdef == stagseen
3123 || typdef == ttypeseen
3124 || typdef == tend
3125 || (funorvar
3126 && definedef == dignorerest)
3127 || (funorvar
3128 && definedef == dnone
3129 && structdef == snone
3130 && cblev > 0));
3131 }
3132 token.lineno = lineno;
3133 token.offset = tokoff;
3134 token.length = toklen;
3135 token.line = newlb.buffer;
3136 token.linepos = newlinepos;
3137 token.valid = TRUE;
3138
3139 if (definedef == dnone
3140 && (fvdef == fvnameseen
3141 || fvdef == foperator
3142 || structdef == stagseen
3143 || typdef == tend
3144 || typdef == ttypeseen
3145 || objdef != onone))
3146 {
3147 if (current_lb_is_new)
3148 switch_line_buffers ();
3149 }
3150 else if (definedef != dnone
3151 || fvdef == fdefunname
3152 || instruct)
3153 make_C_tag (funorvar);
3154 }
3155 midtoken = FALSE;
3156 }
3157 } /* if (endtoken (c)) */
3158 else if (intoken (c))
3159 still_in_token:
3160 {
3161 toklen++;
3162 continue;
3163 }
3164 } /* if (midtoken) */
3165 else if (begtoken (c))
3166 {
3167 switch (definedef)
3168 {
3169 case dnone:
3170 switch (fvdef)
3171 {
3172 case fstartlist:
3173 fvdef = finlist;
3174 continue;
3175 case flistseen:
3176 make_C_tag (TRUE); /* a function */
3177 fvdef = fignore;
3178 break;
3179 case fvnameseen:
3180 fvdef = fvnone;
3181 break;
3182 }
3183 if (structdef == stagseen && !cjava)
3184 {
3185 popclass_above (cblev);
3186 structdef = snone;
3187 }
3188 break;
3189 case dsharpseen:
3190 savetoken = token;
3191 }
3192 if (!yacc_rules || lp == newlb.buffer + 1)
3193 {
3194 tokoff = lp - 1 - newlb.buffer;
3195 toklen = 1;
3196 midtoken = TRUE;
3197 }
3198 continue;
3199 } /* if (begtoken) */
3200 } /* if must look at token */
3201
3202
3203 /* Detect end of line, colon, comma, semicolon and various braces
3204 after having handled a token.*/
3205 switch (c)
3206 {
3207 case ':':
3208 if (yacc_rules && token.offset == 0 && token.valid)
3209 {
3210 make_C_tag (FALSE); /* a yacc function */
3211 break;
3212 }
3213 if (definedef != dnone)
3214 break;
3215 switch (objdef)
3216 {
3217 case otagseen:
3218 objdef = oignore;
3219 make_C_tag (TRUE); /* an Objective C class */
3220 break;
3221 case omethodtag:
3222 case omethodparm:
3223 objdef = omethodcolon;
3224 linebuffer_setlen (&token_name, token_name.len + 1);
3225 strcat (token_name.buffer, ":");
3226 break;
3227 }
3228 if (structdef == stagseen)
3229 structdef = scolonseen;
3230 break;
3231 case ';':
3232 if (definedef != dnone)
3233 break;
3234 switch (typdef)
3235 {
3236 case tend:
3237 case ttypeseen:
3238 make_C_tag (FALSE); /* a typedef */
3239 typdef = tnone;
3240 fvdef = fvnone;
3241 break;
3242 case tnone:
3243 case tinbody:
3244 case tignore:
3245 switch (fvdef)
3246 {
3247 case fignore:
3248 if (typdef == tignore)
3249 fvdef = fvnone;
3250 break;
3251 case fvnameseen:
3252 if ((globals && cblev == 0 && (!fvextern || declarations))
3253 || (members && instruct))
3254 make_C_tag (FALSE); /* a variable */
3255 fvextern = FALSE;
3256 fvdef = fvnone;
3257 token.valid = FALSE;
3258 break;
3259 case flistseen:
3260 if ((declarations && typdef == tnone && !instruct)
3261 || (members && typdef != tignore && instruct))
3262 make_C_tag (TRUE); /* a function declaration */
3263 /* FALLTHRU */
3264 default:
3265 fvextern = FALSE;
3266 fvdef = fvnone;
3267 if (declarations
3268 && structdef == stagseen && (c_ext & C_PLPL))
3269 make_C_tag (FALSE); /* forward declaration */
3270 else
3271 /* The following instruction invalidates the token.
3272 Probably the token should be invalidated in all other
3273 cases where some state machine is reset prematurely. */
3274 token.valid = FALSE;
3275 } /* switch (fvdef) */
3276 /* FALLTHRU */
3277 default:
3278 if (!instruct)
3279 typdef = tnone;
3280 }
3281 if (structdef == stagseen)
3282 structdef = snone;
3283 break;
3284 case ',':
3285 if (definedef != dnone)
3286 break;
3287 switch (objdef)
3288 {
3289 case omethodtag:
3290 case omethodparm:
3291 make_C_tag (TRUE); /* an Objective C method */
3292 objdef = oinbody;
3293 break;
3294 }
3295 switch (fvdef)
3296 {
3297 case fdefunkey:
3298 case foperator:
3299 case fstartlist:
3300 case finlist:
3301 case fignore:
3302 case vignore:
3303 break;
3304 case fdefunname:
3305 fvdef = fignore;
3306 break;
3307 case fvnameseen: /* a variable */
3308 if ((globals && cblev == 0 && (!fvextern || declarations))
3309 || (members && instruct))
3310 make_C_tag (FALSE);
3311 break;
3312 case flistseen: /* a function */
3313 if ((declarations && typdef == tnone && !instruct)
3314 || (members && typdef != tignore && instruct))
3315 {
3316 make_C_tag (TRUE); /* a function declaration */
3317 fvdef = fvnameseen;
3318 }
3319 else if (!declarations)
3320 fvdef = fvnone;
3321 token.valid = FALSE;
3322 break;
3323 default:
3324 fvdef = fvnone;
3325 }
3326 if (structdef == stagseen)
3327 structdef = snone;
3328 break;
3329 case '[':
3330 if (definedef != dnone)
3331 break;
3332 if (structdef == stagseen)
3333 structdef = snone;
3334 switch (typdef)
3335 {
3336 case ttypeseen:
3337 case tend:
3338 typdef = tignore;
3339 make_C_tag (FALSE); /* a typedef */
3340 break;
3341 case tnone:
3342 case tinbody:
3343 switch (fvdef)
3344 {
3345 case foperator:
3346 case finlist:
3347 case fignore:
3348 case vignore:
3349 break;
3350 case fvnameseen:
3351 if ((members && cblev == 1)
3352 || (globals && cblev == 0
3353 && (!fvextern || declarations)))
3354 make_C_tag (FALSE); /* a variable */
3355 /* FALLTHRU */
3356 default:
3357 fvdef = fvnone;
3358 }
3359 break;
3360 }
3361 break;
3362 case '(':
3363 if (definedef != dnone)
3364 break;
3365 if (objdef == otagseen && parlev == 0)
3366 objdef = oparenseen;
3367 switch (fvdef)
3368 {
3369 case fvnameseen:
3370 if (typdef == ttypeseen
3371 && *lp != '*'
3372 && !instruct)
3373 {
3374 /* This handles constructs like:
3375 typedef void OperatorFun (int fun); */
3376 make_C_tag (FALSE);
3377 typdef = tignore;
3378 fvdef = fignore;
3379 break;
3380 }
3381 /* FALLTHRU */
3382 case foperator:
3383 fvdef = fstartlist;
3384 break;
3385 case flistseen:
3386 fvdef = finlist;
3387 break;
3388 }
3389 parlev++;
3390 break;
3391 case ')':
3392 if (definedef != dnone)
3393 break;
3394 if (objdef == ocatseen && parlev == 1)
3395 {
3396 make_C_tag (TRUE); /* an Objective C category */
3397 objdef = oignore;
3398 }
3399 if (--parlev == 0)
3400 {
3401 switch (fvdef)
3402 {
3403 case fstartlist:
3404 case finlist:
3405 fvdef = flistseen;
3406 break;
3407 }
3408 if (!instruct
3409 && (typdef == tend
3410 || typdef == ttypeseen))
3411 {
3412 typdef = tignore;
3413 make_C_tag (FALSE); /* a typedef */
3414 }
3415 }
3416 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3417 parlev = 0;
3418 break;
3419 case '{':
3420 if (definedef != dnone)
3421 break;
3422 if (typdef == ttypeseen)
3423 {
3424 /* Whenever typdef is set to tinbody (currently only
3425 here), typdefcblev should be set to cblev. */
3426 typdef = tinbody;
3427 typdefcblev = cblev;
3428 }
3429 switch (fvdef)
3430 {
3431 case flistseen:
3432 make_C_tag (TRUE); /* a function */
3433 /* FALLTHRU */
3434 case fignore:
3435 fvdef = fvnone;
3436 break;
3437 case fvnone:
3438 switch (objdef)
3439 {
3440 case otagseen:
3441 make_C_tag (TRUE); /* an Objective C class */
3442 objdef = oignore;
3443 break;
3444 case omethodtag:
3445 case omethodparm:
3446 make_C_tag (TRUE); /* an Objective C method */
3447 objdef = oinbody;
3448 break;
3449 default:
3450 /* Neutralize `extern "C" {' grot. */
3451 if (cblev == 0 && structdef == snone && nestlev == 0
3452 && typdef == tnone)
3453 cblev = -1;
3454 }
3455 }
3456 switch (structdef)
3457 {
3458 case skeyseen: /* unnamed struct */
3459 pushclass_above (cblev, NULL, 0);
3460 structdef = snone;
3461 break;
3462 case stagseen: /* named struct or enum */
3463 case scolonseen: /* a class */
3464 pushclass_above (cblev, token.line+token.offset, token.length);
3465 structdef = snone;
3466 make_C_tag (FALSE); /* a struct or enum */
3467 break;
3468 }
3469 cblev++;
3470 break;
3471 case '*':
3472 if (definedef != dnone)
3473 break;
3474 if (fvdef == fstartlist)
3475 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3476 break;
3477 case '}':
3478 if (definedef != dnone)
3479 break;
3480 if (!noindentypedefs && lp == newlb.buffer + 1)
3481 {
3482 cblev = 0; /* reset curly brace level if first column */
3483 parlev = 0; /* also reset paren level, just in case... */
3484 }
3485 else if (cblev > 0)
3486 cblev--;
3487 popclass_above (cblev);
3488 structdef = snone;
3489 /* Only if typdef == tinbody is typdefcblev significant. */
3490 if (typdef == tinbody && cblev <= typdefcblev)
3491 {
3492 assert (cblev == typdefcblev);
3493 typdef = tend;
3494 }
3495 break;
3496 case '=':
3497 if (definedef != dnone)
3498 break;
3499 switch (fvdef)
3500 {
3501 case foperator:
3502 case finlist:
3503 case fignore:
3504 case vignore:
3505 break;
3506 case fvnameseen:
3507 if ((members && cblev == 1)
3508 || (globals && cblev == 0 && (!fvextern || declarations)))
3509 make_C_tag (FALSE); /* a variable */
3510 /* FALLTHRU */
3511 default:
3512 fvdef = vignore;
3513 }
3514 break;
3515 case '<':
3516 if (cplpl && structdef == stagseen)
3517 {
3518 structdef = sintemplate;
3519 break;
3520 }
3521 goto resetfvdef;
3522 case '>':
3523 if (structdef == sintemplate)
3524 {
3525 structdef = stagseen;
3526 break;
3527 }
3528 goto resetfvdef;
3529 case '+':
3530 case '-':
3531 if (objdef == oinbody && cblev == 0)
3532 {
3533 objdef = omethodsign;
3534 break;
3535 }
3536 /* FALLTHRU */
3537 resetfvdef:
3538 case '#': case '~': case '&': case '%': case '/': case '|':
3539 case '^': case '!': case '.': case '?': case ']':
3540 if (definedef != dnone)
3541 break;
3542 /* These surely cannot follow a function tag in C. */
3543 switch (fvdef)
3544 {
3545 case foperator:
3546 case finlist:
3547 case fignore:
3548 case vignore:
3549 break;
3550 default:
3551 fvdef = fvnone;
3552 }
3553 break;
3554 case '\0':
3555 if (objdef == otagseen)
3556 {
3557 make_C_tag (TRUE); /* an Objective C class */
3558 objdef = oignore;
3559 }
3560 /* If a macro spans multiple lines don't reset its state. */
3561 if (quotednl)
3562 CNL_SAVE_DEFINEDEF ();
3563 else
3564 CNL ();
3565 break;
3566 } /* switch (c) */
3567
3568 } /* while not eof */
3569
3570 free (token_name.buffer);
3571 free (lbs[0].lb.buffer);
3572 free (lbs[1].lb.buffer);
3573 }
3574
3575 /*
3576 * Process either a C++ file or a C file depending on the setting
3577 * of a global flag.
3578 */
3579 static void
3580 default_C_entries (inf)
3581 FILE *inf;
3582 {
3583 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3584 }
3585
/* Tag INF as plain C: C_entries is called with 0 as its language
   argument, i.e. none of the C_PLPL/C_JAVA/C_STAR/YACC values.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
3593
3594 /* Always do C++. */
3595 static void
3596 Cplusplus_entries (inf)
3597 FILE *inf;
3598 {
3599 C_entries (C_PLPL, inf);
3600 }
3601
3602 /* Always do Java. */
3603 static void
3604 Cjava_entries (inf)
3605 FILE *inf;
3606 {
3607 C_entries (C_JAVA, inf);
3608 }
3609
3610 /* Always do C*. */
3611 static void
3612 Cstar_entries (inf)
3613 FILE *inf;
3614 {
3615 C_entries (C_STAR, inf);
3616 }
3617
3618 /* Always do Yacc. */
3619 static void
3620 Yacc_entries (inf)
3621 FILE *inf;
3622 {
3623 C_entries (YACC, inf);
3624 }
3625
3626 \f
/* Useful macros. */

/*
 * LOOP_ON_INPUT_LINES (FILE_POINTER, LINE_BUFFER, CHAR_POINTER)
 *
 * Loop header that runs the following statement once per line read from
 * FILE_POINTER.  Before each iteration it increments `lineno', saves
 * `charno' into `linecharno', reads a line into LINE_BUFFER with
 * readline (adding its return value to `charno') and points
 * CHAR_POINTER at the start of LINE_BUFFER's text.  The loop stops as
 * soon as feof (FILE_POINTER) is true at the top of an iteration.
 *
 * CHAR_POINTER is now taken from the LINE_BUFFER argument rather than
 * from the global `lb', so the macro also works with other buffers.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           char_pointer = (line_buffer).buffer,                         \
           TRUE);                                                       \
      )

/*
 * LOOKING_AT (CP, KEYWORD)
 *
 * True when the text at CP starts with the constant string KEYWORD and
 * the character right after it satisfies notinname.  On success CP is
 * advanced past the keyword and any following blanks (skip_spaces); on
 * failure CP is left alone.  CP must be an lvalue and is evaluated more
 * than once, so it must not have side effects.
 */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof(keyword)-1])    /* end of keyword */       \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* skip spaces */
3641
3642 /*
3643 * Read a file, but do no processing. This is used to do regexp
3644 * matching on files that have no language defined.
3645 */
3646 static void
3647 just_read_file (inf)
3648 FILE *inf;
3649 {
3650 register char *dummy;
3651
3652 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3653 continue;
3654 }
3655
3656 \f
3657 /* Fortran parsing */
3658
3659 static void F_takeprec __P((void));
3660 static void F_getit __P((FILE *));
3661
3662 static void
3663 F_takeprec ()
3664 {
3665 dbp = skip_spaces (dbp);
3666 if (*dbp != '*')
3667 return;
3668 dbp++;
3669 dbp = skip_spaces (dbp);
3670 if (strneq (dbp, "(*)", 3))
3671 {
3672 dbp += 3;
3673 return;
3674 }
3675 if (!ISDIGIT (*dbp))
3676 {
3677 --dbp; /* force failure */
3678 return;
3679 }
3680 do
3681 dbp++;
3682 while (ISDIGIT (*dbp));
3683 }
3684
3685 static void
3686 F_getit (inf)
3687 FILE *inf;
3688 {
3689 register char *cp;
3690
3691 dbp = skip_spaces (dbp);
3692 if (*dbp == '\0')
3693 {
3694 lineno++;
3695 linecharno = charno;
3696 charno += readline (&lb, inf);
3697 dbp = lb.buffer;
3698 if (dbp[5] != '&')
3699 return;
3700 dbp += 6;
3701 dbp = skip_spaces (dbp);
3702 }
3703 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3704 return;
3705 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3706 continue;
3707 pfnote (savenstr (dbp, cp-dbp), TRUE,
3708 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3709 }
3710
3711
3712 static void
3713 Fortran_functions (inf)
3714 FILE *inf;
3715 {
3716 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3717 {
3718 if (*dbp == '%')
3719 dbp++; /* Ratfor escape to fortran */
3720 dbp = skip_spaces (dbp);
3721 if (*dbp == '\0')
3722 continue;
3723 switch (lowcase (*dbp))
3724 {
3725 case 'i':
3726 if (nocase_tail ("integer"))
3727 F_takeprec ();
3728 break;
3729 case 'r':
3730 if (nocase_tail ("real"))
3731 F_takeprec ();
3732 break;
3733 case 'l':
3734 if (nocase_tail ("logical"))
3735 F_takeprec ();
3736 break;
3737 case 'c':
3738 if (nocase_tail ("complex") || nocase_tail ("character"))
3739 F_takeprec ();
3740 break;
3741 case 'd':
3742 if (nocase_tail ("double"))
3743 {
3744 dbp = skip_spaces (dbp);
3745 if (*dbp == '\0')
3746 continue;
3747 if (nocase_tail ("precision"))
3748 break;
3749 continue;
3750 }
3751 break;
3752 }
3753 dbp = skip_spaces (dbp);
3754 if (*dbp == '\0')
3755 continue;
3756 switch (lowcase (*dbp))
3757 {
3758 case 'f':
3759 if (nocase_tail ("function"))
3760 F_getit (inf);
3761 continue;
3762 case 's':
3763 if (nocase_tail ("subroutine"))
3764 F_getit (inf);
3765 continue;
3766 case 'e':
3767 if (nocase_tail ("entry"))
3768 F_getit (inf);
3769 continue;
3770 case 'b':
3771 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3772 {
3773 dbp = skip_spaces (dbp);
3774 if (*dbp == '\0') /* assume un-named */
3775 pfnote (savestr ("blockdata"), TRUE,
3776 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3777 else
3778 F_getit (inf); /* look for name */
3779 }
3780 continue;
3781 }
3782 }
3783 }
3784
3785 \f
3786 /*
3787 * Ada parsing
3788 * Original code by
3789 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3790 */
3791
3792 static void Ada_getit __P((FILE *, char *));
3793
3794 /* Once we are positioned after an "interesting" keyword, let's get
3795 the real tag value necessary. */
3796 static void
3797 Ada_getit (inf, name_qualifier)
3798 FILE *inf;
3799 char *name_qualifier;
3800 {
3801 register char *cp;
3802 char *name;
3803 char c;
3804
3805 while (!feof (inf))
3806 {
3807 dbp = skip_spaces (dbp);
3808 if (*dbp == '\0'
3809 || (dbp[0] == '-' && dbp[1] == '-'))
3810 {
3811 lineno++;
3812 linecharno = charno;
3813 charno += readline (&lb, inf);
3814 dbp = lb.buffer;
3815 }
3816 switch (lowcase(*dbp))
3817 {
3818 case 'b':
3819 if (nocase_tail ("body"))
3820 {
3821 /* Skipping body of procedure body or package body or ....
3822 resetting qualifier to body instead of spec. */
3823 name_qualifier = "/b";
3824 continue;
3825 }
3826 break;
3827 case 't':
3828 /* Skipping type of task type or protected type ... */
3829 if (nocase_tail ("type"))
3830 continue;
3831 break;
3832 }
3833 if (*dbp == '"')
3834 {
3835 dbp += 1;
3836 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3837 continue;
3838 }
3839 else
3840 {
3841 dbp = skip_spaces (dbp);
3842 for (cp = dbp;
3843 (*cp != '\0'
3844 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3845 cp++)
3846 continue;
3847 if (cp == dbp)
3848 return;
3849 }
3850 c = *cp;
3851 *cp = '\0';
3852 name = concat (dbp, name_qualifier, "");
3853 *cp = c;
3854 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3855 if (c == '"')
3856 dbp = cp + 1;
3857 return;
3858 }
3859 }
3860
3861 static void
3862 Ada_funcs (inf)
3863 FILE *inf;
3864 {
3865 bool inquote = FALSE;
3866
3867 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3868 {
3869 while (*dbp != '\0')
3870 {
3871 /* Skip a string i.e. "abcd". */
3872 if (inquote || (*dbp == '"'))
3873 {
3874 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3875 if (dbp != NULL)
3876 {
3877 inquote = FALSE;
3878 dbp += 1;
3879 continue; /* advance char */
3880 }
3881 else
3882 {
3883 inquote = TRUE;
3884 break; /* advance line */
3885 }
3886 }
3887
3888 /* Skip comments. */
3889 if (dbp[0] == '-' && dbp[1] == '-')
3890 break; /* advance line */
3891
3892 /* Skip character enclosed in single quote i.e. 'a'
3893 and skip single quote starting an attribute i.e. 'Image. */
3894 if (*dbp == '\'')
3895 {
3896 dbp++ ;
3897 if (*dbp != '\0')
3898 dbp++;
3899 continue;
3900 }
3901
3902 /* Search for beginning of a token. */
3903 if (!begtoken (*dbp))
3904 {
3905 dbp++;
3906 continue; /* advance char */
3907 }
3908
3909 /* We are at the beginning of a token. */
3910 switch (lowcase(*dbp))
3911 {
3912 case 'f':
3913 if (!packages_only && nocase_tail ("function"))
3914 Ada_getit (inf, "/f");
3915 else
3916 break; /* from switch */
3917 continue; /* advance char */
3918 case 'p':
3919 if (!packages_only && nocase_tail ("procedure"))
3920 Ada_getit (inf, "/p");
3921 else if (nocase_tail ("package"))
3922 Ada_getit (inf, "/s");
3923 else if (nocase_tail ("protected")) /* protected type */
3924 Ada_getit (inf, "/t");
3925 else
3926 break; /* from switch */
3927 continue; /* advance char */
3928 case 't':
3929 if (!packages_only && nocase_tail ("task"))
3930 Ada_getit (inf, "/k");
3931 else if (typedefs && !packages_only && nocase_tail ("type"))
3932 {
3933 Ada_getit (inf, "/t");
3934 while (*dbp != '\0')
3935 dbp += 1;
3936 }
3937 else
3938 break; /* from switch */
3939 continue; /* advance char */
3940 }
3941
3942 /* Look for the end of the token. */
3943 while (!endtoken (*dbp))
3944 dbp++;
3945
3946 } /* advance char */
3947 } /* advance line */
3948 }
3949
3950 \f
3951 /*
3952 * Unix and microcontroller assembly tag handling
3953 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
3954 * Idea by Bob Weiner, Motorola Inc. (1994)
3955 */
3956 static void
3957 Asm_labels (inf)
3958 FILE *inf;
3959 {
3960 register char *cp;
3961
3962 LOOP_ON_INPUT_LINES (inf, lb, cp)
3963 {
3964 /* If first char is alphabetic or one of [_.$], test for colon
3965 following identifier. */
3966 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3967 {
3968 /* Read past label. */
3969 cp++;
3970 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3971 cp++;
3972 if (*cp == ':' || iswhite (*cp))
3973 {
3974 /* Found end of label, so copy it and add it to the table. */
3975 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3976 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3977 }
3978 }
3979 }
3980 }
3981
3982 \f
3983 /*
3984 * Perl support
3985 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
3986 * Perl variable names: /^(my|local).../
3987 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
3988 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
3989 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
3990 */
3991 static void
3992 Perl_functions (inf)
3993 FILE *inf;
3994 {
3995 char *package = savestr ("main"); /* current package name */
3996 register char *cp;
3997
3998 LOOP_ON_INPUT_LINES (inf, lb, cp)
3999 {
4000 skip_spaces(cp);
4001
4002 if (LOOKING_AT (cp, "package"))
4003 {
4004 free (package);
4005 package = get_tag (cp);
4006 if (package == NULL) /* can't parse package name */
4007 package = savestr ("");
4008 else
4009 package = savestr(package); /* make a copy */
4010 }
4011 else if (LOOKING_AT (cp, "sub"))
4012 {
4013 char *name, *fullname, *pos;
4014 char *sp = cp;
4015
4016 while (!notinname (*cp))
4017 cp++;
4018 if (cp == sp)
4019 continue;
4020 name = savenstr (sp, cp-sp);
4021 if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4022 fullname = name;
4023 else
4024 fullname = concat (package, "::", name);
4025 pfnote (fullname, TRUE,
4026 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4027 if (name != fullname)
4028 free (name);
4029 }
4030 else if (globals /* only if tagging global vars is enabled */
4031 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4032 {
4033 /* After "my" or "local", but before any following paren or space. */
4034 char *varname = NULL;
4035
4036 if (*cp == '$' || *cp == '@' || *cp == '%')
4037 {
4038 char* varstart = ++cp;
4039 while (ISALNUM (*cp) || *cp == '_')
4040 cp++;
4041 varname = savenstr (varstart, cp-varstart);
4042 }
4043 else
4044 {
4045 /* Should be examining a variable list at this point;
4046 could insist on seeing an open parenthesis. */
4047 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4048 cp++;
4049 }
4050
4051 /* Perhaps I should back cp up one character, so the TAGS table
4052 doesn't mention (and so depend upon) the following char. */
4053 pfnote (varname, FALSE,
4054 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4055 }
4056 }
4057 }
4058
4059
4060 /*
4061 * Python support
4062 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4063 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4064 * More ideas by seb bacon <seb@jamkit.com> (2002)
4065 */
4066 static void
4067 Python_functions (inf)
4068 FILE *inf;
4069 {
4070 register char *cp;
4071
4072 LOOP_ON_INPUT_LINES (inf, lb, cp)
4073 {
4074 cp = skip_spaces (cp);
4075 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4076 {
4077 char *name = cp;
4078 while (!notinname (*cp) && *cp != ':')
4079 cp++;
4080 pfnote (savenstr (name, cp-name), TRUE,
4081 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4082 }
4083 }
4084 }
4085
4086 \f
4087 /*
4088 * PHP support
4089 * Look for:
4090 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4091 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4092 * - /^[ \t]*define\(\"[^\"]+/
4093 * Only with --members:
4094 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4095 * Idea by Diez B. Roggisch (2001)
4096 */
4097 static void
4098 PHP_functions (inf)
4099 FILE *inf;
4100 {
4101 register char *cp, *name;
4102 bool search_identifier = FALSE;
4103
4104 LOOP_ON_INPUT_LINES (inf, lb, cp)
4105 {
4106 cp = skip_spaces (cp);
4107 name = cp;
4108 if (search_identifier
4109 && *cp != '\0')
4110 {
4111 while (!notinname (*cp))
4112 cp++;
4113 pfnote (savenstr (name, cp-name), TRUE,
4114 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4115 search_identifier = FALSE;
4116 }
4117 else if (LOOKING_AT (cp, "function"))
4118 {
4119 if(*cp == '&')
4120 cp = skip_spaces (cp+1);
4121 if(*cp != '\0')
4122 {
4123 name = cp;
4124 while (!notinname (*cp))
4125 cp++;
4126 pfnote (savenstr (name, cp-name), TRUE,
4127 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4128 }
4129 else
4130 search_identifier = TRUE;
4131 }
4132 else if (LOOKING_AT (cp, "class"))
4133 {
4134 if (*cp != '\0')
4135 {
4136 name = cp;
4137 while (*cp != '\0' && !iswhite (*cp))
4138 cp++;
4139 pfnote (savenstr (name, cp-name), FALSE,
4140 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4141 }
4142 else
4143 search_identifier = TRUE;
4144 }
4145 else if (strneq (cp, "define", 6)
4146 && (cp = skip_spaces (cp+6))
4147 && *cp++ == '('
4148 && (*cp == '"' || *cp == '\''))
4149 {
4150 char quote = *cp++;
4151 name = cp;
4152 while (*cp != quote && *cp != '\0')
4153 cp++;
4154 pfnote (savenstr (name, cp-name), FALSE,
4155 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4156 }
4157 else if (members
4158 && LOOKING_AT (cp, "var")
4159 && *cp == '$')
4160 {
4161 name = cp;
4162 while (!notinname(*cp))
4163 cp++;
4164 pfnote (savenstr (name, cp-name), FALSE,
4165 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4166 }
4167 }
4168 }
4169
4170 \f
4171 /*
4172 * Cobol tag functions
4173 * We could look for anything that could be a paragraph name.
4174 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4175 * Idea by Corny de Souza (1993)
4176 */
4177 static void
4178 Cobol_paragraphs (inf)
4179 FILE *inf;
4180 {
4181 register char *bp, *ep;
4182
4183 LOOP_ON_INPUT_LINES (inf, lb, bp)
4184 {
4185 if (lb.len < 9)
4186 continue;
4187 bp += 8;
4188
4189 /* If eoln, compiler option or comment ignore whole line. */
4190 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4191 continue;
4192
4193 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4194 continue;
4195 if (*ep++ == '.')
4196 pfnote (savenstr (bp, ep-bp), TRUE,
4197 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4198 }
4199 }
4200
4201 \f
4202 /*
4203 * Makefile support
4204 * Idea by Assar Westerlund <assar@sics.se> (2001)
4205 */
4206 static void
4207 Makefile_targets (inf)
4208 FILE *inf;
4209 {
4210 register char *bp;
4211
4212 LOOP_ON_INPUT_LINES (inf, lb, bp)
4213 {
4214 if (*bp == '\t' || *bp == '#')
4215 continue;
4216 while (*bp != '\0' && *bp != '=' && *bp != ':')
4217 bp++;
4218 if (*bp == ':')
4219 pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4220 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4221 }
4222 }
4223
4224 \f
4225 /*
4226 * Pascal parsing
4227 * Original code by Mosur K. Mohan (1989)
4228 *
4229 * Locates tags for procedures & functions. Doesn't do any type- or
4230 * var-definitions. It does look for the keyword "extern" or
4231 * "forward" immediately following the procedure statement; if found,
4232 * the tag is skipped.
4233 */
4234 static void
4235 Pascal_functions (inf)
4236 FILE *inf;
4237 {
4238 linebuffer tline; /* mostly copied from C_entries */
4239 long save_lcno;
4240 int save_lineno, save_len;
4241 char c, *cp, *namebuf;
4242
4243 bool /* each of these flags is TRUE iff: */
4244 incomment, /* point is inside a comment */
4245 inquote, /* point is inside '..' string */
4246 get_tagname, /* point is after PROCEDURE/FUNCTION
4247 keyword, so next item = potential tag */
4248 found_tag, /* point is after a potential tag */
4249 inparms, /* point is within parameter-list */
4250 verify_tag; /* point has passed the parm-list, so the
4251 next token will determine whether this
4252 is a FORWARD/EXTERN to be ignored, or
4253 whether it is a real tag */
4254
4255 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4256 namebuf = NULL; /* keep compiler quiet */
4257 lineno = 0;
4258 charno = 0;
4259 dbp = lb.buffer;
4260 *dbp = '\0';
4261 initbuffer (&tline);
4262
4263 incomment = inquote = FALSE;
4264 found_tag = FALSE; /* have a proc name; check if extern */
4265 get_tagname = FALSE; /* have found "procedure" keyword */
4266 inparms = FALSE; /* found '(' after "proc" */
4267 verify_tag = FALSE; /* check if "extern" is ahead */
4268
4269
4270 while (!feof (inf)) /* long main loop to get next char */
4271 {
4272 c = *dbp++;
4273 if (c == '\0') /* if end of line */
4274 {
4275 lineno++;
4276 linecharno = charno;
4277 charno += readline (&lb, inf);
4278 dbp = lb.buffer;
4279 if (*dbp == '\0')
4280 continue;
4281 if (!((found_tag && verify_tag)
4282 || get_tagname))
4283 c = *dbp++; /* only if don't need *dbp pointing
4284 to the beginning of the name of
4285 the procedure or function */
4286 }
4287 if (incomment)
4288 {
4289 if (c == '}') /* within { } comments */
4290 incomment = FALSE;
4291 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4292 {
4293 dbp++;
4294 incomment = FALSE;
4295 }
4296 continue;
4297 }
4298 else if (inquote)
4299 {
4300 if (c == '\'')
4301 inquote = FALSE;
4302 continue;
4303 }
4304 else
4305 switch (c)
4306 {
4307 case '\'':
4308 inquote = TRUE; /* found first quote */
4309 continue;
4310 case '{': /* found open { comment */
4311 incomment = TRUE;
4312 continue;
4313 case '(':
4314 if (*dbp == '*') /* found open (* comment */
4315 {
4316 incomment = TRUE;
4317 dbp++;
4318 }
4319 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4320 inparms = TRUE;
4321 continue;
4322 case ')': /* end of parms list */
4323 if (inparms)
4324 inparms = FALSE;
4325 continue;
4326 case ';':
4327 if (found_tag && !inparms) /* end of proc or fn stmt */
4328 {
4329 verify_tag = TRUE;
4330 break;
4331 }
4332 continue;
4333 }
4334 if (found_tag && verify_tag && (*dbp != ' '))
4335 {
4336 /* check if this is an "extern" declaration */
4337 if (*dbp == '\0')
4338 continue;
4339 if (lowcase (*dbp == 'e'))
4340 {
4341 if (nocase_tail ("extern")) /* superfluous, really! */
4342 {
4343 found_tag = FALSE;
4344 verify_tag = FALSE;
4345 }
4346 }
4347 else if (lowcase (*dbp) == 'f')
4348 {
4349 if (nocase_tail ("forward")) /* check for forward reference */
4350 {
4351 found_tag = FALSE;
4352 verify_tag = FALSE;
4353 }
4354 }
4355 if (found_tag && verify_tag) /* not external proc, so make tag */
4356 {
4357 found_tag = FALSE;
4358 verify_tag = FALSE;
4359 pfnote (namebuf, TRUE,
4360 tline.buffer, save_len, save_lineno, save_lcno);
4361 continue;
4362 }
4363 }
4364 if (get_tagname) /* grab name of proc or fn */
4365 {
4366 if (*dbp == '\0')
4367 continue;
4368
4369 /* save all values for later tagging */
4370 linebuffer_setlen (&tline, lb.len);
4371 strcpy (tline.buffer, lb.buffer);
4372 save_lineno = lineno;
4373 save_lcno = linecharno;
4374
4375 /* grab block name */
4376 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4377 continue;
4378 namebuf = savenstr (dbp, cp-dbp);
4379 dbp = cp; /* set dbp to e-o-token */
4380 save_len = dbp - lb.buffer + 1;
4381 get_tagname = FALSE;
4382 found_tag = TRUE;
4383 continue;
4384
4385 /* and proceed to check for "extern" */
4386 }
4387 else if (!incomment && !inquote && !found_tag)
4388 {
4389 /* check for proc/fn keywords */
4390 switch (lowcase (c))
4391 {
4392 case 'p':
4393 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4394 get_tagname = TRUE;
4395 continue;
4396 case 'f':
4397 if (nocase_tail ("unction"))
4398 get_tagname = TRUE;
4399 continue;
4400 }
4401 }
4402 } /* while not eof */
4403
4404 free (tline.buffer);
4405 }
4406
4407 \f
4408 /*
4409 * Lisp tag functions
4410 * look for (def or (DEF, quote or QUOTE
4411 */
4412
4413 static void L_getit __P((void));
4414
4415 static void
4416 L_getit ()
4417 {
4418 if (*dbp == '\'') /* Skip prefix quote */
4419 dbp++;
4420 else if (*dbp == '(')
4421 {
4422 dbp++;
4423 /* Try to skip "(quote " */
4424 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4425 /* Ok, then skip "(" before name in (defstruct (foo)) */
4426 dbp = skip_spaces (dbp);
4427 }
4428 get_tag (dbp);
4429 }
4430
4431 static void
4432 Lisp_functions (inf)
4433 FILE *inf;
4434 {
4435 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4436 {
4437 if (dbp[0] != '(')
4438 continue;
4439
4440 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4441 {
4442 dbp = skip_non_spaces (dbp);
4443 dbp = skip_spaces (dbp);
4444 L_getit ();
4445 }
4446 else
4447 {
4448 /* Check for (foo::defmumble name-defined ... */
4449 do
4450 dbp++;
4451 while (!notinname (*dbp) && *dbp != ':');
4452 if (*dbp == ':')
4453 {
4454 do
4455 dbp++;
4456 while (*dbp == ':');
4457
4458 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4459 {
4460 dbp = skip_non_spaces (dbp);
4461 dbp = skip_spaces (dbp);
4462 L_getit ();
4463 }
4464 }
4465 }
4466 }
4467 }
4468
4469 \f
4470 /*
4471 * Postscript tag functions
4472 * Just look for lines where the first character is '/'
4473 * Also look at "defineps" for PSWrap
4474 * Ideas by:
4475 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4476 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4477 */
4478 static void
4479 Postscript_functions (inf)
4480 FILE *inf;
4481 {
4482 register char *bp, *ep;
4483
4484 LOOP_ON_INPUT_LINES (inf, lb, bp)
4485 {
4486 if (bp[0] == '/')
4487 {
4488 for (ep = bp+1;
4489 *ep != '\0' && *ep != ' ' && *ep != '{';
4490 ep++)
4491 continue;
4492 pfnote (savenstr (bp, ep-bp), TRUE,
4493 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4494 }
4495 else if (LOOKING_AT (bp, "defineps"))
4496 get_tag (bp);
4497 }
4498 }
4499
4500 \f
4501 /*
4502 * Scheme tag functions
4503 * look for (def... xyzzy
4504 * (def... (xyzzy
4505 * (def ... ((...(xyzzy ....
4506 * (set! xyzzy
4507 * Original code by Ken Haase (1985?)
4508 */
4509
4510 static void
4511 Scheme_functions (inf)
4512 FILE *inf;
4513 {
4514 register char *bp;
4515
4516 LOOP_ON_INPUT_LINES (inf, lb, bp)
4517 {
4518 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4519 {
4520 bp = skip_non_spaces (bp+4);
4521 /* Skip over open parens and white space */
4522 while (notinname (*bp))
4523 bp++;
4524 get_tag (bp);
4525 }
4526 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4527 get_tag (bp);
4528 }
4529 }
4530
4531 \f
4532 /* Find tags in TeX and LaTeX input files. */
4533
/* One entry of TEX_toktab, the table of TeX control sequences that
   define tags.  The table is built by TEX_decode_env and its last
   entry has a NULL name and a zero length.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* control sequence name, as split out of
                                   the colon-separated list */
  int len;                      /* strlen (name); 0 marks end of table */
};
4542
4543 static struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4544
4545 /* Default set of control sequences to put into TEX_toktab.
4546 The value of environment var TEXTAGS is prepended to this. */
4547
4548 static char *TEX_defenv = "\
4549 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4550 :part:appendix:entry:index";
4551
4552 static void TEX_mode __P((FILE *));
4553 static struct TEX_tabent *TEX_decode_env __P((char *, char *));
4554 static int TEX_Token __P((char *));
4555
4556 static char TEX_esc = '\\';
4557 static char TEX_opgrp = '{';
4558 static char TEX_clgrp = '}';
4559
4560 /*
4561 * TeX/LaTeX scanning loop.
4562 */
4563 static void
4564 TeX_commands (inf)
4565 FILE *inf;
4566 {
4567 char *cp, *lasthit;
4568 register int i;
4569
4570 /* Select either \ or ! as escape character. */
4571 TEX_mode (inf);
4572
4573 /* Initialize token table once from environment. */
4574 if (!TEX_toktab)
4575 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4576
4577 LOOP_ON_INPUT_LINES (inf, lb, cp)
4578 {
4579 lasthit = cp;
4580 /* Look at each esc in line. */
4581 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4582 {
4583 if (*++cp == '\0')
4584 break;
4585 linecharno += cp - lasthit;
4586 lasthit = cp;
4587 i = TEX_Token (lasthit);
4588 if (i >= 0)
4589 {
4590 register char *p;
4591 for (lasthit += TEX_toktab[i].len;
4592 *lasthit == TEX_esc || *lasthit == TEX_opgrp;
4593 lasthit++)
4594 continue;
4595 for (p = lasthit;
4596 !iswhite (*p) && *p != TEX_opgrp && *p != TEX_clgrp;
4597 p++)
4598 continue;
4599 pfnote (savenstr (lasthit, p-lasthit), TRUE,
4600 lb.buffer, lb.len, lineno, linecharno);
4601 break; /* We only tag a line once */
4602 }
4603 }
4604 }
4605 }
4606
4607 #define TEX_LESC '\\'
4608 #define TEX_SESC '!'
4609 #define TEX_cmt '%'
4610
4611 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4612 chars accordingly. */
4613 static void
4614 TEX_mode (inf)
4615 FILE *inf;
4616 {
4617 int c;
4618
4619 while ((c = getc (inf)) != EOF)
4620 {
4621 /* Skip to next line if we hit the TeX comment char. */
4622 if (c == TEX_cmt)
4623 while (c != '\n')
4624 c = getc (inf);
4625 else if (c == TEX_LESC || c == TEX_SESC )
4626 break;
4627 }
4628
4629 if (c == TEX_LESC)
4630 {
4631 TEX_esc = TEX_LESC;
4632 TEX_opgrp = '{';
4633 TEX_clgrp = '}';
4634 }
4635 else
4636 {
4637 TEX_esc = TEX_SESC;
4638 TEX_opgrp = '<';
4639 TEX_clgrp = '>';
4640 }
4641 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4642 No attempt is made to correct the situation. */
4643 rewind (inf);
4644 }
4645
4646 /* Read environment and prepend it to the default string.
4647 Build token table. */
4648 static struct TEX_tabent *
4649 TEX_decode_env (evarname, defenv)
4650 char *evarname;
4651 char *defenv;
4652 {
4653 register char *env, *p;
4654
4655 struct TEX_tabent *tab;
4656 int size, i;
4657
4658 /* Append default string to environment. */
4659 env = getenv (evarname);
4660 if (!env)
4661 env = defenv;
4662 else
4663 {
4664 char *oldenv = env;
4665 env = concat (oldenv, defenv, "");
4666 }
4667
4668 /* Allocate a token table */
4669 for (size = 1, p = env; p;)
4670 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4671 size++;
4672 /* Add 1 to leave room for null terminator. */
4673 tab = xnew (size + 1, struct TEX_tabent);
4674
4675 /* Unpack environment string into token table. Be careful about */
4676 /* zero-length strings (leading ':', "::" and trailing ':') */
4677 for (i = 0; *env;)
4678 {
4679 p = etags_strchr (env, ':');
4680 if (!p) /* End of environment string. */
4681 p = env + strlen (env);
4682 if (p - env > 0)
4683 { /* Only non-zero strings. */
4684 tab[i].name = savenstr (env, p - env);
4685 tab[i].len = strlen (tab[i].name);
4686 i++;
4687 }
4688 if (*p)
4689 env = p + 1;
4690 else
4691 {
4692 tab[i].name = NULL; /* Mark end of table. */
4693 tab[i].len = 0;
4694 break;
4695 }
4696 }
4697 return tab;
4698 }
4699
4700 /* If the text at CP matches one of the tag-defining TeX command names,
4701 return the pointer to the first occurrence of that command in TEX_toktab.
4702 Otherwise return -1.
4703 Keep the capital `T' in `token' for dumb truncating compilers
4704 (this distinguishes it from `TEX_toktab' */
4705 static int
4706 TEX_Token (cp)
4707 char *cp;
4708 {
4709 int i;
4710
4711 for (i = 0; TEX_toktab[i].len > 0; i++)
4712 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4713 return i;
4714 return -1;
4715 }
4716
4717 \f
4718 /* Texinfo support. Dave Love, Mar. 2000. */
4719 static void
4720 Texinfo_nodes (inf)
4721 FILE * inf;
4722 {
4723 char *cp, *start;
4724 LOOP_ON_INPUT_LINES (inf, lb, cp)
4725 if (LOOKING_AT (cp, "@node"))
4726 {
4727 start = cp;
4728 while (*cp != '\0' && *cp != ',')
4729 cp++;
4730 pfnote (savenstr (start, cp - start), TRUE,
4731 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4732 }
4733 }
4734
4735 \f
4736 /*
4737 * Prolog support
4738 *
4739 * Assumes that the predicate or rule starts at column 0.
4740 * Only the first clause of a predicate or rule is added.
4741 * Original code by Sunichirou Sugou (1989)
4742 * Rewritten by Anders Lindgren (1996)
4743 */
4744 static int prolog_pr __P((char *, char *));
4745 static void prolog_skip_comment __P((linebuffer *, FILE *));
4746 static int prolog_atom __P((char *, int));
4747
4748 static void
4749 Prolog_functions (inf)
4750 FILE *inf;
4751 {
4752 char *cp, *last;
4753 int len;
4754 int allocated;
4755
4756 allocated = 0;
4757 len = 0;
4758 last = NULL;
4759
4760 LOOP_ON_INPUT_LINES (inf, lb, cp)
4761 {
4762 if (cp[0] == '\0') /* Empty line */
4763 continue;
4764 else if (iswhite (cp[0])) /* Not a predicate */
4765 continue;
4766 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4767 prolog_skip_comment (&lb, inf);
4768 else if ((len = prolog_pr (cp, last)) > 0)
4769 {
4770 /* Predicate or rule. Store the function name so that we
4771 only generate a tag for the first clause. */
4772 if (last == NULL)
4773 last = xnew(len + 1, char);
4774 else if (len + 1 > allocated)
4775 xrnew (last, len + 1, char);
4776 allocated = len + 1;
4777 strncpy (last, cp, len);
4778 last[len] = '\0';
4779 }
4780 }
4781 }
4782
4783
4784 static void
4785 prolog_skip_comment (plb, inf)
4786 linebuffer *plb;
4787 FILE *inf;
4788 {
4789 char *cp;
4790
4791 do
4792 {
4793 for (cp = plb->buffer; *cp != '\0'; cp++)
4794 if (cp[0] == '*' && cp[1] == '/')
4795 return;
4796 lineno++;
4797 linecharno += readline (plb, inf);
4798 }
4799 while (!feof(inf));
4800 }
4801
4802 /*
4803 * A predicate or rule definition is added if it matches:
4804 * <beginning of line><Prolog Atom><whitespace>(
4805 * or <beginning of line><Prolog Atom><whitespace>:-
4806 *
4807 * It is added to the tags database if it doesn't match the
4808 * name of the previous clause header.
4809 *
4810 * Return the size of the name of the predicate or rule, or 0 if no
4811 * header was found.
4812 */
4813 static int
4814 prolog_pr (s, last)
4815 char *s;
4816 char *last; /* Name of last clause. */
4817 {
4818 int pos;
4819 int len;
4820
4821 pos = prolog_atom (s, 0);
4822 if (pos < 1)
4823 return 0;
4824
4825 len = pos;
4826 pos = skip_spaces (s + pos) - s;
4827
4828 if ((s[pos] == '.'
4829 || (s[pos] == '(' && (pos += 1))
4830 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
4831 && (last == NULL /* save only the first clause */
4832 || len != strlen (last)
4833 || !strneq (s, last, len)))
4834 {
4835 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4836 return len;
4837 }
4838 else
4839 return 0;
4840 }
4841
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
	continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing quote. */
  pos++;
  for (;;)
    {
      switch (s[pos])
	{
	case '\'':
	  pos++;
	  if (s[pos] != '\'')
	    return pos - start;	/* that was the closing quote */
	  pos++;		/* two quotes in a row: an escaped quote */
	  break;

	case '\0':
	  /* Multiline quoted atoms are ignored. */
	  return -1;

	case '\\':
	  if (s[pos+1] == '\0')
	    return -1;
	  pos += 2;
	  break;

	default:
	  pos++;
	  break;
	}
    }
}
4900
4901 \f
4902 /*
4903 * Support for Erlang
4904 *
4905 * Generates tags for functions, defines, and records.
4906 * Assumes that Erlang functions start at column 0.
4907 * Original code by Anders Lindgren (1996)
4908 */
4909 static int erlang_func __P((char *, char *));
4910 static void erlang_attribute __P((char *));
4911 static int erlang_atom __P((char *, int));
4912
4913 static void
4914 Erlang_functions (inf)
4915 FILE *inf;
4916 {
4917 char *cp, *last;
4918 int len;
4919 int allocated;
4920
4921 allocated = 0;
4922 len = 0;
4923 last = NULL;
4924
4925 LOOP_ON_INPUT_LINES (inf, lb, cp)
4926 {
4927 if (cp[0] == '\0') /* Empty line */
4928 continue;
4929 else if (iswhite (cp[0])) /* Not function nor attribute */
4930 continue;
4931 else if (cp[0] == '%') /* comment */
4932 continue;
4933 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4934 continue;
4935 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4936 {
4937 erlang_attribute (cp);
4938 last = NULL;
4939 }
4940 else if ((len = erlang_func (cp, last)) > 0)
4941 {
4942 /*
4943 * Function. Store the function name so that we only
4944 * generates a tag for the first clause.
4945 */
4946 if (last == NULL)
4947 last = xnew (len + 1, char);
4948 else if (len + 1 > allocated)
4949 xrnew (last, len + 1, char);
4950 allocated = len + 1;
4951 strncpy (last, cp, len);
4952 last[len] = '\0';
4953 }
4954 }
4955 }
4956
4957
4958 /*
4959 * A function definition is added if it matches:
4960 * <beginning of line><Erlang Atom><whitespace>(
4961 *
4962 * It is added to the tags database if it doesn't match the
4963 * name of the previous clause header.
4964 *
4965 * Return the size of the name of the function, or 0 if no function
4966 * was found.
4967 */
4968 static int
4969 erlang_func (s, last)
4970 char *s;
4971 char *last; /* Name of last clause. */
4972 {
4973 int pos;
4974 int len;
4975
4976 pos = erlang_atom (s, 0);
4977 if (pos < 1)
4978 return 0;
4979
4980 len = pos;
4981 pos = skip_spaces (s + pos) - s;
4982
4983 /* Save only the first clause. */
4984 if (s[pos++] == '('
4985 && (last == NULL
4986 || len != (int)strlen (last)
4987 || !strneq (s, last, len)))
4988 {
4989 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4990 return len;
4991 }
4992
4993 return 0;
4994 }
4995
4996
4997 /*
4998 * Handle attributes. Currently, tags are generated for defines
4999 * and records.
5000 *
5001 * They are on the form:
5002 * -define(foo, bar).
5003 * -define(Foo(M, N), M+N).
5004 * -record(graph, {vtab = notable, cyclic = true}).
5005 */
5006 static void
5007 erlang_attribute (s)
5008 char *s;
5009 {
5010 int pos;
5011 int len;
5012
5013 if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
5014 {
5015 if (s[pos++] == '(')
5016 {
5017 pos = skip_spaces (s + pos) - s;
5018 len = erlang_atom (s, pos);
5019 if (len != 0)
5020 pfnote (savenstr (& s[pos], len), TRUE,
5021 s, pos + len, lineno, linecharno);
5022 }
5023 }
5024 return;
5025 }
5026
5027
/*
 * Consume an Erlang atom (or variable) starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * Accepted forms:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a letter or an underscore.
 * - A string in single quotes, where a backslash quotes the
 *   character that follows it.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISALPHA (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
	continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing quote. */
  pos++;
  for (;;)
    {
      switch (s[pos])
	{
	case '\'':
	  return pos + 1 - start;	/* count the closing quote too */

	case '\0':
	  /* Multiline quoted atoms are ignored. */
	  return -1;

	case '\\':
	  if (s[pos+1] == '\0')
	    return -1;
	  pos += 2;
	  break;

	default:
	  pos++;
	  break;
	}
    }
}
5077
5078 \f
5079 #ifdef ETAGS_REGEXPS
5080
5081 static char *scan_separators __P((char *));
5082 static void analyse_regex __P((char *, bool));
5083 static void add_regex __P((char *, bool, language *));
5084 static char *substitute __P((char *, char *, struct re_registers *));
5085
/* Take a string like "/blah/" and turn it into "blah", working in
   place.  The first character of NAME is the separator; the text
   after it is copied down to the start of NAME up to the first
   unquoted occurrence of the separator, or to the end of the string.
   While copying, "\t" becomes a Tab character, a quoted separator
   becomes the separator itself, and any other quoted character is
   kept together with its backslash.  A backslash that ends the string
   is dropped.  The copied text is null terminated.

   Return a pointer to the terminating separator, or to the end of the
   string if there was none.  An empty NAME is returned unchanged.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan: stepping over the missing separator would walk
     past the end of the string. */
  if (sep == '\0')
    return name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
	{
	  /* Look at the quoted character. */
	  if (*++name == '\0')
	    break;		/* lone backslash at the end */
	  if (*name == 't')
	    *copyto++ = '\t';
	  else if (*name == sep)
	    *copyto++ = sep;
	  else
	    {
	      /* Something else is quoted, so preserve the quote. */
	      *copyto++ = '\\';
	      *copyto++ = *name;
	    }
	}
      else if (*name == sep)
	break;
      else
	*copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5128
5129 /* Look at the argument of --regex or --no-regex and do the right
5130 thing. Same for each line of a regexp file. */
5131 static void
5132 analyse_regex (regex_arg, ignore_case)
5133 char *regex_arg;
5134 bool ignore_case;
5135 {
5136 if (regex_arg == NULL)
5137 {
5138 free_patterns (); /* --no-regex: remove existing regexps */
5139 return;
5140 }
5141
5142 /* A real --regexp option or a line in a regexp file. */
5143 switch (regex_arg[0])
5144 {
5145 /* Comments in regexp file or null arg to --regex. */
5146 case '\0':
5147 case ' ':
5148 case '\t':
5149 break;
5150
5151 /* Read a regex file. This is recursive and may result in a
5152 loop, which will stop when the file descriptors are exhausted. */
5153 case '@':
5154 {
5155 FILE *regexfp;
5156 linebuffer regexbuf;
5157 char *regexfile = regex_arg + 1;
5158
5159 /* regexfile is a file containing regexps, one per line. */
5160 regexfp = fopen (regexfile, "r");
5161 if (regexfp == NULL)
5162 {
5163 pfatal (regexfile);
5164 return;
5165 }
5166 initbuffer (&regexbuf);
5167 while (readline_internal (&regexbuf, regexfp) > 0)
5168 analyse_regex (regexbuf.buffer, ignore_case);
5169 free (regexbuf.buffer);
5170 fclose (regexfp);
5171 }
5172 break;
5173
5174 /* Regexp to be used for a specific language only. */
5175 case '{':
5176 {
5177 language *lang;
5178 char *lang_name = regex_arg + 1;
5179 char *cp;
5180
5181 for (cp = lang_name; *cp != '}'; cp++)
5182 if (*cp == '\0')
5183 {
5184 error ("unterminated language name in regex: %s", regex_arg);
5185 return;
5186 }
5187 *cp = '\0';
5188 lang = get_language_from_langname (lang_name);
5189 if (lang == NULL)
5190 return;
5191 add_regex (cp + 1, ignore_case, lang);
5192 }
5193 break;
5194
5195 /* Regexp to be used for any language. */
5196 default:
5197 add_regex (regex_arg, ignore_case, NULL);
5198 break;
5199 }
5200 }
5201
5202 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5203 expression, into a real regular expression by compiling it. */
5204 static void
5205 add_regex (regexp_pattern, ignore_case, lang)
5206 char *regexp_pattern;
5207 bool ignore_case;
5208 language *lang;
5209 {
5210 static struct re_pattern_buffer zeropattern;
5211 char *name;
5212 const char *err;
5213 struct re_pattern_buffer *patbuf;
5214 pattern *pp;
5215
5216
5217 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5218 {
5219 error ("%s: unterminated regexp", regexp_pattern);
5220 return;
5221 }
5222 name = scan_separators (regexp_pattern);
5223 if (regexp_pattern[0] == '\0')
5224 {
5225 error ("null regexp", (char *)NULL);
5226 return;
5227 }
5228 (void) scan_separators (name);
5229
5230 patbuf = xnew (1, struct re_pattern_buffer);
5231 *patbuf = zeropattern;
5232 if (ignore_case)
5233 patbuf->translate = lc_trans; /* translation table to fold case */
5234
5235 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5236 if (err != NULL)
5237 {
5238 error ("%s while compiling pattern", err);
5239 return;
5240 }
5241
5242 pp = p_head;
5243 p_head = xnew (1, pattern);
5244 p_head->regex = savestr (regexp_pattern);
5245 p_head->p_next = pp;
5246 p_head->lang = lang;
5247 p_head->pat = patbuf;
5248 p_head->name_pattern = savestr (name);
5249 p_head->error_signaled = FALSE;
5250 }
5251
5252 /*
5253 * Do the substitutions indicated by the regular expression and
5254 * arguments.
5255 */
5256 static char *
5257 substitute (in, out, regs)
5258 char *in, *out;
5259 struct re_registers *regs;
5260 {
5261 char *result, *t;
5262 int size, dig, diglen;
5263
5264 result = NULL;
5265 size = strlen (out);
5266
5267 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5268 if (out[size - 1] == '\\')
5269 fatal ("pattern error in \"%s\"", out);
5270 for (t = etags_strchr (out, '\\');
5271 t != NULL;
5272 t = etags_strchr (t + 2, '\\'))
5273 if (ISDIGIT (t[1]))
5274 {
5275 dig = t[1] - '0';
5276 diglen = regs->end[dig] - regs->start[dig];
5277 size += diglen - 2;
5278 }
5279 else
5280 size -= 1;
5281
5282 /* Allocate space and do the substitutions. */
5283 result = xnew (size + 1, char);
5284
5285 for (t = result; *out != '\0'; out++)
5286 if (*out == '\\' && ISDIGIT (*++out))
5287 {
5288 dig = *out - '0';
5289 diglen = regs->end[dig] - regs->start[dig];
5290 strncpy (t, in + regs->start[dig], diglen);
5291 t += diglen;
5292 }
5293 else
5294 *t++ = *out;
5295 *t = '\0';
5296
5297 assert (t <= result + size && t - result == (int)strlen (result));
5298
5299 return result;
5300 }
5301
5302 /* Deallocate all patterns. */
5303 static void
5304 free_patterns ()
5305 {
5306 pattern *pp;
5307 while (p_head != NULL)
5308 {
5309 pp = p_head->p_next;
5310 free (p_head->regex);
5311 free (p_head->name_pattern);
5312 free (p_head);
5313 p_head = pp;
5314 }
5315 return;
5316 }
5317 #endif /* ETAGS_REGEXPS */
5318
5319 \f
5320 static bool
5321 nocase_tail (cp)
5322 char *cp;
5323 {
5324 register int len = 0;
5325
5326 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5327 cp++, len++;
5328 if (*cp == '\0' && !intoken (dbp[len]))
5329 {
5330 dbp += len;
5331 return TRUE;
5332 }
5333 return FALSE;
5334 }
5335
5336 static char *
5337 get_tag (bp)
5338 register char *bp;
5339 {
5340 register char *cp, *name;
5341
5342 if (*bp == '\0')
5343 return NULL;
5344 /* Go till you get to white space or a syntactic break */
5345 for (cp = bp + 1; !notinname (*cp); cp++)
5346 continue;
5347 name = savenstr (bp, cp-bp);
5348 pfnote (name, TRUE,
5349 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5350 return name;
5351 }
5352
5353 /* Initialize a linebuffer for use */
5354 static void
5355 initbuffer (lbp)
5356 linebuffer *lbp;
5357 {
5358 lbp->size = (DEBUG) ? 3 : 200;
5359 lbp->buffer = xnew (lbp->size, char);
5360 lbp->buffer[0] = '\0';
5361 lbp->len = 0;
5362 }
5363
5364 /*
5365 * Read a line of text from `stream' into `lbp', excluding the
5366 * newline or CR-NL, if any. Return the number of characters read from
5367 * `stream', which is the length of the line including the newline.
5368 *
5369 * On DOS or Windows we do not count the CR character, if any, before the
5370 * NL, in the returned length; this mirrors the behavior of emacs on those
5371 * platforms (for text files, it translates CR-NL to NL as it reads in the
5372 * file).
5373 */
5374 static long
5375 readline_internal (lbp, stream)
5376 linebuffer *lbp;
5377 register FILE *stream;
5378 {
5379 char *buffer = lbp->buffer;
5380 register char *p = lbp->buffer;
5381 register char *pend;
5382 int chars_deleted;
5383
5384 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5385
5386 while (1)
5387 {
5388 register int c = getc (stream);
5389 if (p == pend)
5390 {
5391 /* We're at the end of linebuffer: expand it. */
5392 lbp->size *= 2;
5393 xrnew (buffer, lbp->size, char);
5394 p += buffer - lbp->buffer;
5395 pend = buffer + lbp->size;
5396 lbp->buffer = buffer;
5397 }
5398 if (c == EOF)
5399 {
5400 *p = '\0';
5401 chars_deleted = 0;
5402 break;
5403 }
5404 if (c == '\n')
5405 {
5406 if (p > buffer && p[-1] == '\r')
5407 {
5408 p -= 1;
5409 #ifdef DOS_NT
5410 /* Assume CRLF->LF translation will be performed by Emacs
5411 when loading this file, so CRs won't appear in the buffer.
5412 It would be cleaner to compensate within Emacs;
5413 however, Emacs does not know how many CRs were deleted
5414 before any given point in the file. */
5415 chars_deleted = 1;
5416 #else
5417 chars_deleted = 2;
5418 #endif
5419 }
5420 else
5421 {
5422 chars_deleted = 1;
5423 }
5424 *p = '\0';
5425 break;
5426 }
5427 *p++ = c;
5428 }
5429 lbp->len = p - buffer;
5430
5431 return lbp->len + chars_deleted;
5432 }
5433
5434 /*
5435 * Like readline_internal, above, but in addition try to match the
5436 * input line against relevant regular expressions.
5437 */
5438 static long
5439 readline (lbp, stream)
5440 linebuffer *lbp;
5441 FILE *stream;
5442 {
5443 /* Read new line. */
5444 long result = readline_internal (lbp, stream);
5445
5446 /* Honour #line directives. */
5447 if (!no_line_directive
5448 && result > 12 && strneq (lbp->buffer, "#line ", 6))
5449 {
5450 int start, lno;
5451
5452 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
5453 {
5454 char *endp = lbp->buffer + start;
5455
5456 while ((endp = etags_strchr (endp, '"')) != NULL
5457 && endp[-1] == '\\')
5458 endp++;
5459 if (endp != NULL)
5460 {
5461 char *absname, *name = lbp->buffer + start;
5462 *endp = '\0';
5463
5464 canonicalize_filename(name); /* for DOS */
5465 absname = absolute_filename (name, curfiledir);
5466 if (filename_is_absolute (name)
5467 || filename_is_absolute (curfile))
5468 name = absname;
5469 else
5470 {
5471 name = relative_filename (absname, tagfiledir);
5472 free (absname);
5473 }
5474
5475 if (streq (curtagfname, name))
5476 free (name);
5477 else
5478 curtagfname = name;
5479 lineno = lno;
5480 nocharno = TRUE; /* do not use char position for tags */
5481 return readline (lbp, stream);
5482 }
5483 }
5484 }
5485
5486 #ifdef ETAGS_REGEXPS
5487 {
5488 int match;
5489 pattern *pp;
5490
5491 /* Match against relevant patterns. */
5492 if (lbp->len > 0)
5493 for (pp = p_head; pp != NULL; pp = pp->p_next)
5494 {
5495 /* Only use generic regexps or those for the current language. */
5496 if (pp->lang != NULL && pp->lang != curlang)
5497 continue;
5498
5499 match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
5500 switch (match)
5501 {
5502 case -2:
5503 /* Some error. */
5504 if (!pp->error_signaled)
5505 {
5506 error ("error while matching \"%s\"", pp->regex);
5507 pp->error_signaled = TRUE;
5508 }
5509 break;
5510 case -1:
5511 /* No match. */
5512 break;
5513 default:
5514 /* Match occurred. Construct a tag. */
5515 if (pp->name_pattern[0] != '\0')
5516 {
5517 /* Make a named tag. */
5518 char *name = substitute (lbp->buffer,
5519 pp->name_pattern, &pp->regs);
5520 if (name != NULL)
5521 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5522 }
5523 else
5524 {
5525 /* Make an unnamed tag. */
5526 pfnote ((char *)NULL, TRUE,
5527 lbp->buffer, match, lineno, linecharno);
5528 }
5529 break;
5530 }
5531 }
5532 }
5533 #endif /* ETAGS_REGEXPS */
5534
5535 return result;
5536 }
5537
5538 \f
/*
 * Return a newly allocated copy of the whole string CP.  The work is
 * done by savenstr, called with the length of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole = strlen (cp);

  return savenstr (cp, whole);
}
5549
5550 /*
5551 * Return a pointer to a space of size LEN+1 allocated with xnew where
5552 * the string CP has been copied for at most the first LEN characters.
5553 */
5554 static char *
5555 savenstr (cp, len)
5556 char *cp;
5557 int len;
5558 {
5559 register char *dp;
5560
5561 dp = xnew (len + 1, char);
5562 strncpy (dp, cp, len);
5563 dp[len] = '\0';
5564 return dp;
5565 }
5566
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null is part of the
 * search, so looking for '\0' finds the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
	found = sp;
      if (*sp == '\0')
	break;
    }
  return (char *)found;
}
5588
5589
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null is part of the
 * search, so looking for '\0' finds the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
	return (char *)sp;
      if (*sp == '\0')
	return NULL;
    }
}
5608
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5618
/* Return a pointer to the first character at or after CP that either
   ends the string or makes iswhite true.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
5628
5629 /* Print error message and exit. */
5630 void
5631 fatal (s1, s2)
5632 char *s1, *s2;
5633 {
5634 error (s1, s2);
5635 exit (BAD);
5636 }
5637
5638 static void
5639 pfatal (s1)
5640 char *s1;
5641 {
5642 perror (s1);
5643 exit (BAD);
5644 }
5645
5646 static void
5647 suggest_asking_for_help ()
5648 {
5649 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5650 progname,
5651 #ifdef LONG_OPTIONS
5652 "--help"
5653 #else
5654 "-h"
5655 #endif
5656 );
5657 exit (BAD);
5658 }
5659
5660 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5661 static void
5662 error (s1, s2)
5663 const char *s1, *s2;
5664 {
5665 fprintf (stderr, "%s: ", progname);
5666 fprintf (stderr, s1, s2);
5667 fprintf (stderr, "\n");
5668 }
5669
5670 /* Return a newly-allocated string whose contents
5671 concatenate those of s1, s2, s3. */
5672 static char *
5673 concat (s1, s2, s3)
5674 char *s1, *s2, *s3;
5675 {
5676 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5677 char *result = xnew (len1 + len2 + len3 + 1, char);
5678
5679 strcpy (result, s1);
5680 strcpy (result + len1, s2);
5681 strcpy (result + len1 + len2, s3);
5682 result[len1 + len2 + len3] = '\0';
5683
5684 return result;
5685 }
5686
5687 \f
5688 /* Does the same work as the system V getcwd, but does not need to
5689 guess the buffer size in advance. */
5690 static char *
5691 etags_getcwd ()
5692 {
5693 #ifdef HAVE_GETCWD
5694 int bufsize = 200;
5695 char *path = xnew (bufsize, char);
5696
5697 while (getcwd (path, bufsize) == NULL)
5698 {
5699 if (errno != ERANGE)
5700 pfatal ("getcwd");
5701 bufsize *= 2;
5702 free (path);
5703 path = xnew (bufsize, char);
5704 }
5705
5706 canonicalize_filename (path);
5707 return path;
5708
5709 #else /* not HAVE_GETCWD */
5710 #if MSDOS
5711
5712 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5713
5714 getwd (path);
5715
5716 for (p = path; *p != '\0'; p++)
5717 if (*p == '\\')
5718 *p = '/';
5719 else
5720 *p = lowcase (*p);
5721
5722 return strdup (path);
5723 #else /* not MSDOS */
5724 linebuffer path;
5725 FILE *pipe;
5726
5727 initbuffer (&path);
5728 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5729 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5730 pfatal ("pwd");
5731 pclose (pipe);
5732
5733 return path.buffer;
5734 #endif /* not MSDOS */
5735 #endif /* not HAVE_GETCWD */
5736 }
5737
5738 /* Return a newly allocated string containing the file name of FILE
5739 relative to the absolute directory DIR (which should end with a slash). */
5740 static char *
5741 relative_filename (file, dir)
5742 char *file, *dir;
5743 {
5744 char *fp, *dp, *afn, *res;
5745 int i;
5746
5747 /* Find the common root of file and dir (with a trailing slash). */
5748 afn = absolute_filename (file, cwd);
5749 fp = afn;
5750 dp = dir;
5751 while (*fp++ == *dp++)
5752 continue;
5753 fp--, dp--; /* back to the first differing char */
5754 #ifdef DOS_NT
5755 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5756 return afn;
5757 #endif
5758 do /* look at the equal chars until '/' */
5759 fp--, dp--;
5760 while (*fp != '/');
5761
5762 /* Build a sequence of "../" strings for the resulting relative file name. */
5763 i = 0;
5764 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5765 i += 1;
5766 res = xnew (3*i + strlen (fp + 1) + 1, char);
5767 res[0] = '\0';
5768 while (i-- > 0)
5769 strcat (res, "../");
5770
5771 /* Add the file name relative to the common root of file and dir. */
5772 strcat (res, fp + 1);
5773 free (afn);
5774
5775 return res;
5776 }
5777
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   already absolute, DIR is put in front of it.  The "/dirname/.." and
   "/." substrings are then removed from the result; if nothing is
   left, the result is "/".  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Back up to the start of the preceding component. */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Source and destination overlap, so the tail must be
		 moved with memmove rather than strcpy. */
	      memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root directory. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5838
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place; if it contains
   no slash, the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of the
	 call, then put the character back. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      res = absolute_filename (file, dir);
      last_slash[1] = saved;
      return res;
    }

  return savestr (dir);
}
5860
5861 /* Whether the argument string is an absolute file name. The argument
5862 string must have been canonicalized with canonicalize_filename. */
5863 static bool
5864 filename_is_absolute (fn)
5865 char *fn;
5866 {
5867 return (fn[0] == '/'
5868 #ifdef DOS_NT
5869 || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
5870 #endif
5871 );
5872 }
5873
/* Translate backslashes into slashes.  Works in place.  When DOS_NT
   is defined, a lower case drive letter in front of a colon is also
   converted with upcase; otherwise the function does nothing.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
	*fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;			/* shut up the compiler */
#endif
}
5892
5893 /* Set the minimum size of a string contained in a linebuffer. */
5894 static void
5895 linebuffer_setlen (lbp, toksize)
5896 linebuffer *lbp;
5897 int toksize;
5898 {
5899 while (lbp->size <= toksize)
5900 {
5901 lbp->size *= 2;
5902 xrnew (lbp->buffer, lbp->size, char);
5903 }
5904 lbp->len = toksize;
5905 }
5906
5907 /* Like malloc but get fatal error if memory is exhausted. */
5908 static PTR
5909 xmalloc (size)
5910 unsigned int size;
5911 {
5912 PTR result = (PTR) malloc (size);
5913 if (result == NULL)
5914 fatal ("virtual memory exhausted", (char *)NULL);
5915 return result;
5916 }
5917
5918 static PTR
5919 xrealloc (ptr, size)
5920 char *ptr;
5921 unsigned int size;
5922 {
5923 PTR result = (PTR) realloc (ptr, size);
5924 if (result == NULL)
5925 fatal ("virtual memory exhausted", (char *)NULL);
5926 return result;
5927 }
5928
5929 /*
5930 * Local Variables:
5931 * c-indentation-style: gnu
5932 * indent-tabs-mode: t
5933 * tab-width: 8
5934 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer")
5935 * End:
5936 */