(ISALNUM, ISALPHA, ISDIGIT, ISLOWER): New macros.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs
2 Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * Sam Kendall added C++.
28 * Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
29 * Regexp tags by Tom Tromey.
30 *
31 * Francesco Potorti` (pot@gnu.org) is the current maintainer.
32 */
33
/* Revision string of this version of etags. */
char pot_etags_version[] = "@(#) pot revision number is 13.44";

/* Truth values used throughout the file. */
#define FALSE 0
#define TRUE 1

/* DEBUG may be preset on the compiler command line; it is off otherwise. */
#if !defined (DEBUG)
# define DEBUG FALSE
#endif

/* P_ wraps the parameter list of a prototype.  It keeps the list when
   the compiler announces standard C (or is the Sun compiler), and
   replaces it with an empty list otherwise. */
#if !defined (__STDC__) || !(__STDC__ || defined (__SUNPRO_C))
# define P_(proto) ()
#else
# define P_(proto) proto
#endif
48
49 #ifdef HAVE_CONFIG_H
50 # include <config.h>
51 /* On some systems, Emacs defines static as nothing for the sake
52 of unexec. We don't want that here since we don't use unexec. */
53 # undef static
54 # define ETAGS_REGEXPS /* use the regexp features */
55 # define LONG_OPTIONS /* accept long options */
56 #endif /* HAVE_CONFIG_H */
57
58 #ifndef _GNU_SOURCE
59 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
60 #endif
61
62 #ifdef MSDOS
63 # undef MSDOS
64 # define MSDOS TRUE
65 # include <fcntl.h>
66 # include <sys/param.h>
67 # include <io.h>
68 # ifndef HAVE_CONFIG_H
69 # define DOS_NT
70 # include <sys/config.h>
71 # endif
72 #else
73 # define MSDOS FALSE
74 #endif /* MSDOS */
75
76 #ifdef WINDOWSNT
77 # include <stdlib.h>
78 # include <fcntl.h>
79 # include <string.h>
80 # include <direct.h>
81 # include <io.h>
82 # define MAXPATHLEN _MAX_PATH
83 # ifdef HAVE_CONFIG_H
84 # undef HAVE_NTGUI
85 # else
86 # define DOS_NT
87 # endif /* not HAVE_CONFIG_H */
88 # ifndef HAVE_GETCWD
89 # define HAVE_GETCWD
90 # endif /* undef HAVE_GETCWD */
91 #else /* !WINDOWSNT */
92 # ifdef STDC_HEADERS
93 # include <stdlib.h>
94 # include <string.h>
95 # else
96 extern char *getenv ();
97 # endif
98 #endif /* !WINDOWSNT */
99
100 #ifdef HAVE_UNISTD_H
101 # include <unistd.h>
102 #else
103 # if defined (HAVE_GETCWD) && !WINDOWSNT
104 extern char *getcwd (char *buf, size_t size);
105 # endif
106 #endif /* HAVE_UNISTD_H */
107
108 #include <stdio.h>
109 #include <ctype.h>
110 #include <errno.h>
111 #ifndef errno
112 extern int errno;
113 #endif
114 #include <sys/types.h>
115 #include <sys/stat.h>
116
117 #if !defined (S_ISREG) && defined (S_IFREG)
118 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
119 #endif
120
121 #ifdef LONG_OPTIONS
122 # include <getopt.h>
123 #else
124 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
125 extern char *optarg;
126 extern int optind, opterr;
127 #endif /* LONG_OPTIONS */
128
129 #ifdef ETAGS_REGEXPS
130 # include <regex.h>
131 #endif /* ETAGS_REGEXPS */
132
/* CTAGS is normalised to a truth value.  Defining it builds the program
   "ctags", compatible with the usual one; leaving it undefined builds
   "etags", which makes emacs-style tag tables and tags typedefs,
   #defines and struct/union/enum by default. */
#if defined (CTAGS)
# undef CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
#endif

/* Exit codes for success (GOOD) and failure (BAD); VMS swaps the values. */
#ifndef VMS
# define GOOD 0
# define BAD 1
#else
# define GOOD 1
# define BAD 0
#endif
151
/* C extensions, as bit flags.  The C* and Java values also contain the
   C++ bit. */
#define C_PLPL 0x1      /* C++ */
#define C_STAR 0x3      /* C* */
#define C_JAVA 0x5      /* JAVA */
#define YACC 0x10000    /* yacc file */
157
/* String equality tests.  When DEBUG is on they abort if both arguments
   are NULL; otherwise they are true exactly when strcmp (or strncmp
   over the first N characters) reports equality. */
#define streq(s,t) ((DEBUG && (s) == NULL && (t) == NULL \
                     && (abort (), 1)) || strcmp (s, t) == 0)
#define strneq(s,t,n) ((DEBUG && (s) == NULL && (t) == NULL \
                        && (abort (), 1)) || strncmp (s, t, n) == 0)

/* Case conversion; the argument is converted to unsigned char before it
   reaches the <ctype.h> routine. */
#define lowcase(c) tolower ((unsigned char) (c))
#define UPCASE(c) toupper ((unsigned char) (c))
165
/* Size of the character-class tables: one slot per value that fits in
   eight bits. */
#define CHARS 256
/* Reduce X to a table index in [0, CHARS).  The argument is
   parenthesised so that the cast applies to the whole expression, not
   just to its first operand. */
#define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
/* Character-class predicates; each one indexes a table of CHARS entries. */
#define iswhite(c) (_wht[CHAR(c)]) /* c is white */
#define notinname(c) (_nin[CHAR(c)]) /* c is not in a name */
#define begtoken(c) (_btk[CHAR(c)]) /* c can start token */
#define intoken(c) (_itk[CHAR(c)]) /* c can be in token */
#define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens */

/* <ctype.h> wrappers that convert the argument to unsigned char first,
   so a negative char value is never handed to the library. */
#define ISALNUM(c) isalnum ((unsigned char) (c))
#define ISALPHA(c) isalpha ((unsigned char) (c))
#define ISDIGIT(c) isdigit ((unsigned char) (c))
#define ISLOWER(c) islower ((unsigned char) (c))
178
179
/*
 * xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS: Type *xnew (int n, Type);
 *           Type *xrnew (OldPointer, int n, Type);
 *
 * Both request room for N objects of type Type and cast the result to
 * Type *.  When chkmalloc is defined the request is routed through
 * trace_malloc/trace_realloc, which also receive __FILE__ and __LINE__.
 */
#ifndef chkmalloc
# define xnew(n,Type) ((Type *) xmalloc (sizeof (Type) * (n)))
# define xrnew(op,n,Type) ((Type *) xrealloc ((op), sizeof (Type) * (n)))
#else
# include "chkmalloc.h"
# define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
                                              sizeof (Type) * (n)))
# define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \
                                                   (op), sizeof (Type) * (n)))
#endif
196
197 typedef int bool;
198
199 typedef void Lang_function P_((FILE *));
200
/* Pairs a file name suffix with the command used to decompress files
   carrying that suffix. */
typedef struct
{
  char *suffix;         /* suffix, without the leading dot */
  char *command;        /* takes one arg and decompresses to stdout */
} compressor;
206
207 typedef struct
208 {
209 char *name;
210 Lang_function *function;
211 char **suffixes;
212 char **interpreters;
213 } language;
214
215 typedef struct node_st
216 { /* sorting structure */
217 char *name; /* function or type name */
218 char *file; /* file name */
219 bool is_func; /* use pattern or line no */
220 bool been_warned; /* set if noticed dup */
221 int lno; /* line number tag is on */
222 long cno; /* character number line starts on */
223 char *pat; /* search pattern */
224 struct node_st *left, *right; /* left and right sons */
225 } node;
226
/*
 * A `linebuffer' holds one line of text.  `readline_internal' reads a
 * line from a stream into a linebuffer and works regardless of the
 * length of the line.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string in BUFFER after
                                   readline reads it */
  char *buffer;                 /* the text itself */
} linebuffer;
240
241 /* Many compilers barf on this:
242 Lang_function Ada_funcs;
243 so let's write it this way */
244 static void Ada_funcs P_((FILE *));
245 static void Asm_labels P_((FILE *));
246 static void C_entries P_((int c_ext, FILE *));
247 static void default_C_entries P_((FILE *));
248 static void plain_C_entries P_((FILE *));
249 static void Cjava_entries P_((FILE *));
250 static void Cobol_paragraphs P_((FILE *));
251 static void Cplusplus_entries P_((FILE *));
252 static void Cstar_entries P_((FILE *));
253 static void Erlang_functions P_((FILE *));
254 static void Fortran_functions P_((FILE *));
255 static void Yacc_entries P_((FILE *));
256 static void Lisp_functions P_((FILE *));
257 static void Pascal_functions P_((FILE *));
258 static void Perl_functions P_((FILE *));
259 static void Postscript_functions P_((FILE *));
260 static void Prolog_functions P_((FILE *));
261 static void Python_functions P_((FILE *));
262 static void Scheme_functions P_((FILE *));
263 static void TeX_functions P_((FILE *));
264 static void Texinfo_functions P_ ((FILE *));
265 static void just_read_file P_((FILE *));
266
267 static void print_language_names P_((void));
268 static void print_version P_((void));
269 static void print_help P_((void));
270 int main P_((int, char **));
271 static int number_len P_((long));
272
273 static compressor *get_compressor_from_suffix P_((char *, char **));
274 static language *get_language_from_name P_((char *));
275 static language *get_language_from_interpreter P_((char *));
276 static language *get_language_from_suffix P_((char *));
277 static int total_size_of_entries P_((node *));
278 static long readline P_((linebuffer *, FILE *));
279 static long readline_internal P_((linebuffer *, FILE *));
280 static void get_tag P_((char *));
281
282 #ifdef ETAGS_REGEXPS
283 static void analyse_regex P_((char *, bool));
284 static void add_regex P_((char *, bool, language *));
285 static void free_patterns P_((void));
286 #endif /* ETAGS_REGEXPS */
287 static void error P_((const char *, const char *));
288 static void suggest_asking_for_help P_((void));
289 static void fatal P_((char *, char *));
290 static void pfatal P_((char *));
291 static void add_node P_((node *, node **));
292
293 static void init P_((void));
294 static void initbuffer P_((linebuffer *));
295 static void find_entries P_((char *, FILE *));
296 static void free_tree P_((node *));
297 static void pfnote P_((char *, bool, char *, int, int, long));
298 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
299 static void process_file P_((char *));
300 static void put_entries P_((node *));
301 static void takeprec P_((void));
302
303 static char *concat P_((char *, char *, char *));
304 static char *skip_spaces P_((char *));
305 static char *skip_non_spaces P_((char *));
306 static char *savenstr P_((char *, int));
307 static char *savestr P_((char *));
308 static char *etags_strchr P_((const char *, int));
309 static char *etags_strrchr P_((const char *, int));
310 static char *etags_getcwd P_((void));
311 static char *relative_filename P_((char *, char *));
312 static char *absolute_filename P_((char *, char *));
313 static char *absolute_dirname P_((char *, char *));
314 static bool filename_is_absolute P_((char *f));
315 static void canonicalize_filename P_((char *));
316 static void grow_linebuffer P_((linebuffer *, int));
317 long *xmalloc P_((unsigned int));
318 long *xrealloc P_((char *, unsigned int));
319
320 \f
321 char searchar = '/'; /* use /.../ searches */
322
323 char *tagfile; /* output file */
324 char *progname; /* name this program was invoked with */
325 char *cwd; /* current working directory */
326 char *tagfiledir; /* directory of tagfile */
327 FILE *tagf; /* ioptr for tags file */
328
329 char *curfile; /* current input file name */
330 language *curlang; /* current language */
331
332 int lineno; /* line number of current line */
333 long charno; /* current character number */
334 long linecharno; /* charno of start of current line */
335 char *dbp; /* pointer to start of current tag */
336
337 node *head; /* the head of the binary tree of tags */
338
339 linebuffer lb; /* the current line */
340 linebuffer token_name; /* used by C_entries as a temporary area */
341 struct
342 {
343 long linepos;
344 linebuffer lb; /* used by C_entries instead of lb */
345 } lbs[2];
346
347 /* boolean "functions" (see init) */
348 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* Character sets, one string per class. */

/* white chars */
char *white = " \f\t\n\r\v";

/* not in a name */
char *nonam = " \f\t\n\r(=,[;";

/* token ending chars */
char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?";

/* token starting chars */
char *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";

/* valid in-token chars */
char *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
360
361 bool append_to_tagfile; /* -a: append to tags */
362 /* The following four default to TRUE for etags, but to FALSE for ctags. */
363 bool typedefs; /* -t: create tags for C and Ada typedefs */
364 bool typedefs_and_cplusplus; /* -T: create tags for C typedefs, level */
365 /* 0 struct/enum/union decls, and C++ */
366 /* member functions. */
367 bool constantypedefs; /* -d: create tags for C #define, enum */
368 /* constants and variables. */
369 /* -D: opposite of -d. Default under ctags. */
370 bool declarations; /* --declarations: tag them and extern in C&Co*/
371 bool globals; /* create tags for global variables */
372 bool members; /* create tags for C member variables */
373 bool update; /* -u: update tags */
374 bool vgrind_style; /* -v: create vgrind style index output */
375 bool no_warnings; /* -w: suppress warnings */
376 bool cxref_style; /* -x: create cxref style output */
377 bool cplusplus; /* .[hc] means C++, not C */
378 bool noindentypedefs; /* -I: ignore indentation in C */
379 bool packages_only; /* --packages-only: in Ada, only tag packages*/
380
#ifdef LONG_OPTIONS
/* Long option table handed to getopt_long.  An entry with a non-NULL
   flag pointer stores its value straight into that variable and makes
   getopt_long return 0; the others return the listed option letter.

   "help" is listed once only.  A second "help" entry with a different
   letter can never be picked for an exact match, and it makes
   abbreviations such as --hel ambiguous.  The short option -H does not
   depend on this table. */
struct option longopts[] =
{
  { "packages-only", no_argument, &packages_only, TRUE },
  { "append", no_argument, NULL, 'a' },
  { "backward-search", no_argument, NULL, 'B' },
  { "c++", no_argument, NULL, 'C' },
  { "cxref", no_argument, NULL, 'x' },
  { "defines", no_argument, NULL, 'd' },
  { "declarations", no_argument, &declarations, TRUE },
  { "no-defines", no_argument, NULL, 'D' },
  { "globals", no_argument, &globals, TRUE },
  { "no-globals", no_argument, &globals, FALSE },
  { "help", no_argument, NULL, 'h' },
  { "ignore-indentation", no_argument, NULL, 'I' },
  { "include", required_argument, NULL, 'i' },
  { "language", required_argument, NULL, 'l' },
  { "members", no_argument, &members, TRUE },
  { "no-members", no_argument, &members, FALSE },
  { "no-warn", no_argument, NULL, 'w' },
  { "output", required_argument, NULL, 'o' },
#ifdef ETAGS_REGEXPS
  { "regex", required_argument, NULL, 'r' },
  { "no-regex", no_argument, NULL, 'R' },
  { "ignore-case-regex", required_argument, NULL, 'c' },
#endif /* ETAGS_REGEXPS */
  { "typedefs", no_argument, NULL, 't' },
  { "typedefs-and-c++", no_argument, NULL, 'T' },
  { "update", no_argument, NULL, 'u' },
  { "version", no_argument, NULL, 'V' },
  { "vgrind", no_argument, NULL, 'v' },
  { NULL }                      /* end of table */
};
#endif /* LONG_OPTIONS */
416
#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression.  Besides the compiled
   pattern and the name string it carries a link to another pattern,
   a language, the regexp text, match registers and an error flag. */
typedef struct pattern
{
  struct pattern *p_next;               /* link to another pattern */
  language *language;
  char *regex;
  struct re_pattern_buffer *pattern;    /* the compiled pattern */
  struct re_registers regs;
  char *name_pattern;                   /* the name string */
  bool error_signaled;
} pattern;

/* List of all regexps. */
pattern *p_head = NULL;

/* How many characters in the character set.  (From regex.c.) */
#define CHAR_SET_SIZE 256

/* Translation table for case-insensitive matching; main fills slot I
   with lowcase (I). */
char lc_trans[CHAR_SET_SIZE];
#endif /* ETAGS_REGEXPS */
439
440 compressor compressors[] =
441 {
442 { "z", "gzip -d -c"},
443 { "Z", "gzip -d -c"},
444 { "gz", "gzip -d -c"},
445 { "GZ", "gzip -d -c"},
446 { "bz2", "bzip2 -d -c" },
447 { NULL }
448 };
449
450 /*
451 * Language stuff.
452 */
453
454 /* Non-NULL if language fixed. */
455 language *forced_lang = NULL;
456
/* File name suffixes (and, for Perl, interpreter names) recognised for
   each language.  Every list ends with NULL. */

/* Ada code */
char *Ada_suffixes [] = { "ads", "adb", "ada", NULL };

/* Assembly code */
char *Asm_suffixes [] =
{
  "a",                  /* Unix assembler */
  "asm",                /* Microcontroller assembly */
  "def",                /* BSO/Tasking definition includes */
  "inc",                /* Microcontroller include files */
  "ins",                /* Microcontroller include files */
  "s", "sa",            /* Unix assembler */
  "S",                  /* cpp-processed Unix assembler */
  "src",                /* BSO/Tasking C compiler output */
  NULL
};

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given.  That is why default_C_entries is called here. */
char *default_C_suffixes [] = { "c", "h", NULL };

char *Cplusplus_suffixes [] =
{
  "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx",
  "M",                  /* Objective C++ */
  "pdb",                /* Postscript with C syntax */
  NULL
};

char *Cjava_suffixes [] = { "java", NULL };

char *Cobol_suffixes [] = { "COB", "cob", NULL };

char *Cstar_suffixes [] = { "cs", "hs", NULL };

char *Erlang_suffixes [] = { "erl", "hrl", NULL };

char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };

char *Lisp_suffixes [] =
{ "cl", "clisp", "el", "l", "lisp", "lsp", "ml", "LSP", NULL };

char *Pascal_suffixes [] = { "p", "pas", NULL };

char *Perl_suffixes [] = { "pl", "pm", NULL };
char *Perl_interpreters [] = { "perl", "@PERL@", NULL };

char *plain_C_suffixes [] =
{
  "pc",                 /* Pro*C file */
  "m",                  /* Objective C file */
  "lm",                 /* Objective lex file */
  NULL
};

/* .psw is for PSWrap */
char *Postscript_suffixes [] = { "ps", "psw", NULL };

char *Prolog_suffixes [] = { "prolog", NULL };

char *Python_suffixes [] = { "py", NULL };

/* Can't do the `SCM' or `scm' prefix with a version number. */
char *Scheme_suffixes [] =
{ "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL };

char *TeX_suffixes [] =
{ "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL };

char *Texinfo_suffixes [] = { "texi", "txi", "texinfo", NULL };

/* .ym is Objective yacc file */
char *Yacc_suffixes [] = { "y", "ym", "yy", "yxx", "y++", NULL };
537
538 /*
539 * Table of languages.
540 *
541 * It is ok for a given function to be listed under more than one
542 * name. I just didn't.
543 */
544
545 language lang_names [] =
546 {
547 { "ada", Ada_funcs, Ada_suffixes, NULL },
548 { "asm", Asm_labels, Asm_suffixes, NULL },
549 { "c", default_C_entries, default_C_suffixes, NULL },
550 { "c++", Cplusplus_entries, Cplusplus_suffixes, NULL },
551 { "c*", Cstar_entries, Cstar_suffixes, NULL },
552 { "cobol", Cobol_paragraphs, Cobol_suffixes, NULL },
553 { "erlang", Erlang_functions, Erlang_suffixes, NULL },
554 { "fortran", Fortran_functions, Fortran_suffixes, NULL },
555 { "java", Cjava_entries, Cjava_suffixes, NULL },
556 { "lisp", Lisp_functions, Lisp_suffixes, NULL },
557 { "pascal", Pascal_functions, Pascal_suffixes, NULL },
558 { "perl", Perl_functions, Perl_suffixes, Perl_interpreters },
559 { "postscript", Postscript_functions, Postscript_suffixes, NULL },
560 { "proc", plain_C_entries, plain_C_suffixes, NULL },
561 { "prolog", Prolog_functions, Prolog_suffixes, NULL },
562 { "python", Python_functions, Python_suffixes, NULL },
563 { "scheme", Scheme_functions, Scheme_suffixes, NULL },
564 { "tex", TeX_functions, TeX_suffixes, NULL },
565 { "texinfo", Texinfo_functions, Texinfo_suffixes, NULL },
566 { "yacc", Yacc_entries, Yacc_suffixes, NULL },
567 { "auto", NULL }, /* default guessing scheme */
568 { "none", just_read_file }, /* regexp matching only */
569 { NULL, NULL } /* end of list */
570 };
571 \f
572 static void
573 print_language_names ()
574 {
575 language *lang;
576 char **ext;
577
578 puts ("\nThese are the currently supported languages, along with the\n\
579 default file name suffixes:");
580 for (lang = lang_names; lang->name != NULL; lang++)
581 {
582 printf ("\t%s\t", lang->name);
583 if (lang->suffixes != NULL)
584 for (ext = lang->suffixes; *ext != NULL; ext++)
585 printf (" .%s", *ext);
586 puts ("");
587 }
588 puts ("Where `auto' means use default language for files based on file\n\
589 name suffix, and `none' means only do regexp processing on files.\n\
590 If no language is specified and no matching suffix is found,\n\
591 the first line of the file is read for a sharp-bang (#!) sequence\n\
592 followed by the name of an interpreter. If no such sequence is found,\n\
593 Fortran is tried first; if no tags are found, C is tried next.\n\
594 Compressed files are supported using gzip and bzip2.");
595 }
596
597 #ifndef EMACS_NAME
598 # define EMACS_NAME "GNU Emacs"
599 #endif
600 #ifndef VERSION
601 # define VERSION "21"
602 #endif
603 static void
604 print_version ()
605 {
606 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
607 puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
608 puts ("This program is distributed under the same terms as Emacs");
609
610 exit (GOOD);
611 }
612
613 static void
614 print_help ()
615 {
616 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
617 \n\
618 These are the options accepted by %s.\n", progname, progname);
619 #ifdef LONG_OPTIONS
620 puts ("You may use unambiguous abbreviations for the long option names.");
621 #else
622 puts ("Long option names do not work with this executable, as it is not\n\
623 linked with GNU getopt.");
624 #endif /* LONG_OPTIONS */
625 puts ("A - as file name means read names from stdin (one per line).");
626 if (!CTAGS)
627 printf (" Absolute names are stored in the output file as they are.\n\
628 Relative ones are stored relative to the output file's directory.");
629 puts ("\n");
630
631 puts ("-a, --append\n\
632 Append tag entries to existing tags file.");
633
634 puts ("--packages-only\n\
635 For Ada files, only generate tags for packages .");
636
637 if (CTAGS)
638 puts ("-B, --backward-search\n\
639 Write the search commands for the tag entries using '?', the\n\
640 backward-search command instead of '/', the forward-search command.");
641
642 puts ("-C, --c++\n\
643 Treat files whose name suffix defaults to C language as C++ files.");
644
645 puts ("--declarations\n\
646 In C and derived languages, create tags for function declarations,");
647 if (CTAGS)
648 puts ("\tand create tags for extern variables if --globals is used.");
649 else
650 puts
651 ("\tand create tags for extern variables unless --no-globals is used.");
652
653 if (CTAGS)
654 puts ("-d, --defines\n\
655 Create tag entries for C #define constants and enum constants, too.");
656 else
657 puts ("-D, --no-defines\n\
658 Don't create tag entries for C #define constants and enum constants.\n\
659 This makes the tags file smaller.");
660
661 if (!CTAGS)
662 {
663 puts ("-i FILE, --include=FILE\n\
664 Include a note in tag file indicating that, when searching for\n\
665 a tag, one should also consult the tags file FILE after\n\
666 checking the current file.");
667 puts ("-l LANG, --language=LANG\n\
668 Force the following files to be considered as written in the\n\
669 named language up to the next --language=LANG option.");
670 }
671
672 if (CTAGS)
673 puts ("--globals\n\
674 Create tag entries for global variables in some languages.");
675 else
676 puts ("--no-globals\n\
677 Do not create tag entries for global variables in some\n\
678 languages. This makes the tags file smaller.");
679 puts ("--members\n\
680 Create tag entries for member variables in C and derived languages.");
681
682 #ifdef ETAGS_REGEXPS
683 puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
684 Make a tag for each line matching pattern REGEXP in the following\n\
685 files. {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
686 regexfile is a file containing one REGEXP per line.\n\
687 REGEXP is anchored (as if preceded by ^).\n\
688 The form /REGEXP/NAME/ creates a named tag.\n\
689 For example Tcl named tags can be created with:\n\
690 --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
691 puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
692 Like -r, --regex but ignore case when matching expressions.");
693 puts ("-R, --no-regex\n\
694 Don't create tags from regexps for the following files.");
695 #endif /* ETAGS_REGEXPS */
696 puts ("-o FILE, --output=FILE\n\
697 Write the tags to FILE.");
698 puts ("-I, --ignore-indentation\n\
699 Don't rely on indentation quite as much as normal. Currently,\n\
700 this means not to assume that a closing brace in the first\n\
701 column is the final brace of a function or structure\n\
702 definition in C and C++.");
703
704 if (CTAGS)
705 {
706 puts ("-t, --typedefs\n\
707 Generate tag entries for C and Ada typedefs.");
708 puts ("-T, --typedefs-and-c++\n\
709 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
710 and C++ member functions.");
711 puts ("-u, --update\n\
712 Update the tag entries for the given files, leaving tag\n\
713 entries for other files in place. Currently, this is\n\
714 implemented by deleting the existing entries for the given\n\
715 files and then rewriting the new entries at the end of the\n\
716 tags file. It is often faster to simply rebuild the entire\n\
717 tag file than to use this.");
718 puts ("-v, --vgrind\n\
719 Generates an index of items intended for human consumption,\n\
720 similar to the output of vgrind. The index is sorted, and\n\
721 gives the page number of each item.");
722 puts ("-w, --no-warn\n\
723 Suppress warning messages about entries defined in multiple\n\
724 files.");
725 puts ("-x, --cxref\n\
726 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
727 The output uses line numbers instead of page numbers, but\n\
728 beyond that the differences are cosmetic; try both to see\n\
729 which you like.");
730 }
731
732 puts ("-V, --version\n\
733 Print the version of the program.\n\
734 -h, --help\n\
735 Print this help message.");
736
737 print_language_names ();
738
739 puts ("");
740 puts ("Report bugs to bug-gnu-emacs@gnu.org");
741
742 exit (GOOD);
743 }
744
745 \f
746 enum argument_type
747 {
748 at_language,
749 at_regexp,
750 at_filename,
751 at_icregexp
752 };
753
754 /* This structure helps us allow mixing of --lang and file names. */
755 typedef struct
756 {
757 enum argument_type arg_type;
758 char *what;
759 language *lang; /* language of the regexp */
760 } argument;
761
762 #ifdef VMS /* VMS specific functions */
763
/* String terminator. */
#define EOS '\0'

/* This is a BUG! ANY arbitrary limit is a BUG!
   Won't someone please fix this? */
#define MAX_FILE_SPEC_LEN 255

/* A file specification: a length word followed by the text.  fn_exp
   terminates body at index curlen after a successful lookup. */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];     /* one extra byte for the EOS */
} vspec;
773
774 /*
775 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
776 returning in each successive call the next file name matching the input
777 spec. The function expects that each in_spec passed
778 to it will be processed to completion; in particular, up to and
779 including the call following that in which the last matching name
780 is returned, the function ignores the value of in_spec, and will
781 only start processing a new spec with the following call.
782 If an error occurs, on return out_spec contains the value
783 of in_spec when the error occurred.
784
785 With each successive file name returned in out_spec, the
786 function's return value is one. When there are no more matching
787 names the function returns zero. If on the first call no file
788 matches in_spec, or there is any other error, -1 is returned.
789 */
790
791 #include <rmsdef.h>
792 #include <descrip.h>
793 #define OUTSIZE MAX_FILE_SPEC_LEN
794 static short
795 fn_exp (out, in)
796 vspec *out;
797 char *in;
798 {
799 static long context = 0;
800 static struct dsc$descriptor_s o;
801 static struct dsc$descriptor_s i;
802 static bool pass1 = TRUE;
803 long status;
804 short retval;
805
806 if (pass1)
807 {
808 pass1 = FALSE;
809 o.dsc$a_pointer = (char *) out;
810 o.dsc$w_length = (short)OUTSIZE;
811 i.dsc$a_pointer = in;
812 i.dsc$w_length = (short)strlen(in);
813 i.dsc$b_dtype = DSC$K_DTYPE_T;
814 i.dsc$b_class = DSC$K_CLASS_S;
815 o.dsc$b_dtype = DSC$K_DTYPE_VT;
816 o.dsc$b_class = DSC$K_CLASS_VS;
817 }
818 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
819 {
820 out->body[out->curlen] = EOS;
821 return 1;
822 }
823 else if (status == RMS$_NMF)
824 retval = 0;
825 else
826 {
827 strcpy(out->body, in);
828 retval = -1;
829 }
830 lib$find_file_end(&context);
831 pass1 = TRUE;
832 return retval;
833 }
834
835 /*
836 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
837 name of each file specified by the provided arg expanding wildcards.
838 */
839 static char *
840 gfnames (arg, p_error)
841 char *arg;
842 bool *p_error;
843 {
844 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
845
846 switch (fn_exp (&filename, arg))
847 {
848 case 1:
849 *p_error = FALSE;
850 return filename.body;
851 case 0:
852 *p_error = FALSE;
853 return NULL;
854 default:
855 *p_error = TRUE;
856 return filename.body;
857 }
858 }
859
#ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Stand-in for system(): reports that the call is not available and
   returns -1 so that a caller checking the result sees a failure
   instead of an indeterminate value.
   NOTE(review): this stub is compiled when OLD is *not* defined, which
   reads as the opposite of the remark on the guard -- confirm which
   condition is intended. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
867
#define VERSION_DELIM ';'
/* Lower-case S in place and cut it at the first VERSION_DELIM, if there
   is one.  Returns S. */
char *massage_name (s)
     char *s;
{
  char *p = s;

  while (*p != '\0' && *p != VERSION_DELIM)
    {
      *p = tolower ((unsigned char) (*p));
      p++;
    }
  /* Drop the delimiter and whatever follows it. */
  if (*p == VERSION_DELIM)
    *p = '\0';
  return s;
}
884 #endif /* VMS */
885
886 \f
887 int
888 main (argc, argv)
889 int argc;
890 char *argv[];
891 {
892 int i;
893 unsigned int nincluded_files;
894 char **included_files;
895 char *this_file;
896 argument *argbuffer;
897 int current_arg, file_count;
898 linebuffer filename_lb;
899 #ifdef VMS
900 bool got_err;
901 #endif
902
903 #ifdef DOS_NT
904 _fmode = O_BINARY; /* all of files are treated as binary files */
905 #endif /* DOS_NT */
906
907 progname = argv[0];
908 nincluded_files = 0;
909 included_files = xnew (argc, char *);
910 current_arg = 0;
911 file_count = 0;
912
913 /* Allocate enough no matter what happens. Overkill, but each one
914 is small. */
915 argbuffer = xnew (argc, argument);
916
917 #ifdef ETAGS_REGEXPS
918 /* Set syntax for regular expression routines. */
919 re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
920 /* Translation table for case-insensitive search. */
921 for (i = 0; i < CHAR_SET_SIZE; i++)
922 lc_trans[i] = lowcase (i);
923 #endif /* ETAGS_REGEXPS */
924
925 /*
926 * If etags, always find typedefs and structure tags. Why not?
927 * Also default is to find macro constants, enum constants and
928 * global variables.
929 */
930 if (!CTAGS)
931 {
932 typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
933 globals = TRUE;
934 members = FALSE;
935 }
936
937 while (1)
938 {
939 int opt;
940 char *optstring;
941
942 #ifdef ETAGS_REGEXPS
943 optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
944 #else
945 optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
946 #endif /* ETAGS_REGEXPS */
947
948 #ifndef LONG_OPTIONS
949 optstring = optstring + 1;
950 #endif /* LONG_OPTIONS */
951
952 opt = getopt_long (argc, argv, optstring, longopts, 0);
953 if (opt == EOF)
954 break;
955
956 switch (opt)
957 {
958 case 0:
959 /* If getopt returns 0, then it has already processed a
960 long-named option. We should do nothing. */
961 break;
962
963 case 1:
964 /* This means that a file name has been seen. Record it. */
965 argbuffer[current_arg].arg_type = at_filename;
966 argbuffer[current_arg].what = optarg;
967 ++current_arg;
968 ++file_count;
969 break;
970
971 /* Common options. */
972 case 'a': append_to_tagfile = TRUE; break;
973 case 'C': cplusplus = TRUE; break;
974 case 'd': constantypedefs = TRUE; break;
975 case 'D': constantypedefs = FALSE; break;
976 case 'f': /* for compatibility with old makefiles */
977 case 'o':
978 if (tagfile)
979 {
980 error ("-o option may only be given once.", (char *)NULL);
981 suggest_asking_for_help ();
982 }
983 tagfile = optarg;
984 break;
985 case 'I':
986 case 'S': /* for backward compatibility */
987 noindentypedefs = TRUE;
988 break;
989 case 'l':
990 {
991 language *lang = get_language_from_name (optarg);
992 if (lang != NULL)
993 {
994 argbuffer[current_arg].lang = lang;
995 argbuffer[current_arg].arg_type = at_language;
996 ++current_arg;
997 }
998 }
999 break;
1000 #ifdef ETAGS_REGEXPS
1001 case 'r':
1002 argbuffer[current_arg].arg_type = at_regexp;
1003 argbuffer[current_arg].what = optarg;
1004 ++current_arg;
1005 break;
1006 case 'R':
1007 argbuffer[current_arg].arg_type = at_regexp;
1008 argbuffer[current_arg].what = NULL;
1009 ++current_arg;
1010 break;
1011 case 'c':
1012 argbuffer[current_arg].arg_type = at_icregexp;
1013 argbuffer[current_arg].what = optarg;
1014 ++current_arg;
1015 break;
1016 #endif /* ETAGS_REGEXPS */
1017 case 'V':
1018 print_version ();
1019 break;
1020 case 'h':
1021 case 'H':
1022 print_help ();
1023 break;
1024 case 't':
1025 typedefs = TRUE;
1026 break;
1027 case 'T':
1028 typedefs = typedefs_and_cplusplus = TRUE;
1029 break;
1030 #if (!CTAGS)
1031 /* Etags options */
1032 case 'i':
1033 included_files[nincluded_files++] = optarg;
1034 break;
1035 #else /* CTAGS */
1036 /* Ctags options. */
1037 case 'B': searchar = '?'; break;
1038 case 'u': update = TRUE; break;
1039 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1040 case 'x': cxref_style = TRUE; break;
1041 case 'w': no_warnings = TRUE; break;
1042 #endif /* CTAGS */
1043 default:
1044 suggest_asking_for_help ();
1045 }
1046 }
1047
1048 for (; optind < argc; ++optind)
1049 {
1050 argbuffer[current_arg].arg_type = at_filename;
1051 argbuffer[current_arg].what = argv[optind];
1052 ++current_arg;
1053 ++file_count;
1054 }
1055
1056 if (nincluded_files == 0 && file_count == 0)
1057 {
1058 error ("no input files specified.", (char *)NULL);
1059 suggest_asking_for_help ();
1060 }
1061
1062 if (tagfile == NULL)
1063 tagfile = CTAGS ? "tags" : "TAGS";
1064 cwd = etags_getcwd (); /* the current working directory */
1065 if (cwd[strlen (cwd) - 1] != '/')
1066 {
1067 char *oldcwd = cwd;
1068 cwd = concat (oldcwd, "/", "");
1069 free (oldcwd);
1070 }
1071 if (streq (tagfile, "-"))
1072 tagfiledir = cwd;
1073 else
1074 tagfiledir = absolute_dirname (tagfile, cwd);
1075
1076 init (); /* set up boolean "functions" */
1077
1078 initbuffer (&lb);
1079 initbuffer (&token_name);
1080 initbuffer (&lbs[0].lb);
1081 initbuffer (&lbs[1].lb);
1082 initbuffer (&filename_lb);
1083
1084 if (!CTAGS)
1085 {
1086 if (streq (tagfile, "-"))
1087 {
1088 tagf = stdout;
1089 #ifdef DOS_NT
1090 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1091 doesn't take effect until after `stdout' is already open). */
1092 if (!isatty (fileno (stdout)))
1093 setmode (fileno (stdout), O_BINARY);
1094 #endif /* DOS_NT */
1095 }
1096 else
1097 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1098 if (tagf == NULL)
1099 pfatal (tagfile);
1100 }
1101
1102 /*
1103 * Loop through files finding functions.
1104 */
1105 for (i = 0; i < current_arg; ++i)
1106 {
1107 switch (argbuffer[i].arg_type)
1108 {
1109 case at_language:
1110 forced_lang = argbuffer[i].lang;
1111 break;
1112 #ifdef ETAGS_REGEXPS
1113 case at_regexp:
1114 analyse_regex (argbuffer[i].what, FALSE);
1115 break;
1116 case at_icregexp:
1117 analyse_regex (argbuffer[i].what, TRUE);
1118 break;
1119 #endif
1120 case at_filename:
1121 #ifdef VMS
1122 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1123 {
1124 if (got_err)
1125 {
1126 error ("can't find file %s\n", this_file);
1127 argc--, argv++;
1128 }
1129 else
1130 {
1131 this_file = massage_name (this_file);
1132 }
1133 #else
1134 this_file = argbuffer[i].what;
1135 #endif
1136 /* Input file named "-" means read file names from stdin
1137 (one per line) and use them. */
1138 if (streq (this_file, "-"))
1139 while (readline_internal (&filename_lb, stdin) > 0)
1140 process_file (filename_lb.buffer);
1141 else
1142 process_file (this_file);
1143 #ifdef VMS
1144 }
1145 #endif
1146 break;
1147 }
1148 }
1149
1150 #ifdef ETAGS_REGEXPS
1151 free_patterns ();
1152 #endif /* ETAGS_REGEXPS */
1153
1154 if (!CTAGS)
1155 {
1156 while (nincluded_files-- > 0)
1157 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1158
1159 fclose (tagf);
1160 exit (GOOD);
1161 }
1162
1163 /* If CTAGS, we are here. process_file did not write the tags yet,
1164 because we want them ordered. Let's do it now. */
1165 if (cxref_style)
1166 {
1167 put_entries (head);
1168 free_tree (head);
1169 head = NULL;
1170 exit (GOOD);
1171 }
1172
1173 if (update)
1174 {
1175 char cmd[BUFSIZ];
1176 for (i = 0; i < current_arg; ++i)
1177 {
1178 if (argbuffer[i].arg_type != at_filename)
1179 continue;
1180 sprintf (cmd,
1181 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1182 tagfile, argbuffer[i].what, tagfile);
1183 if (system (cmd) != GOOD)
1184 fatal ("failed to execute shell command", (char *)NULL);
1185 }
1186 append_to_tagfile = TRUE;
1187 }
1188
1189 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1190 if (tagf == NULL)
1191 pfatal (tagfile);
1192 put_entries (head);
1193 free_tree (head);
1194 head = NULL;
1195 fclose (tagf);
1196
1197 if (update)
1198 {
1199 char cmd[BUFSIZ];
1200 sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1201 exit (system (cmd));
1202 }
1203 return GOOD;
1204 }
1205
1206
1207
1208 /*
1209 * Return a compressor given the file name. If EXTPTR is non-zero,
1210 * return a pointer into FILE where the compressor-specific
1211 * extension begins. If no compressor is found, NULL is returned
1212 * and EXTPTR is not significant.
1213 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca>
1214 */
1215 static compressor *
1216 get_compressor_from_suffix (file, extptr)
1217 char *file;
1218 char **extptr;
1219 {
1220 compressor *compr;
1221 char *slash, *suffix;
1222
1223 /* This relies on FN to be after canonicalize_filename,
1224 so we don't need to consider backslashes on DOS_NT. */
1225 slash = etags_strrchr (file, '/');
1226 suffix = etags_strrchr (file, '.');
1227 if (suffix == NULL || suffix < slash)
1228 return NULL;
1229 if (extptr != NULL)
1230 *extptr = suffix;
1231 suffix += 1;
1232 /* Let those poor souls who live with DOS 8+3 file name limits get
1233 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1234 Only the first do loop is run if not MSDOS */
1235 do
1236 {
1237 for (compr = compressors; compr->suffix != NULL; compr++)
1238 if (streq (compr->suffix, suffix))
1239 return compr;
1240 if (!MSDOS)
1241 break; /* do it only once: not really a loop */
1242 if (extptr != NULL)
1243 *extptr = ++suffix;
1244 } while (*suffix != '\0');
1245 return NULL;
1246 }
1247
1248
1249
1250 /*
1251 * Return a language given the name.
1252 */
1253 static language *
1254 get_language_from_name (name)
1255 char *name;
1256 {
1257 language *lang;
1258
1259 if (name == NULL)
1260 error ("empty language name", (char *)NULL);
1261 else
1262 {
1263 for (lang = lang_names; lang->name != NULL; lang++)
1264 if (streq (name, lang->name))
1265 return lang;
1266 error ("unknown language \"%s\"", name);
1267 }
1268
1269 return NULL;
1270 }
1271
1272
1273 /*
1274 * Return a language given the interpreter name.
1275 */
1276 static language *
1277 get_language_from_interpreter (interpreter)
1278 char *interpreter;
1279 {
1280 language *lang;
1281 char **iname;
1282
1283 if (interpreter == NULL)
1284 return NULL;
1285 for (lang = lang_names; lang->name != NULL; lang++)
1286 if (lang->interpreters != NULL)
1287 for (iname = lang->interpreters; *iname != NULL; iname++)
1288 if (streq (*iname, interpreter))
1289 return lang;
1290
1291 return NULL;
1292 }
1293
1294
1295
1296 /*
1297 * Return a language given the file name.
1298 */
1299 static language *
1300 get_language_from_suffix (file)
1301 char *file;
1302 {
1303 language *lang;
1304 char **ext, *suffix;
1305
1306 suffix = etags_strrchr (file, '.');
1307 if (suffix == NULL)
1308 return NULL;
1309 suffix += 1;
1310 for (lang = lang_names; lang->name != NULL; lang++)
1311 if (lang->suffixes != NULL)
1312 for (ext = lang->suffixes; *ext != NULL; ext++)
1313 if (streq (*ext, suffix))
1314 return lang;
1315 return NULL;
1316 }
1317
1318
1319
1320 /*
1321 * This routine is called on each file argument.
1322 */
1323 static void
1324 process_file (file)
1325 char *file;
1326 {
1327 struct stat stat_buf;
1328 FILE *inf;
1329 compressor *compr;
1330 char *compressed_name, *uncompressed_name;
1331 char *ext, *real_name;
1332
1333
1334 canonicalize_filename (file);
1335 if (streq (file, tagfile) && !streq (tagfile, "-"))
1336 {
1337 error ("skipping inclusion of %s in self.", file);
1338 return;
1339 }
1340 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1341 {
1342 compressed_name = NULL;
1343 real_name = uncompressed_name = savestr (file);
1344 }
1345 else
1346 {
1347 real_name = compressed_name = savestr (file);
1348 uncompressed_name = savenstr (file, ext - file);
1349 }
1350
1351 /* If the canonicalised uncompressed name has already be dealt with,
1352 skip it silently, else add it to the list. */
1353 {
1354 typedef struct processed_file
1355 {
1356 char *filename;
1357 struct processed_file *next;
1358 } processed_file;
1359 static processed_file *pf_head = NULL;
1360 register processed_file *fnp;
1361
1362 for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1363 if (streq (uncompressed_name, fnp->filename))
1364 goto exit;
1365 fnp = pf_head;
1366 pf_head = xnew (1, struct processed_file);
1367 pf_head->filename = savestr (uncompressed_name);
1368 pf_head->next = fnp;
1369 }
1370
1371 if (stat (real_name, &stat_buf) != 0)
1372 {
1373 /* Reset real_name and try with a different name. */
1374 real_name = NULL;
1375 if (compressed_name != NULL) /* try with the given suffix */
1376 {
1377 if (stat (uncompressed_name, &stat_buf) == 0)
1378 real_name = uncompressed_name;
1379 }
1380 else /* try all possible suffixes */
1381 {
1382 for (compr = compressors; compr->suffix != NULL; compr++)
1383 {
1384 compressed_name = concat (file, ".", compr->suffix);
1385 if (stat (compressed_name, &stat_buf) != 0)
1386 {
1387 if (MSDOS)
1388 {
1389 char *suf = compressed_name + strlen (file);
1390 size_t suflen = strlen (compr->suffix) + 1;
1391 for ( ; suf[1]; suf++, suflen--)
1392 {
1393 memmove (suf, suf + 1, suflen);
1394 if (stat (compressed_name, &stat_buf) == 0)
1395 {
1396 real_name = compressed_name;
1397 break;
1398 }
1399 }
1400 if (real_name != NULL)
1401 break;
1402 } /* MSDOS */
1403 free (compressed_name);
1404 compressed_name = NULL;
1405 }
1406 else
1407 {
1408 real_name = compressed_name;
1409 break;
1410 }
1411 }
1412 }
1413 if (real_name == NULL)
1414 {
1415 perror (file);
1416 goto exit;
1417 }
1418 } /* try with a different name */
1419
1420 if (!S_ISREG (stat_buf.st_mode))
1421 {
1422 error ("skipping %s: it is not a regular file.", real_name);
1423 goto exit;
1424 }
1425 if (real_name == compressed_name)
1426 {
1427 char *cmd = concat (compr->command, " ", real_name);
1428 inf = popen (cmd, "r");
1429 free (cmd);
1430 }
1431 else
1432 inf = fopen (real_name, "r");
1433 if (inf == NULL)
1434 {
1435 perror (real_name);
1436 goto exit;
1437 }
1438
1439 find_entries (uncompressed_name, inf);
1440
1441 if (real_name == compressed_name)
1442 pclose (inf);
1443 else
1444 fclose (inf);
1445
1446 if (!CTAGS)
1447 {
1448 char *filename;
1449
1450 if (filename_is_absolute (uncompressed_name))
1451 {
1452 /* file is an absolute file name. Canonicalise it. */
1453 filename = absolute_filename (uncompressed_name, cwd);
1454 }
1455 else
1456 {
1457 /* file is a file name relative to cwd. Make it relative
1458 to the directory of the tags file. */
1459 filename = relative_filename (uncompressed_name, tagfiledir);
1460 }
1461 fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1462 free (filename);
1463 put_entries (head);
1464 free_tree (head);
1465 head = NULL;
1466 }
1467
1468 exit:
1469 if (compressed_name) free(compressed_name);
1470 if (uncompressed_name) free(uncompressed_name);
1471 return;
1472 }
1473
1474 /*
1475 * This routine sets up the boolean pseudo-functions which work
1476 * by setting boolean flags dependent upon the corresponding character.
1477 * Every char which is NOT in that string is not a white char. Therefore,
1478 * all of the array "_wht" is set to FALSE, and then the elements
1479 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1480 * of a char is TRUE if it is the string "white", else FALSE.
1481 */
1482 static void
1483 init ()
1484 {
1485 register char *sp;
1486 register int i;
1487
1488 for (i = 0; i < CHARS; i++)
1489 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1490 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1491 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1492 notinname('\0') = notinname('\n');
1493 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1494 begtoken('\0') = begtoken('\n');
1495 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1496 intoken('\0') = intoken('\n');
1497 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1498 endtoken('\0') = endtoken('\n');
1499 }
1500
1501 /*
1502 * This routine opens the specified file and calls the function
1503 * which finds the function and type definitions.
1504 */
1505 node *last_node = NULL;
1506
1507 static void
1508 find_entries (file, inf)
1509 char *file;
1510 FILE *inf;
1511 {
1512 char *cp;
1513 language *lang;
1514 node *old_last_node;
1515
1516 /* Memory leakage here: the string pointed by curfile is
1517 never released, because curfile is copied into np->file
1518 for each node, to be used in CTAGS mode. The amount of
1519 memory leaked here is the sum of the lengths of the
1520 file names. */
1521 curfile = savestr (file);
1522
1523 /* If user specified a language, use it. */
1524 lang = forced_lang;
1525 if (lang != NULL && lang->function != NULL)
1526 {
1527 curlang = lang;
1528 lang->function (inf);
1529 return;
1530 }
1531
1532 /* Try to guess the language given the file name. */
1533 lang = get_language_from_suffix (file);
1534 if (lang != NULL && lang->function != NULL)
1535 {
1536 curlang = lang;
1537 lang->function (inf);
1538 return;
1539 }
1540
1541 /* Look for sharp-bang as the first two characters. */
1542 if (readline_internal (&lb, inf) > 0
1543 && lb.len >= 2
1544 && lb.buffer[0] == '#'
1545 && lb.buffer[1] == '!')
1546 {
1547 char *lp;
1548
1549 /* Set lp to point at the first char after the last slash in the
1550 line or, if no slashes, at the first nonblank. Then set cp to
1551 the first successive blank and terminate the string. */
1552 lp = etags_strrchr (lb.buffer+2, '/');
1553 if (lp != NULL)
1554 lp += 1;
1555 else
1556 lp = skip_spaces (lb.buffer + 2);
1557 cp = skip_non_spaces (lp);
1558 *cp = '\0';
1559
1560 if (strlen (lp) > 0)
1561 {
1562 lang = get_language_from_interpreter (lp);
1563 if (lang != NULL && lang->function != NULL)
1564 {
1565 curlang = lang;
1566 lang->function (inf);
1567 return;
1568 }
1569 }
1570 }
1571 /* We rewind here, even if inf may be a pipe. We fail if the
1572 length of the first line is longer than the pipe block size,
1573 which is unlikely. */
1574 rewind (inf);
1575
1576 /* Try Fortran. */
1577 old_last_node = last_node;
1578 curlang = get_language_from_name ("fortran");
1579 Fortran_functions (inf);
1580
1581 /* No Fortran entries found. Try C. */
1582 if (old_last_node == last_node)
1583 {
1584 /* We do not tag if rewind fails.
1585 Only the file name will be recorded in the tags file. */
1586 rewind (inf);
1587 curlang = get_language_from_name (cplusplus ? "c++" : "c");
1588 default_C_entries (inf);
1589 }
1590 return;
1591 }
1592 \f
1593 /* Record a tag. */
1594 static void
1595 pfnote (name, is_func, linestart, linelen, lno, cno)
1596 char *name; /* tag name, or NULL if unnamed */
1597 bool is_func; /* tag is a function */
1598 char *linestart; /* start of the line where tag is */
1599 int linelen; /* length of the line where tag is */
1600 int lno; /* line number */
1601 long cno; /* character number */
1602 {
1603 register node *np;
1604
1605 if (CTAGS && name == NULL)
1606 return;
1607
1608 np = xnew (1, node);
1609
1610 /* If ctags mode, change name "main" to M<thisfilename>. */
1611 if (CTAGS && !cxref_style && streq (name, "main"))
1612 {
1613 register char *fp = etags_strrchr (curfile, '/');
1614 np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1615 fp = etags_strrchr (np->name, '.');
1616 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1617 fp[0] = '\0';
1618 }
1619 else
1620 np->name = name;
1621 np->been_warned = FALSE;
1622 np->file = curfile;
1623 np->is_func = is_func;
1624 np->lno = lno;
1625 /* Our char numbers are 0-base, because of C language tradition?
1626 ctags compatibility? old versions compatibility? I don't know.
1627 Anyway, since emacs's are 1-base we expect etags.el to take care
1628 of the difference. If we wanted to have 1-based numbers, we would
1629 uncomment the +1 below. */
1630 np->cno = cno /* + 1 */ ;
1631 np->left = np->right = NULL;
1632 if (CTAGS && !cxref_style)
1633 {
1634 if (strlen (linestart) < 50)
1635 np->pat = concat (linestart, "$", "");
1636 else
1637 np->pat = savenstr (linestart, 50);
1638 }
1639 else
1640 np->pat = savenstr (linestart, linelen);
1641
1642 add_node (np, &head);
1643 }
1644
1645 /* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997]
1646 * From: Sam Kendall <kendall@mv.mv.com>
1647 * Subject: Proposal for firming up the TAGS format specification
1648 * To: F.Potorti@cnuce.cnr.it
1649 *
1650 * pfnote should emit the optimized form [unnamed tag] only if:
1651 * 1. name does not contain any of the characters " \t\r\n(),;";
1652 * 2. linestart contains name as either a rightmost, or rightmost but
1653 * one character, substring;
1654 * 3. the character, if any, immediately before name in linestart must
1655 * be one of the characters " \t(),;";
1656 * 4. the character, if any, immediately after name in linestart must
1657 * also be one of the characters " \t(),;".
1658 *
1659 * The real implementation uses the notinname() macro, which recognises
1660 * characters slightly different form " \t\r\n(),;". See the variable
1661 * `nonam'.
1662 */
1663 #define traditional_tag_style TRUE
1664 static void
1665 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1666 char *name; /* tag name, or NULL if unnamed */
1667 int namelen; /* tag length */
1668 bool is_func; /* tag is a function */
1669 char *linestart; /* start of the line where tag is */
1670 int linelen; /* length of the line where tag is */
1671 int lno; /* line number */
1672 long cno; /* character number */
1673 {
1674 register char *cp;
1675 bool named;
1676
1677 named = TRUE;
1678 if (!CTAGS)
1679 {
1680 for (cp = name; !notinname (*cp); cp++)
1681 continue;
1682 if (*cp == '\0') /* rule #1 */
1683 {
1684 cp = linestart + linelen - namelen;
1685 if (notinname (linestart[linelen-1]))
1686 cp -= 1; /* rule #4 */
1687 if (cp >= linestart /* rule #2 */
1688 && (cp == linestart
1689 || notinname (cp[-1])) /* rule #3 */
1690 && strneq (name, cp, namelen)) /* rule #2 */
1691 named = FALSE; /* use unnamed tag */
1692 }
1693 }
1694
1695 if (named)
1696 name = savenstr (name, namelen);
1697 else
1698 name = NULL;
1699 pfnote (name, is_func, linestart, linelen, lno, cno);
1700 }
1701
1702 /*
1703 * free_tree ()
1704 * recurse on left children, iterate on right children.
1705 */
1706 static void
1707 free_tree (np)
1708 register node *np;
1709 {
1710 while (np)
1711 {
1712 register node *node_right = np->right;
1713 free_tree (np->left);
1714 if (np->name != NULL)
1715 free (np->name);
1716 free (np->pat);
1717 free (np);
1718 np = node_right;
1719 }
1720 }
1721
1722 /*
1723 * add_node ()
1724 * Adds a node to the tree of nodes. In etags mode, we don't keep
1725 * it sorted; we just keep a linear list. In ctags mode, maintain
1726 * an ordered tree, with no attempt at balancing.
1727 *
1728 * add_node is the only function allowed to add nodes, so it can
1729 * maintain state.
1730 */
1731 static void
1732 add_node (np, cur_node_p)
1733 node *np, **cur_node_p;
1734 {
1735 register int dif;
1736 register node *cur_node = *cur_node_p;
1737
1738 if (cur_node == NULL)
1739 {
1740 *cur_node_p = np;
1741 last_node = np;
1742 return;
1743 }
1744
1745 if (!CTAGS)
1746 {
1747 /* Etags Mode */
1748 if (last_node == NULL)
1749 fatal ("internal error in add_node", (char *)NULL);
1750 last_node->right = np;
1751 last_node = np;
1752 }
1753 else
1754 {
1755 /* Ctags Mode */
1756 dif = strcmp (np->name, cur_node->name);
1757
1758 /*
1759 * If this tag name matches an existing one, then
1760 * do not add the node, but maybe print a warning.
1761 */
1762 if (!dif)
1763 {
1764 if (streq (np->file, cur_node->file))
1765 {
1766 if (!no_warnings)
1767 {
1768 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1769 np->file, lineno, np->name);
1770 fprintf (stderr, "Second entry ignored\n");
1771 }
1772 }
1773 else if (!cur_node->been_warned && !no_warnings)
1774 {
1775 fprintf
1776 (stderr,
1777 "Duplicate entry in files %s and %s: %s (Warning only)\n",
1778 np->file, cur_node->file, np->name);
1779 cur_node->been_warned = TRUE;
1780 }
1781 return;
1782 }
1783
1784 /* Actually add the node */
1785 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1786 }
1787 }
1788 \f
1789 static void
1790 put_entries (np)
1791 register node *np;
1792 {
1793 register char *sp;
1794
1795 if (np == NULL)
1796 return;
1797
1798 /* Output subentries that precede this one */
1799 put_entries (np->left);
1800
1801 /* Output this entry */
1802
1803 if (!CTAGS)
1804 {
1805 if (np->name != NULL)
1806 fprintf (tagf, "%s\177%s\001%d,%ld\n",
1807 np->pat, np->name, np->lno, np->cno);
1808 else
1809 fprintf (tagf, "%s\177%d,%ld\n",
1810 np->pat, np->lno, np->cno);
1811 }
1812 else
1813 {
1814 if (np->name == NULL)
1815 error ("internal error: NULL name in ctags mode.", (char *)NULL);
1816
1817 if (cxref_style)
1818 {
1819 if (vgrind_style)
1820 fprintf (stdout, "%s %s %d\n",
1821 np->name, np->file, (np->lno + 63) / 64);
1822 else
1823 fprintf (stdout, "%-16s %3d %-16s %s\n",
1824 np->name, np->lno, np->file, np->pat);
1825 }
1826 else
1827 {
1828 fprintf (tagf, "%s\t%s\t", np->name, np->file);
1829
1830 if (np->is_func)
1831 { /* a function */
1832 putc (searchar, tagf);
1833 putc ('^', tagf);
1834
1835 for (sp = np->pat; *sp; sp++)
1836 {
1837 if (*sp == '\\' || *sp == searchar)
1838 putc ('\\', tagf);
1839 putc (*sp, tagf);
1840 }
1841 putc (searchar, tagf);
1842 }
1843 else
1844 { /* a typedef; text pattern inadequate */
1845 fprintf (tagf, "%d", np->lno);
1846 }
1847 putc ('\n', tagf);
1848 }
1849 }
1850
1851 /* Output subentries that follow this one */
1852 put_entries (np->right);
1853 }
1854
/* Length of a number's decimal representation, that is the number of
   digits of NUM.  The result is 1 for zero and for negative numbers. */
static int
number_len (num)
     long num;
{
  int digits = 0;

  /* Strip one digit per round; the first round always counts.  */
  do
    {
      digits += 1;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
1865
1866 /*
1867 * Return total number of characters that put_entries will output for
1868 * the nodes in the subtree of the specified node. Works only if
1869 * we are not ctags, but called only in that case. This count
1870 * is irrelevant with the new tags.el, but is still supplied for
1871 * backward compatibility.
1872 */
1873 static int
1874 total_size_of_entries (np)
1875 register node *np;
1876 {
1877 register int total;
1878
1879 if (np == NULL)
1880 return 0;
1881
1882 for (total = 0; np != NULL; np = np->right)
1883 {
1884 /* Count left subentries. */
1885 total += total_size_of_entries (np->left);
1886
1887 /* Count this entry */
1888 total += strlen (np->pat) + 1;
1889 total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1890 if (np->name != NULL)
1891 total += 1 + strlen (np->name); /* \001name */
1892 }
1893
1894 return total;
1895 }
1896 \f
/*
 * The C symbol tables.
 */
/* Kind of a keyword of the C family of languages.  This is the `type'
   field of the entries of the gperf word list below, and the value
   returned by C_symtype.  */
enum sym_type
{
  /* Returned by C_symtype for a word that is not a keyword, or not a
     keyword of the requested language variant. */
  st_none,
  /* Objective C: @interface and @protocol; @implementation; @end. */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* DEFUN, SYSCALL, ENTRY, PSEUDO. */
  st_C_gnumacro,
  /* if, for, while, switch, return, import, package, friend. */
  st_C_ignore,
  /* Java: extends, implements. */
  st_C_javastruct,
  /* C++: operator. */
  st_C_operator,
  /* struct, union, class, namespace, interface, domain; then extern,
     enum, define and typedef, one kind each; type specifiers and
     qualifiers such as int, static, const share st_C_typespec. */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};

/* The lookup functions defined below. */
static unsigned int hash P_((const char *, unsigned int));
static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
static enum sym_type C_symtype P_((char *, int, int));
1914
1915 /* Feed stuff between (but not including) %[ and %] lines to:
1916 gperf -c -k 1,3 -o -p -r -t
1917 %[
1918 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1919 %%
1920 if, 0, st_C_ignore
1921 for, 0, st_C_ignore
1922 while, 0, st_C_ignore
1923 switch, 0, st_C_ignore
1924 return, 0, st_C_ignore
1925 @interface, 0, st_C_objprot
1926 @protocol, 0, st_C_objprot
1927 @implementation,0, st_C_objimpl
1928 @end, 0, st_C_objend
1929 import, C_JAVA, st_C_ignore
1930 package, C_JAVA, st_C_ignore
1931 friend, C_PLPL, st_C_ignore
1932 extends, C_JAVA, st_C_javastruct
1933 implements, C_JAVA, st_C_javastruct
1934 interface, C_JAVA, st_C_struct
1935 class, C_PLPL, st_C_struct
1936 namespace, C_PLPL, st_C_struct
1937 domain, C_STAR, st_C_struct
1938 union, 0, st_C_struct
1939 struct, 0, st_C_struct
1940 extern, 0, st_C_extern
1941 enum, 0, st_C_enum
1942 typedef, 0, st_C_typedef
1943 define, 0, st_C_define
1944 operator, C_PLPL, st_C_operator
1945 bool, C_PLPL, st_C_typespec
1946 long, 0, st_C_typespec
1947 short, 0, st_C_typespec
1948 int, 0, st_C_typespec
1949 char, 0, st_C_typespec
1950 float, 0, st_C_typespec
1951 double, 0, st_C_typespec
1952 signed, 0, st_C_typespec
1953 unsigned, 0, st_C_typespec
1954 auto, 0, st_C_typespec
1955 void, 0, st_C_typespec
1956 static, 0, st_C_typespec
1957 const, 0, st_C_typespec
1958 volatile, 0, st_C_typespec
1959 explicit, C_PLPL, st_C_typespec
1960 mutable, C_PLPL, st_C_typespec
1961 typename, C_PLPL, st_C_typespec
1962 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1963 DEFUN, 0, st_C_gnumacro
1964 SYSCALL, 0, st_C_gnumacro
1965 ENTRY, 0, st_C_gnumacro
1966 PSEUDO, 0, st_C_gnumacro
1967 # These are defined inside C functions, so currently they are not met.
1968 # EXFUN used in glibc, DEFVAR_* in emacs.
1969 #EXFUN, 0, st_C_gnumacro
1970 #DEFVAR_, 0, st_C_gnumacro
1971 %]
1972 and replace lines between %< and %> with its output. */
1973 /*%<*/
1974 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
1975 /* Command-line: gperf -c -k 1,3 -o -p -r -t */
/* A keyword: its spelling, the language variants it belongs to and its
   kind.  C_EXT is either 0, for a keyword of every variant, or is
   made of C_* flags, in which case C_symtype only accepts the keyword
   when asked about a variant having one of those flags. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };

/* Figures describing the word list of in_word_set, computed by gperf. */
#define TOTAL_KEYWORDS 46
#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 15
#define MIN_HASH_VALUE 13
#define MAX_HASH_VALUE 123
/* maximum key range = 111, duplicates = 0 */
1984
/* Hash function produced by gperf for the keywords above.  The value
   is LEN plus the table entry of the first character of STR plus,
   unless LEN is 1 or 2, the table entry of its third character. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 3, 124, 124, 124, 43, 6,
      11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      11, 124, 124, 58, 7, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 57, 7, 42,
      4, 14, 52, 0, 124, 53, 124, 124, 29, 11,
      6, 35, 32, 124, 29, 34, 59, 58, 51, 24,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124
    };
  register int hval = len;

  /* The third character takes part for every length but 1 and 2. */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  /* The first character always takes part. */
  hval += asso_values[(unsigned char)str[0]];
  return hval;
}
2036
2037 #ifdef __GNUC__
2038 __inline
2039 #endif
2040 static struct C_stab_entry *
2041 in_word_set (str, len)
2042 register const char *str;
2043 register unsigned int len;
2044 {
2045 static struct C_stab_entry wordlist[] =
2046 {
2047 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2048 {""}, {""}, {""}, {""},
2049 {"@end", 0, st_C_objend},
2050 {""}, {""}, {""}, {""},
2051 {"ENTRY", 0, st_C_gnumacro},
2052 {"@interface", 0, st_C_objprot},
2053 {""},
2054 {"domain", C_STAR, st_C_struct},
2055 {""},
2056 {"PSEUDO", 0, st_C_gnumacro},
2057 {""}, {""},
2058 {"namespace", C_PLPL, st_C_struct},
2059 {""}, {""},
2060 {"@implementation",0, st_C_objimpl},
2061 {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2062 {"long", 0, st_C_typespec},
2063 {"signed", 0, st_C_typespec},
2064 {"@protocol", 0, st_C_objprot},
2065 {""}, {""}, {""}, {""},
2066 {"bool", C_PLPL, st_C_typespec},
2067 {""}, {""}, {""}, {""}, {""}, {""},
2068 {"const", 0, st_C_typespec},
2069 {"explicit", C_PLPL, st_C_typespec},
2070 {"if", 0, st_C_ignore},
2071 {""},
2072 {"operator", C_PLPL, st_C_operator},
2073 {""},
2074 {"DEFUN", 0, st_C_gnumacro},
2075 {""}, {""},
2076 {"define", 0, st_C_define},
2077 {""}, {""}, {""}, {""}, {""},
2078 {"double", 0, st_C_typespec},
2079 {"struct", 0, st_C_struct},
2080 {""}, {""}, {""}, {""},
2081 {"short", 0, st_C_typespec},
2082 {""},
2083 {"enum", 0, st_C_enum},
2084 {"mutable", C_PLPL, st_C_typespec},
2085 {""},
2086 {"extern", 0, st_C_extern},
2087 {"extends", C_JAVA, st_C_javastruct},
2088 {"package", C_JAVA, st_C_ignore},
2089 {"while", 0, st_C_ignore},
2090 {""},
2091 {"for", 0, st_C_ignore},
2092 {""}, {""}, {""},
2093 {"volatile", 0, st_C_typespec},
2094 {""}, {""},
2095 {"import", C_JAVA, st_C_ignore},
2096 {"float", 0, st_C_typespec},
2097 {"switch", 0, st_C_ignore},
2098 {"return", 0, st_C_ignore},
2099 {"implements", C_JAVA, st_C_javastruct},
2100 {""},
2101 {"static", 0, st_C_typespec},
2102 {"typedef", 0, st_C_typedef},
2103 {"typename", C_PLPL, st_C_typespec},
2104 {"unsigned", 0, st_C_typespec},
2105 {""}, {""},
2106 {"char", 0, st_C_typespec},
2107 {"class", C_PLPL, st_C_struct},
2108 {""}, {""}, {""},
2109 {"void", 0, st_C_typespec},
2110 {""}, {""},
2111 {"friend", C_PLPL, st_C_ignore},
2112 {""}, {""}, {""},
2113 {"int", 0, st_C_typespec},
2114 {"union", 0, st_C_struct},
2115 {""}, {""}, {""},
2116 {"auto", 0, st_C_typespec},
2117 {"interface", C_JAVA, st_C_struct},
2118 {""},
2119 {"SYSCALL", 0, st_C_gnumacro}
2120 };
2121
2122 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2123 {
2124 register int key = hash (str, len);
2125
2126 if (key <= MAX_HASH_VALUE && key >= 0)
2127 {
2128 register const char *s = wordlist[key].name;
2129
2130 if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2131 return &wordlist[key];
2132 }
2133 }
2134 return 0;
2135 }
2136 /*%>*/
2137
2138 static enum sym_type
2139 C_symtype (str, len, c_ext)
2140 char *str;
2141 int len;
2142 int c_ext;
2143 {
2144 register struct C_stab_entry *se = in_word_set (str, len);
2145
2146 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2147 return st_none;
2148 return se->type;
2149 }
2150 \f
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 * The states marked "func:" are specific to functions, the one marked
 * "var-like:" to variable-like definitions.
 */
enum
{
  fvnone,                       /* nothing seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* A flag kept beside fvdef. */
bool fvextern;                  /* func or var: extern keyword seen; */
2168
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 *
 * consider_token enters tkeyseen on the typedef keyword (only when
 * the `typedefs' option is set) and ttypeseen on the following type;
 * C_entries enters tinbody on '{', tend on the matching '}' at brace
 * level 0, tignore once the tag has been made, and goes back to tnone
 * on ';' at brace level 0.
 */
enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2182
2183
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
enum
{
  snone,                        /* nothing seen yet */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen,                   /* colon seen after struct-like tag */
  sinbody                       /* in struct body: recognize member func defs*/
} structdef;

/*
 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
 * struct tag, and structtype is the type of the preceding struct-like
 * keyword.
 *
 * structtag points either to a string literal (the placeholders
 * "<uninited>", "<enum>", "_anonymous_", "<error>") or to a copy made
 * with savenstr in consider_token; that copy is never released.
 */
char *structtag = "<uninited>";
enum sym_type structtype;

/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a savenstr copy of the class name here.
 */
char *objtag = "<uninited>";
2210
/*
 * Yet another little state machine to deal with preprocessor lines.
 *
 * C_entries enters dsharpseen when it finds a '#' that starts a
 * preprocessor line, consider_token advances the state on the tokens
 * that follow, and the CNL macro resets it to dnone at the end of the
 * line (CNL_SAVE_DEFINEDEF deliberately leaves it alone).
 */
enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2221
/*
 * State machine for Objective C protocols and implementations.
 * Tom R.Hageman <tom@basil.icce.rug.nl>
 *
 * The method states (omethodsign .. omethodparm) accumulate the
 * method name piece by piece in token_name; see consider_token and
 * the ':' handling in C_entries.
 */
enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2241
2242
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
typedef struct
{
  bool valid;                   /* a tag is pending; make_C_tag acts only
                                   when this is set and clears it afterwards */
  char *str;                    /* NOTE(review): not referenced by the
                                   C parser code near this definition */
  bool named;                   /* the tag gets an explicit name taken
                                   from token_name */
  int linelen;                  /* C_entries stores tokoff + toklen + 1 */
  int lineno;                   /* value of lineno when the token was read */
  long linepos;                 /* linepos of the line buffer holding the
                                   token (newlinepos in C_entries) */
  char *buffer;                 /* text of the line holding the token
                                   (newlb.buffer in C_entries) */
} token;

token tok;                      /* latest token read */
2259
/*
 * Set this to TRUE, and the next token considered is called a function.
 * Used only for GNU emacs's function-defining macros.
 * consider_token sets it on a st_C_gnumacro keyword and clears it
 * again when it consumes the following token.
 */
bool next_token_is_func;

/*
 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
 * C_entries toggles it each time it meets "%%" in a yacc file.
 */
bool yacc_rules;

/*
 * methodlen is the length of the method name stored in token_name.
 * It grows as the pieces of an Objective C method name are appended.
 */
int methodlen;

static bool consider_token P_((char *, int, int, int, int, int, bool *));
static void make_C_tag P_((bool));
2278
2279 /*
2280 * consider_token ()
2281 * checks to see if the current token is at the start of a
2282 * function or variable, or corresponds to a typedef, or
2283 * is a struct/union/enum tag, or #define, or an enum constant.
2284 *
2285 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2286 * with args. C_EXT is which language we are looking at.
2287 *
2288 * Globals
2289 * fvdef IN OUT
2290 * structdef IN OUT
2291 * definedef IN OUT
2292 * typdef IN OUT
2293 * objdef IN OUT
2294 * next_token_is_func IN OUT
2295 */
2296
2297 static bool
2298 consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
2299 register char *str; /* IN: token pointer */
2300 register int len; /* IN: token length */
2301 register int c; /* IN: first char after the token */
2302 int c_ext; /* IN: C extensions mask */
2303 int cblev; /* IN: curly brace level */
2304 int parlev; /* IN: parenthesis level */
2305 bool *is_func_or_var; /* OUT: function or variable found */
2306 {
2307 enum sym_type toktype = C_symtype (str, len, c_ext);
2308
2309 /*
2310 * Advance the definedef state machine.
2311 */
2312 switch (definedef)
2313 {
2314 case dnone:
2315 /* We're not on a preprocessor line. */
2316 break;
2317 case dsharpseen:
2318 if (toktype == st_C_define)
2319 {
2320 definedef = ddefineseen;
2321 }
2322 else
2323 {
2324 definedef = dignorerest;
2325 }
2326 return FALSE;
2327 case ddefineseen:
2328 /*
2329 * Make a tag for any macro, unless it is a constant
2330 * and constantypedefs is FALSE.
2331 */
2332 definedef = dignorerest;
2333 *is_func_or_var = (c == '(');
2334 if (!*is_func_or_var && !constantypedefs)
2335 return FALSE;
2336 else
2337 return TRUE;
2338 case dignorerest:
2339 return FALSE;
2340 default:
2341 error ("internal error: definedef value.", (char *)NULL);
2342 }
2343
2344 /*
2345 * Now typedefs
2346 */
2347 switch (typdef)
2348 {
2349 case tnone:
2350 if (toktype == st_C_typedef)
2351 {
2352 if (typedefs)
2353 typdef = tkeyseen;
2354 fvextern = FALSE;
2355 fvdef = fvnone;
2356 return FALSE;
2357 }
2358 break;
2359 case tkeyseen:
2360 switch (toktype)
2361 {
2362 case st_none:
2363 case st_C_typespec:
2364 case st_C_struct:
2365 case st_C_enum:
2366 typdef = ttypeseen;
2367 break;
2368 }
2369 /* Do not return here, so the structdef stuff has a chance. */
2370 break;
2371 case tend:
2372 switch (toktype)
2373 {
2374 case st_C_typespec:
2375 case st_C_struct:
2376 case st_C_enum:
2377 return FALSE;
2378 }
2379 return TRUE;
2380 }
2381
2382 /*
2383 * This structdef business is currently only invoked when cblev==0.
2384 * It should be recursively invoked whatever the curly brace level,
2385 * and a stack of states kept, to allow for definitions of structs
2386 * within structs.
2387 *
2388 * This structdef business is NOT invoked when we are ctags and the
2389 * file is plain C. This is because a struct tag may have the same
2390 * name as another tag, and this loses with ctags.
2391 */
2392 switch (toktype)
2393 {
2394 case st_C_javastruct:
2395 if (structdef == stagseen)
2396 structdef = scolonseen;
2397 return FALSE;
2398 case st_C_struct:
2399 case st_C_enum:
2400 if (typdef == tkeyseen
2401 || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
2402 {
2403 structdef = skeyseen;
2404 structtype = toktype;
2405 }
2406 return FALSE;
2407 }
2408
2409 if (structdef == skeyseen)
2410 {
2411 /* Save the tag for struct/union/class, for functions and variables
2412 that may be defined inside. */
2413 if (structtype == st_C_struct)
2414 structtag = savenstr (str, len);
2415 else
2416 structtag = "<enum>";
2417 structdef = stagseen;
2418 return TRUE;
2419 }
2420
2421 if (typdef != tnone)
2422 definedef = dnone;
2423
2424 /* Detect GNU macros.
2425
2426 Writers of emacs code are recommended to put the
2427 first two args of a DEFUN on the same line.
2428
2429 The DEFUN macro, used in emacs C source code, has a first arg
2430 that is a string (the lisp function name), and a second arg that
2431 is a C function name. Since etags skips strings, the second arg
2432 is tagged. This is unfortunate, as it would be better to tag the
2433 first arg. The simplest way to deal with this problem would be
2434 to name the tag with a name built from the function name, by
2435 removing the initial 'F' character and substituting '-' for '_'.
2436 Anyway, this assumes that the conventions of naming lisp
2437 functions will never change. Currently, this method is not
2438 implemented. */
2439 if (definedef == dnone && toktype == st_C_gnumacro)
2440 {
2441 next_token_is_func = TRUE;
2442 return FALSE;
2443 }
2444 if (next_token_is_func)
2445 {
2446 next_token_is_func = FALSE;
2447 fvdef = fignore;
2448 *is_func_or_var = TRUE;
2449 return TRUE;
2450 }
2451
2452 /* Detect Objective C constructs. */
2453 switch (objdef)
2454 {
2455 case onone:
2456 switch (toktype)
2457 {
2458 case st_C_objprot:
2459 objdef = oprotocol;
2460 return FALSE;
2461 case st_C_objimpl:
2462 objdef = oimplementation;
2463 return FALSE;
2464 }
2465 break;
2466 case oimplementation:
2467 /* Save the class tag for functions or variables defined inside. */
2468 objtag = savenstr (str, len);
2469 objdef = oinbody;
2470 return FALSE;
2471 case oprotocol:
2472 /* Save the class tag for categories. */
2473 objtag = savenstr (str, len);
2474 objdef = otagseen;
2475 *is_func_or_var = TRUE;
2476 return TRUE;
2477 case oparenseen:
2478 objdef = ocatseen;
2479 *is_func_or_var = TRUE;
2480 return TRUE;
2481 case oinbody:
2482 break;
2483 case omethodsign:
2484 if (parlev == 0)
2485 {
2486 objdef = omethodtag;
2487 methodlen = len;
2488 grow_linebuffer (&token_name, methodlen + 1);
2489 strncpy (token_name.buffer, str, len);
2490 token_name.buffer[methodlen] = '\0';
2491 token_name.len = methodlen;
2492 return TRUE;
2493 }
2494 return FALSE;
2495 case omethodcolon:
2496 if (parlev == 0)
2497 objdef = omethodparm;
2498 return FALSE;
2499 case omethodparm:
2500 if (parlev == 0)
2501 {
2502 objdef = omethodtag;
2503 methodlen += len;
2504 grow_linebuffer (&token_name, methodlen + 1);
2505 strncat (token_name.buffer, str, len);
2506 token_name.len = methodlen;
2507 return TRUE;
2508 }
2509 return FALSE;
2510 case oignore:
2511 if (toktype == st_C_objend)
2512 {
2513 /* Memory leakage here: the string pointed by objtag is
2514 never released, because many tests would be needed to
2515 avoid breaking on incorrect input code. The amount of
2516 memory leaked here is the sum of the lengths of the
2517 class tags.
2518 free (objtag); */
2519 objdef = onone;
2520 }
2521 return FALSE;
2522 }
2523
2524 /* A function, variable or enum constant? */
2525 switch (toktype)
2526 {
2527 case st_C_extern:
2528 fvextern = TRUE;
2529 /* FALLTHRU */
2530 case st_C_typespec:
2531 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2532 fvdef = fvnone; /* should be useless */
2533 return FALSE;
2534 case st_C_ignore:
2535 fvextern = FALSE;
2536 fvdef = vignore;
2537 return FALSE;
2538 case st_C_operator:
2539 fvdef = foperator;
2540 *is_func_or_var = TRUE;
2541 return TRUE;
2542 case st_none:
2543 if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
2544 {
2545 fvdef = foperator;
2546 *is_func_or_var = TRUE;
2547 return TRUE;
2548 }
2549 if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
2550 return TRUE;
2551 if (fvdef == fvnone)
2552 {
2553 fvdef = fvnameseen; /* function or variable */
2554 *is_func_or_var = TRUE;
2555 return TRUE;
2556 }
2557 break;
2558 }
2559
2560 return FALSE;
2561 }
2562
2563 /*
2564 * C_entries ()
2565 * This routine finds functions, variables, typedefs,
2566 * #define's, enum constants and struct/union/enum definitions in
2567 * C syntax and adds them to the list.
2568 */
/* The macros below are meant to be expanded inside C_entries only:
   they refer to its local variables curndx, newndx, lp, quotednl,
   savetok and inf, and to the lbs array of line buffers. */

/* TRUE when the buffer being read into is also the one that holds
   the most recently read line. */
#define current_lb_is_new (newndx == curndx)
/* Make the other of the two line buffers the current one. */
#define switch_line_buffers() (curndx = 1 - curndx)

/* Shorthands for the current, the other and the newest line buffer,
   and for the linepos recorded with each of them. */
#define curlb (lbs[curndx].lb)
#define othlb (lbs[1-curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define othlinepos (lbs[1-curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Read the next input line into the current line buffer, updating
   lineno, linecharno and charno, restart lp at the beginning of the
   line and clear quotednl.  The definedef state is left as it is. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  lineno++;                                                             \
  linecharno = charno;                                                  \
  charno += readline (&curlb, inf);                                     \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also bring back the token that was
   saved in savetok (if any) and reset definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetok.valid)                                                    \
    {                                                                   \
      tok = savetok;                                                    \
      savetok.valid = FALSE;                                            \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2600
2601
2602 static void
2603 make_C_tag (isfun)
2604 bool isfun;
2605 {
2606 /* This function should never be called when tok.valid is FALSE, but
2607 we must protect against invalid input or internal errors. */
2608 if (tok.valid)
2609 {
2610 if (traditional_tag_style)
2611 {
2612 /* This was the original code. Now we call new_pfnote instead,
2613 which uses the new method for naming tags (see new_pfnote). */
2614 char *name = NULL;
2615
2616 if (CTAGS || tok.named)
2617 name = savestr (token_name.buffer);
2618 pfnote (name, isfun,
2619 tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2620 }
2621 else
2622 new_pfnote (token_name.buffer, token_name.len, isfun,
2623 tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2624 tok.valid = FALSE;
2625 }
2626 else if (DEBUG)
2627 abort ();
2628 }
2629
2630
/*
 * Scan the file INF as C-like source and make tags for what is found.
 *
 * The input is read one line at a time (through the CNL and
 * CNL_SAVE_DEFINEDEF macros) and examined one character at a time.
 * Comments, string literals and character literals are skipped.
 * Identifiers are collected into tokens and handed to consider_token;
 * punctuation characters drive the fvdef, typdef, structdef, definedef
 * and objdef state machines and trigger make_C_tag when a definition
 * is complete.
 *
 * C_EXT is a mask of language extensions; the bits tested here are
 * YACC, C_PLPL and C_JAVA.
 *
 * Two line buffers are used (see curlb and newlb): when a token is
 * accepted but cannot be tagged yet, the buffers are switched so that
 * the following lines are read into the other buffer while tok.buffer
 * keeps pointing to the line that holds the token.
 */
static void
C_entries (c_ext, inf)
     int c_ext;                 /* extension of C */
     FILE *inf;                 /* input file */
{
  register char c;              /* latest char read; '\0' for end of line */
  register char *lp;            /* pointer one beyond the character `c' */
  int curndx, newndx;           /* indices for current and new lb */
  register int tokoff;          /* offset in line of start of current token */
  register int toklen;          /* length of current token */
  char *qualifier;              /* string used to qualify names */
  int qlen;                     /* length of qualifier */
  int cblev;                    /* current curly brace level */
  int parlev;                   /* current parenthesis level */
  bool incomm, inquote, inchar, quotednl, midtoken;
  bool purec, cplpl, cjava;
  token savetok;                /* token saved during preprocessor handling */


  tokoff = toklen = 0;          /* keep compiler quiet */
  curndx = newndx = 0;
  lineno = 0;
  charno = 0;
  /* Start with an empty line, so that the first character seen is the
     '\0' that makes the loop read the first real line. */
  lp = curlb.buffer;
  *lp = 0;

  fvdef = fvnone; fvextern = FALSE; typdef = tnone;
  structdef = snone; definedef = dnone; objdef = onone;
  next_token_is_func = yacc_rules = FALSE;
  midtoken = inquote = inchar = incomm = quotednl = FALSE;
  tok.valid = savetok.valid = FALSE;
  cblev = 0;
  parlev = 0;
  purec = !(c_ext & ~YACC); /* no extensions (apart from possibly yacc) */
  cplpl = (c_ext & C_PLPL) == C_PLPL;
  cjava = (c_ext & C_JAVA) == C_JAVA;
  /* Members are qualified as `Class.member' in Java and as
     `Class::member' otherwise. */
  if (cjava)
    { qualifier = "."; qlen = 1; }
  else
    { qualifier = "::"; qlen = 2; }

  while (!feof (inf))
    {
      c = *lp++;
      if (c == '\\')
        {
          /* If we're at the end of the line, the next character is a
             '\0'; don't skip it, because it's the thing that tells us
             to read the next line.  */
          if (*lp == '\0')
            {
              quotednl = TRUE;
              continue;
            }
          /* Otherwise skip the escaped character and treat the pair
             as a blank. */
          lp++;
          c = ' ';
        }
      else if (incomm)
        {
          switch (c)
            {
            case '*':
              if (*lp == '/')
                {
                  c = *lp++;
                  incomm = FALSE;
                }
              break;
            case '\0':
              /* Newlines inside comments do not end macro definitions in
                 traditional cpp. */
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else if (inquote)
        {
          switch (c)
            {
            case '"':
              inquote = FALSE;
              break;
            case '\0':
              /* Newlines inside strings do not end macro definitions
                 in traditional cpp, even though compilers don't
                 usually accept them. */
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else if (inchar)
        {
          switch (c)
            {
            case '\0':
              /* Hmmm, something went wrong. */
              CNL ();
              /* FALLTHRU */
            case '\'':
              inchar = FALSE;
              break;
            }
          continue;
        }
      else
        switch (c)
          {
          case '"':
            inquote = TRUE;
            if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
              {
                fvextern = FALSE;
                fvdef = fvnone;
              }
            continue;
          case '\'':
            inchar = TRUE;
            if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
              {
                fvextern = FALSE;
                fvdef = fvnone;
              }
            continue;
          case '/':
            if (*lp == '*')
              {
                lp++;
                incomm = TRUE;
                continue;
              }
            else if (/* cplpl && */ *lp == '/')
              {
                /* A `//' comment: handle the rest of the line as if
                   the line ended here. */
                c = '\0';
                break;
              }
            else
              break;
          case '%':
            if ((c_ext & YACC) && *lp == '%')
              {
                /* entering or exiting rules section in yacc file */
                lp++;
                definedef = dnone; fvdef = fvnone; fvextern = FALSE;
                typdef = tnone; structdef = snone;
                next_token_is_func = FALSE;
                midtoken = inquote = inchar = incomm = quotednl = FALSE;
                cblev = 0;
                yacc_rules = !yacc_rules;
                continue;
              }
            else
              break;
          case '#':
            if (definedef == dnone)
              {
                char *cp;
                bool cpptoken = TRUE;

                /* Look back on this line.  If all blanks, or nonblanks
                   followed by an end of comment, this is a preprocessor
                   token. */
                for (cp = newlb.buffer; cp < lp-1; cp++)
                  if (!iswhite (*cp))
                    {
                      if (*cp == '*' && *(cp+1) == '/')
                        {
                          cp++;
                          cpptoken = TRUE;
                        }
                      else
                        cpptoken = FALSE;
                    }
                if (cpptoken)
                  definedef = dsharpseen;
              } /* if (definedef == dnone) */

            continue;
          } /* switch (c) */


      /* Consider token only if some complicated conditions are satisfied. */
      if ((definedef != dnone
           || (cblev == 0 && structdef != scolonseen)
           || (cblev == 1 && cplpl && structdef == sinbody)
           || (structdef == sinbody && purec))
          && typdef != tignore
          && definedef != dignorerest
          && fvdef != finlist)
        {
          if (midtoken)
            {
              if (endtoken (c))
                {
                  bool funorvar = FALSE;

                  if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
                    {
                      /*
                       * This handles :: in the middle, but not at the
                       * beginning of an identifier.  Also, space-separated
                       * :: is not recognised.
                       */
                      lp += 2;
                      toklen += 2;
                      c = lp[-1];
                      goto intok;
                    }
                  else
                    {
                      if (yacc_rules
                          || consider_token (newlb.buffer + tokoff, toklen, c,
                                             c_ext, cblev, parlev, &funorvar))
                        {
                          if (fvdef == foperator)
                            {
                              /* Extend the token over the operator
                                 symbol that follows the keyword, up to
                                 the next blank or open parenthesis. */
                              char *oldlp = lp;
                              lp = skip_spaces (lp-1);
                              if (*lp != '\0')
                                lp += 1;
                              while (*lp != '\0'
                                     && !iswhite (*lp) && *lp != '(')
                                lp += 1;
                              c = *lp++;
                              toklen += lp - oldlp;
                            }
                          tok.named = FALSE;
                          if (!purec
                              && funorvar
                              && definedef == dnone
                              && structdef == sinbody)
                            /* function or var defined in C++ class body */
                            {
                              int len = strlen (structtag) + qlen + toklen;
                              grow_linebuffer (&token_name, len + 1);
                              strcpy (token_name.buffer, structtag);
                              strcat (token_name.buffer, qualifier);
                              strncat (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              token_name.len = len;
                              tok.named = TRUE;
                            }
                          else if (objdef == ocatseen)
                            /* Objective C category */
                            {
                              int len = strlen (objtag) + 2 + toklen;
                              grow_linebuffer (&token_name, len + 1);
                              strcpy (token_name.buffer, objtag);
                              strcat (token_name.buffer, "(");
                              strncat (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              strcat (token_name.buffer, ")");
                              token_name.len = len;
                              tok.named = TRUE;
                            }
                          else if (objdef == omethodtag
                                   || objdef == omethodparm)
                            /* Objective C method */
                            {
                              /* consider_token has already put the
                                 name in token_name. */
                              tok.named = TRUE;
                            }
                          else
                            {
                              grow_linebuffer (&token_name, toklen + 1);
                              strncpy (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              token_name.buffer[toklen] = '\0';
                              token_name.len = toklen;
                              /* Name macros and members. */
                              tok.named = (structdef == stagseen
                                           || typdef == ttypeseen
                                           || typdef == tend
                                           || (funorvar
                                               && definedef == dignorerest)
                                           || (funorvar
                                               && definedef == dnone
                                               && structdef == sinbody));
                            }
                          tok.lineno = lineno;
                          tok.linelen = tokoff + toklen + 1;
                          tok.buffer = newlb.buffer;
                          tok.linepos = newlinepos;
                          tok.valid = TRUE;

                          /* In these states the tag is made later, when
                             the following punctuation is seen: keep the
                             line of the token by reading further lines
                             into the other buffer.  Otherwise make the
                             tag at once. */
                          if (definedef == dnone
                              && (fvdef == fvnameseen
                                  || fvdef == foperator
                                  || structdef == stagseen
                                  || typdef == tend
                                  || objdef != onone))
                            {
                              if (current_lb_is_new)
                                switch_line_buffers ();
                            }
                          else
                            make_C_tag (funorvar);
                        }
                      midtoken = FALSE;
                    }
                } /* if (endtoken (c)) */
              else if (intoken (c))
                intok:
                {
                  toklen++;
                  continue;
                }
            } /* if (midtoken) */
          else if (begtoken (c))
            {
              switch (definedef)
                {
                case dnone:
                  switch (fvdef)
                    {
                    case fstartlist:
                      fvdef = finlist;
                      continue;
                    case flistseen:
                      make_C_tag (TRUE); /* a function */
                      fvdef = fignore;
                      break;
                    case fvnameseen:
                      fvdef = fvnone;
                      break;
                    }
                  if (structdef == stagseen && !cjava)
                    structdef = snone;
                  break;
                case dsharpseen:
                  /* Put the pending token aside while the preprocessor
                     line is handled; CNL brings it back. */
                  savetok = tok;
                }
              /* In the rules part of a yacc file a token is started
                 only at the beginning of a line. */
              if (!yacc_rules || lp == newlb.buffer + 1)
                {
                  tokoff = lp - 1 - newlb.buffer;
                  toklen = 1;
                  midtoken = TRUE;
                }
              continue;
            } /* if (begtoken) */
        } /* if must look at token */


      /* Detect end of line, colon, comma, semicolon and various braces
         after having handled a token.*/
      switch (c)
        {
        case ':':
          if (definedef != dnone)
            break;
          switch (objdef)
            {
            case  otagseen:
              objdef = oignore;
              make_C_tag (TRUE); /* an Objective C class */
              break;
            case omethodtag:
            case omethodparm:
              objdef = omethodcolon;
              methodlen += 1;
              grow_linebuffer (&token_name, methodlen + 1);
              strcat (token_name.buffer, ":");
              token_name.len = methodlen;
              break;
            }
          if (structdef == stagseen)
            structdef = scolonseen;
          else
            switch (fvdef)
              {
              case fvnameseen:
                if (yacc_rules)
                  {
                    make_C_tag (FALSE); /* a yacc function */
                    fvdef = fignore;
                  }
                break;
              case fstartlist:
                fvextern = FALSE;
                fvdef = fvnone;
                break;
              }
          break;
        case ';':
          if (definedef != dnone)
            break;
          if (cblev == 0)
            switch (typdef)
              {
              case tend:
                make_C_tag (FALSE); /* a typedef */
                /* FALLTHRU */
              default:
                typdef = tnone;
              }
          switch (fvdef)
            {
            case fignore:
              break;
            case fvnameseen:
              if ((members && cblev == 1)
                  || (globals && cblev == 0 && (!fvextern || declarations)))
                make_C_tag (FALSE); /* a variable */
              fvextern = FALSE;
              fvdef = fvnone;
              tok.valid = FALSE;
              break;
            case flistseen:
              if (declarations && (cblev == 0 || cblev == 1))
                make_C_tag (TRUE); /* a function declaration */
              /* FALLTHRU */
            default:
              fvextern = FALSE;
              fvdef = fvnone;
              /* The following instruction invalidates the token.
                 Probably the token should be invalidated in all
                 other cases  where some state machine is reset. */
              tok.valid = FALSE;
            }
          if (structdef == stagseen)
            structdef = snone;
          break;
        case ',':
          if (definedef != dnone)
            break;
          switch (objdef)
            {
            case omethodtag:
            case omethodparm:
              make_C_tag (TRUE); /* an Objective C method */
              objdef = oinbody;
              break;
            }
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            case fvnameseen:
              if ((members && cblev == 1)
                  || (globals && cblev == 0 && (!fvextern || declarations)))
                make_C_tag (FALSE); /* a variable */
              break;
            default:
              fvdef = fvnone;
            }
          if (structdef == stagseen)
            structdef = snone;
          break;
        case '[':
          if (definedef != dnone)
            break;
          if (cblev == 0 && typdef == tend)
            {
              typdef = tignore;
              make_C_tag (FALSE);       /* a typedef */
              break;
            }
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            case fvnameseen:
              if ((members && cblev == 1)
                  || (globals && cblev == 0 && (!fvextern || declarations)))
                make_C_tag (FALSE); /* a variable */
              /* FALLTHRU */
            default:
              fvdef = fvnone;
            }
          if (structdef == stagseen)
            structdef = snone;
          break;
        case '(':
          if (definedef != dnone)
            break;
          if (objdef == otagseen && parlev == 0)
            objdef = oparenseen;
          switch (fvdef)
            {
            case fvnameseen:
              if (typdef == ttypeseen
                  && tok.valid
                  && *lp != '*'
                  && structdef != sinbody)
                {
                  /* This handles constructs like:
                     typedef void OperatorFun (int fun); */
                  make_C_tag (FALSE);
                  typdef = tignore;
                }
              /* FALLTHRU */
            case foperator:
              fvdef = fstartlist;
              break;
            case flistseen:
              fvdef = finlist;
              break;
            }
          parlev++;
          break;
        case ')':
          if (definedef != dnone)
            break;
          if (objdef == ocatseen && parlev == 1)
            {
              make_C_tag (TRUE); /* an Objective C category */
              objdef = oignore;
            }
          if (--parlev == 0)
            {
              switch (fvdef)
                {
                case fstartlist:
                case finlist:
                  fvdef = flistseen;
                  break;
                }
              if (cblev == 0 && (typdef == tend))
                {
                  typdef = tignore;
                  make_C_tag (FALSE); /* a typedef */
                }
            }
          else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
            parlev = 0;
          break;
        case '{':
          if (definedef != dnone)
            break;
          if (typdef == ttypeseen)
            typdef = tinbody;
          switch (structdef)
            {
            case skeyseen:      /* unnamed struct */
              structdef = sinbody;
              structtag = "_anonymous_";
              break;
            case stagseen:
            case scolonseen:    /* named struct */
              structdef = sinbody;
              make_C_tag (FALSE);       /* a struct */
              break;
            }
          switch (fvdef)
            {
            case flistseen:
              make_C_tag (TRUE); /* a function */
              /* FALLTHRU */
            case fignore:
              fvdef = fvnone;
              break;
            case fvnone:
              switch (objdef)
                {
                case otagseen:
                  make_C_tag (TRUE); /* an Objective C class */
                  objdef = oignore;
                  break;
                case omethodtag:
                case omethodparm:
                  make_C_tag (TRUE); /* an Objective C method */
                  objdef = oinbody;
                  break;
                default:
                  /* Neutralize `extern "C" {' grot. */
                  if (cblev == 0 && structdef == snone && typdef == tnone)
                    cblev = -1;
                }
            }
          cblev++;
          break;
        case '*':
          if (definedef != dnone)
            break;
          if (fvdef == fstartlist)
            fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
          break;
        case '}':
          if (definedef != dnone)
            break;
          if (!noindentypedefs && lp == newlb.buffer + 1)
            {
              cblev = 0;        /* reset curly brace level if first column */
              parlev = 0;       /* also reset paren level, just in case... */
            }
          else if (cblev > 0)
            cblev--;
          if (cblev == 0)
            {
              if (typdef == tinbody)
                typdef = tend;
              /* Memory leakage here: the string pointed by structtag is
                 never released, because I fear to miss something and
                 break things while freeing the area.  The amount of
                 memory leaked here is the sum of the lengths of the
                 struct tags.
              if (structdef == sinbody)
                free (structtag); */

              structdef = snone;
              structtag = "<error>";
            }
          break;
        case '=':
          if (definedef != dnone)
            break;
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            case fvnameseen:
              if ((members && cblev == 1)
                  || (globals && cblev == 0 && (!fvextern || declarations)))
                make_C_tag (FALSE); /* a variable */
              /* FALLTHRU */
            default:
              fvdef = vignore;
            }
          break;
        case '+':
        case '-':
          if (objdef == oinbody && cblev == 0)
            {
              objdef = omethodsign;
              break;
            }
          /* FALLTHRU */
        case '#': case '~': case '&': case '%': case '/': case '|':
        case '^': case '!': case '<': case '>': case '.': case '?': case ']':
          if (definedef != dnone)
            break;
          /* These surely cannot follow a function tag in C. */
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            default:
              fvdef = fvnone;
            }
          break;
        case '\0':
          if (objdef == otagseen)
            {
              make_C_tag (TRUE); /* an Objective C class */
              objdef = oignore;
            }
          /* If a macro spans multiple lines don't reset its state. */
          if (quotednl)
            CNL_SAVE_DEFINEDEF ();
          else
            CNL ();
          break;
        } /* switch (c) */

    } /* while not eof */
}
3299
3300 /*
3301 * Process either a C++ file or a C file depending on the setting
3302 * of a global flag.
3303 */
3304 static void
3305 default_C_entries (inf)
3306 FILE *inf;
3307 {
3308 C_entries (cplusplus ? C_PLPL : 0, inf);
3309 }
3310
/* Tag INF as plain ANSI C, whatever the command line options say. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;              /* no language extensions */

  C_entries (dialect, inf);
}
3318
3319 /* Always do C++. */
3320 static void
3321 Cplusplus_entries (inf)
3322 FILE *inf;
3323 {
3324 C_entries (C_PLPL, inf);
3325 }
3326
3327 /* Always do Java. */
3328 static void
3329 Cjava_entries (inf)
3330 FILE *inf;
3331 {
3332 C_entries (C_JAVA, inf);
3333 }
3334
3335 /* Always do C*. */
3336 static void
3337 Cstar_entries (inf)
3338 FILE *inf;
3339 {
3340 C_entries (C_STAR, inf);
3341 }
3342
3343 /* Always do Yacc. */
3344 static void
3345 Yacc_entries (inf)
3346 FILE *inf;
3347 {
3348 C_entries (YACC, inf);
3349 }
3350 \f
/*
 * A useful macro: the header of a loop that reads FILE_POINTER one
 * line at a time into LINE_BUFFER.  It must be followed by the
 * statement to run for each line.
 *
 * lineno and charno are reset to 0 before the first line.  Before
 * each run of the loop body, and only while feof (FILE_POINTER) is
 * false, lineno is incremented, linecharno is set to the value of
 * charno at the start of the line, charno is advanced by the value
 * returned by readline, and CHAR_POINTER is set to the start of the
 * text of LINE_BUFFER.
 *
 * CHAR_POINTER is taken from LINE_BUFFER itself, so the macro works
 * with any line buffer and not only with the global `lb'.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
3361
3362
3363 /*
3364 * Read a file, but do no processing. This is used to do regexp
3365 * matching on files that have no language defined.
3366 */
3367 static void
3368 just_read_file (inf)
3369 FILE *inf;
3370 {
3371 register char *dummy;
3372
3373 LOOP_ON_INPUT_LINES (inf, lb, dummy)
3374 continue;
3375 }
3376 \f
3377 /* Fortran parsing */
3378
3379 static bool tail P_((char *));
3380 static void takeprec P_((void));
3381 static void getit P_((FILE *));
3382
3383 static bool
3384 tail (cp)
3385 char *cp;
3386 {
3387 register int len = 0;
3388
3389 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3390 cp++, len++;
3391 if (*cp == '\0' && !intoken (dbp[len]))
3392 {
3393 dbp += len;
3394 return TRUE;
3395 }
3396 return FALSE;
3397 }
3398
3399 static void
3400 takeprec ()
3401 {
3402 dbp = skip_spaces (dbp);
3403 if (*dbp != '*')
3404 return;
3405 dbp++;
3406 dbp = skip_spaces (dbp);
3407 if (strneq (dbp, "(*)", 3))
3408 {
3409 dbp += 3;
3410 return;
3411 }
3412 if (!ISDIGIT (*dbp))
3413 {
3414 --dbp; /* force failure */
3415 return;
3416 }
3417 do
3418 dbp++;
3419 while (ISDIGIT (*dbp));
3420 }
3421
3422 static void
3423 getit (inf)
3424 FILE *inf;
3425 {
3426 register char *cp;
3427
3428 dbp = skip_spaces (dbp);
3429 if (*dbp == '\0')
3430 {
3431 lineno++;
3432 linecharno = charno;
3433 charno += readline (&lb, inf);
3434 dbp = lb.buffer;
3435 if (dbp[5] != '&')
3436 return;
3437 dbp += 6;
3438 dbp = skip_spaces (dbp);
3439 }
3440 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3441 return;
3442 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3443 continue;
3444 pfnote (savenstr (dbp, cp-dbp), TRUE,
3445 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3446 }
3447
3448
3449 static void
3450 Fortran_functions (inf)
3451 FILE *inf;
3452 {
3453 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3454 {
3455 if (*dbp == '%')
3456 dbp++; /* Ratfor escape to fortran */
3457 dbp = skip_spaces (dbp);
3458 if (*dbp == '\0')
3459 continue;
3460 switch (lowcase (*dbp))
3461 {
3462 case 'i':
3463 if (tail ("integer"))
3464 takeprec ();
3465 break;
3466 case 'r':
3467 if (tail ("real"))
3468 takeprec ();
3469 break;
3470 case 'l':
3471 if (tail ("logical"))
3472 takeprec ();
3473 break;
3474 case 'c':
3475 if (tail ("complex") || tail ("character"))
3476 takeprec ();
3477 break;
3478 case 'd':
3479 if (tail ("double"))
3480 {
3481 dbp = skip_spaces (dbp);
3482 if (*dbp == '\0')
3483 continue;
3484 if (tail ("precision"))
3485 break;
3486 continue;
3487 }
3488 break;
3489 }
3490 dbp = skip_spaces (dbp);
3491 if (*dbp == '\0')
3492 continue;
3493 switch (lowcase (*dbp))
3494 {
3495 case 'f':
3496 if (tail ("function"))
3497 getit (inf);
3498 continue;
3499 case 's':
3500 if (tail ("subroutine"))
3501 getit (inf);
3502 continue;
3503 case 'e':
3504 if (tail ("entry"))
3505 getit (inf);
3506 continue;
3507 case 'b':
3508 if (tail ("blockdata") || tail ("block data"))
3509 {
3510 dbp = skip_spaces (dbp);
3511 if (*dbp == '\0') /* assume un-named */
3512 pfnote (savestr ("blockdata"), TRUE,
3513 lb.buffer, dbp - lb.buffer, lineno, linecharno);
3514 else
3515 getit (inf); /* look for name */
3516 }
3517 continue;
3518 }
3519 }
3520 }
3521 \f
3522 /*
3523 * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be>, 1998-04-24
3524 * Ada parsing
3525 */
3526
3527 static void adagetit P_((FILE *, char *));
3528
3529 /* Once we are positioned after an "interesting" keyword, let's get
3530 the real tag value necessary. */
3531 static void
3532 adagetit (inf, name_qualifier)
3533 FILE *inf;
3534 char *name_qualifier;
3535 {
3536 register char *cp;
3537 char *name;
3538 char c;
3539
3540 while (!feof (inf))
3541 {
3542 dbp = skip_spaces (dbp);
3543 if (*dbp == '\0'
3544 || (dbp[0] == '-' && dbp[1] == '-'))
3545 {
3546 lineno++;
3547 linecharno = charno;
3548 charno += readline (&lb, inf);
3549 dbp = lb.buffer;
3550 }
3551 switch (*dbp)
3552 {
3553 case 'b':
3554 case 'B':
3555 if (tail ("body"))
3556 {
3557 /* Skipping body of procedure body or package body or ....
3558 resetting qualifier to body instead of spec. */
3559 name_qualifier = "/b";
3560 continue;
3561 }
3562 break;
3563 case 't':
3564 case 'T':
3565 /* Skipping type of task type or protected type ... */
3566 if (tail ("type"))
3567 continue;
3568 break;
3569 }
3570 if (*dbp == '"')
3571 {
3572 dbp += 1;
3573 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3574 continue;
3575 }
3576 else
3577 {
3578 dbp = skip_spaces (dbp);
3579 for (cp = dbp;
3580 (*cp != '\0'
3581 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3582 cp++)
3583 continue;
3584 if (cp == dbp)
3585 return;
3586 }
3587 c = *cp;
3588 *cp = '\0';
3589 name = concat (dbp, name_qualifier, "");
3590 *cp = c;
3591 pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3592 if (c == '"')
3593 dbp = cp + 1;
3594 return;
3595 }
3596 }
3597
3598 static void
3599 Ada_funcs (inf)
3600 FILE *inf;
3601 {
3602 bool inquote = FALSE;
3603
3604 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3605 {
3606 while (*dbp != '\0')
3607 {
3608 /* Skip a string i.e. "abcd". */
3609 if (inquote || (*dbp == '"'))
3610 {
3611 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3612 if (dbp != NULL)
3613 {
3614 inquote = FALSE;
3615 dbp += 1;
3616 continue; /* advance char */
3617 }
3618 else
3619 {
3620 inquote = TRUE;
3621 break; /* advance line */
3622 }
3623 }
3624
3625 /* Skip comments. */
3626 if (dbp[0] == '-' && dbp[1] == '-')
3627 break; /* advance line */
3628
3629 /* Skip character enclosed in single quote i.e. 'a'
3630 and skip single quote starting an attribute i.e. 'Image. */
3631 if (*dbp == '\'')
3632 {
3633 dbp++ ;
3634 if (*dbp != '\0')
3635 dbp++;
3636 continue;
3637 }
3638
3639 /* Search for beginning of a token. */
3640 if (!begtoken (*dbp))
3641 {
3642 dbp++;
3643 continue; /* advance char */
3644 }
3645
3646 /* We are at the beginning of a token. */
3647 switch (*dbp)
3648 {
3649 case 'f':
3650 case 'F':
3651 if (!packages_only && tail ("function"))
3652 adagetit (inf, "/f");
3653 else
3654 break; /* from switch */
3655 continue; /* advance char */
3656 case 'p':
3657 case 'P':
3658 if (!packages_only && tail ("procedure"))
3659 adagetit (inf, "/p");
3660 else if (tail ("package"))
3661 adagetit (inf, "/s");
3662 else if (tail ("protected")) /* protected type */
3663 adagetit (inf, "/t");
3664 else
3665 break; /* from switch */
3666 continue; /* advance char */
3667 case 't':
3668 case 'T':
3669 if (!packages_only && tail ("task"))
3670 adagetit (inf, "/k");
3671 else if (typedefs && !packages_only && tail ("type"))
3672 {
3673 adagetit (inf, "/t");
3674 while (*dbp != '\0')
3675 dbp += 1;
3676 }
3677 else
3678 break; /* from switch */
3679 continue; /* advance char */
3680 }
3681
3682 /* Look for the end of the token. */
3683 while (!endtoken (*dbp))
3684 dbp++;
3685
3686 } /* advance char */
3687 } /* advance line */
3688 }
3689 \f
3690 /*
3691 * Bob Weiner, Motorola Inc., 4/3/94
3692 * Unix and microcontroller assembly tag handling
3693 * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3694 */
3695 static void
3696 Asm_labels (inf)
3697 FILE *inf;
3698 {
3699 register char *cp;
3700
3701 LOOP_ON_INPUT_LINES (inf, lb, cp)
3702 {
3703 /* If first char is alphabetic or one of [_.$], test for colon
3704 following identifier. */
3705 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3706 {
3707 /* Read past label. */
3708 cp++;
3709 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3710 cp++;
3711 if (*cp == ':' || iswhite (*cp))
3712 {
3713 /* Found end of label, so copy it and add it to the table. */
3714 pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3715 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3716 }
3717 }
3718 }
3719 }
3720 \f
3721 /*
3722 * Perl support by Bart Robinson <lomew@cs.utah.edu>
3723 * enhanced by Michael Ernst <mernst@alum.mit.edu>
3724 * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3725 * Perl variable names: /^(my|local).../
3726 */
3727 static void
3728 Perl_functions (inf)
3729 FILE *inf;
3730 {
3731 register char *cp;
3732
3733 LOOP_ON_INPUT_LINES (inf, lb, cp)
3734 {
3735 if (*cp++ == 's'
3736 && *cp++ == 'u'
3737 && *cp++ == 'b' && iswhite (*cp++))
3738 {
3739 cp = skip_spaces (cp);
3740 if (*cp != '\0')
3741 {
3742 char *sp = cp;
3743 while (*cp != '\0'
3744 && !iswhite (*cp) && *cp != '{' && *cp != '(')
3745 cp++;
3746 pfnote (savenstr (sp, cp-sp), TRUE,
3747 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3748 }
3749 }
3750 else if (globals /* only if tagging global vars is enabled */
3751 && ((cp = lb.buffer,
3752 *cp++ == 'm'
3753 && *cp++ == 'y')
3754 || (cp = lb.buffer,
3755 *cp++ == 'l'
3756 && *cp++ == 'o'
3757 && *cp++ == 'c'
3758 && *cp++ == 'a'
3759 && *cp++ == 'l'))
3760 && (*cp == '(' || iswhite (*cp)))
3761 {
3762 /* After "my" or "local", but before any following paren or space. */
3763 char *varname = NULL;
3764
3765 cp = skip_spaces (cp);
3766 if (*cp == '$' || *cp == '@' || *cp == '%')
3767 {
3768 char* varstart = ++cp;
3769 while (ISALNUM (*cp) || *cp == '_')
3770 cp++;
3771 varname = savenstr (varstart, cp-varstart);
3772 }
3773 else
3774 {
3775 /* Should be examining a variable list at this point;
3776 could insist on seeing an open parenthesis. */
3777 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
3778 cp++;
3779 }
3780
3781 /* Perhaps I should back cp up one character, so the TAGS table
3782 doesn't mention (and so depend upon) the following char. */
3783 pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3784 FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3785 }
3786 }
3787 }
3788 \f
3789 /*
3790 * Python support by Eric S. Raymond <esr@thyrsus.com>
3791 * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
3792 */
3793 static void
3794 Python_functions (inf)
3795 FILE *inf;
3796 {
3797 register char *cp;
3798
3799 LOOP_ON_INPUT_LINES (inf, lb, cp)
3800 {
3801 if (*cp++ == 'd'
3802 && *cp++ == 'e'
3803 && *cp++ == 'f' && iswhite (*cp++))
3804 {
3805 cp = skip_spaces (cp);
3806 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3807 cp++;
3808 pfnote (NULL, TRUE,
3809 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3810 }
3811
3812 cp = lb.buffer;
3813 if (*cp++ == 'c'
3814 && *cp++ == 'l'
3815 && *cp++ == 'a'
3816 && *cp++ == 's'
3817 && *cp++ == 's' && iswhite (*cp++))
3818 {
3819 cp = skip_spaces (cp);
3820 while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3821 cp++;
3822 pfnote (NULL, TRUE,
3823 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3824 }
3825 }
3826 }
3827 \f
3828 /* Idea by Corny de Souza
3829 * Cobol tag functions
3830 * We could look for anything that could be a paragraph name.
3831 * i.e. anything that starts in column 8 is one word and ends in a full stop.
3832 */
3833 static void
3834 Cobol_paragraphs (inf)
3835 FILE *inf;
3836 {
3837 register char *bp, *ep;
3838
3839 LOOP_ON_INPUT_LINES (inf, lb, bp)
3840 {
3841 if (lb.len < 9)
3842 continue;
3843 bp += 8;
3844
3845 /* If eoln, compiler option or comment ignore whole line. */
3846 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
3847 continue;
3848
3849 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
3850 continue;
3851 if (*ep++ == '.')
3852 pfnote (savenstr (bp, ep-bp), TRUE,
3853 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
3854 }
3855 }
3856 \f
3857 /* Added by Mosur Mohan, 4/22/88 */
3858 /* Pascal parsing */
3859
3860 /*
3861 * Locates tags for procedures & functions. Doesn't do any type- or
3862 * var-definitions. It does look for the keyword "extern" or
3863 * "forward" immediately following the procedure statement; if found,
3864 * the tag is skipped.
3865 */
3866 static void
3867 Pascal_functions (inf)
3868 FILE *inf;
3869 {
3870 linebuffer tline; /* mostly copied from C_entries */
3871 long save_lcno;
3872 int save_lineno, save_len;
3873 char c, *cp, *namebuf;
3874
3875 bool /* each of these flags is TRUE iff: */
3876 incomment, /* point is inside a comment */
3877 inquote, /* point is inside '..' string */
3878 get_tagname, /* point is after PROCEDURE/FUNCTION
3879 keyword, so next item = potential tag */
3880 found_tag, /* point is after a potential tag */
3881 inparms, /* point is within parameter-list */
3882 verify_tag; /* point has passed the parm-list, so the
3883 next token will determine whether this
3884 is a FORWARD/EXTERN to be ignored, or
3885 whether it is a real tag */
3886
3887 save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
3888 namebuf = NULL; /* keep compiler quiet */
3889 lineno = 0;
3890 charno = 0;
3891 dbp = lb.buffer;
3892 *dbp = '\0';
3893 initbuffer (&tline);
3894
3895 incomment = inquote = FALSE;
3896 found_tag = FALSE; /* have a proc name; check if extern */
3897 get_tagname = FALSE; /* have found "procedure" keyword */
3898 inparms = FALSE; /* found '(' after "proc" */
3899 verify_tag = FALSE; /* check if "extern" is ahead */
3900
3901
3902 while (!feof (inf)) /* long main loop to get next char */
3903 {
3904 c = *dbp++;
3905 if (c == '\0') /* if end of line */
3906 {
3907 lineno++;
3908 linecharno = charno;
3909 charno += readline (&lb, inf);
3910 dbp = lb.buffer;
3911 if (*dbp == '\0')
3912 continue;
3913 if (!((found_tag && verify_tag)
3914 || get_tagname))
3915 c = *dbp++; /* only if don't need *dbp pointing
3916 to the beginning of the name of
3917 the procedure or function */
3918 }
3919 if (incomment)
3920 {
3921 if (c == '}') /* within { } comments */
3922 incomment = FALSE;
3923 else if (c == '*' && *dbp == ')') /* within (* *) comments */
3924 {
3925 dbp++;
3926 incomment = FALSE;
3927 }
3928 continue;
3929 }
3930 else if (inquote)
3931 {
3932 if (c == '\'')
3933 inquote = FALSE;
3934 continue;
3935 }
3936 else
3937 switch (c)
3938 {
3939 case '\'':
3940 inquote = TRUE; /* found first quote */
3941 continue;
3942 case '{': /* found open { comment */
3943 incomment = TRUE;
3944 continue;
3945 case '(':
3946 if (*dbp == '*') /* found open (* comment */
3947 {
3948 incomment = TRUE;
3949 dbp++;
3950 }
3951 else if (found_tag) /* found '(' after tag, i.e., parm-list */
3952 inparms = TRUE;
3953 continue;
3954 case ')': /* end of parms list */
3955 if (inparms)
3956 inparms = FALSE;
3957 continue;
3958 case ';':
3959 if (found_tag && !inparms) /* end of proc or fn stmt */
3960 {
3961 verify_tag = TRUE;
3962 break;
3963 }
3964 continue;
3965 }
3966 if (found_tag && verify_tag && (*dbp != ' '))
3967 {
3968 /* check if this is an "extern" declaration */
3969 if (*dbp == '\0')
3970 continue;
3971 if (lowcase (*dbp == 'e'))
3972 {
3973 if (tail ("extern")) /* superfluous, really! */
3974 {
3975 found_tag = FALSE;
3976 verify_tag = FALSE;
3977 }
3978 }
3979 else if (lowcase (*dbp) == 'f')
3980 {
3981 if (tail ("forward")) /* check for forward reference */
3982 {
3983 found_tag = FALSE;
3984 verify_tag = FALSE;
3985 }
3986 }
3987 if (found_tag && verify_tag) /* not external proc, so make tag */
3988 {
3989 found_tag = FALSE;
3990 verify_tag = FALSE;
3991 pfnote (namebuf, TRUE,
3992 tline.buffer, save_len, save_lineno, save_lcno);
3993 continue;
3994 }
3995 }
3996 if (get_tagname) /* grab name of proc or fn */
3997 {
3998 if (*dbp == '\0')
3999 continue;
4000
4001 /* save all values for later tagging */
4002 grow_linebuffer (&tline, lb.len + 1);
4003 strcpy (tline.buffer, lb.buffer);
4004 save_lineno = lineno;
4005 save_lcno = linecharno;
4006
4007 /* grab block name */
4008 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4009 continue;
4010 namebuf = savenstr (dbp, cp-dbp);
4011 dbp = cp; /* set dbp to e-o-token */
4012 save_len = dbp - lb.buffer + 1;
4013 get_tagname = FALSE;
4014 found_tag = TRUE;
4015 continue;
4016
4017 /* and proceed to check for "extern" */
4018 }
4019 else if (!incomment && !inquote && !found_tag)
4020 {
4021 /* check for proc/fn keywords */
4022 switch (lowcase (c))
4023 {
4024 case 'p':
4025 if (tail ("rocedure")) /* c = 'p', dbp has advanced */
4026 get_tagname = TRUE;
4027 continue;
4028 case 'f':
4029 if (tail ("unction"))
4030 get_tagname = TRUE;
4031 continue;
4032 }
4033 }
4034 } /* while not eof */
4035
4036 free (tline.buffer);
4037 }
4038 \f
4039 /*
4040 * lisp tag functions
4041 * look for (def or (DEF, quote or QUOTE
4042 */
4043
4044 static int L_isdef P_((char *));
4045 static int L_isquote P_((char *));
4046 static void L_getit P_((void));
4047
/* Return 1 if the three characters following STRP[0] spell "def" in
   any mixture of upper and lower case, 0 otherwise.  STRP[0] itself
   (normally the open parenthesis) is not examined.  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
4056
/* Return 1 if the five characters following STRP[0] spell "quote" in
   any mixture of upper and lower case and the character after them
   satisfies iswhite; return 0 otherwise.  STRP[0] itself is not
   examined.  */
static int
L_isquote (strp)
     register char *strp;
{
  static char lower[] = "quote";
  static char upper[] = "QUOTE";
  int i;

  for (i = 0; lower[i] != '\0'; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;
  return iswhite (strp[i + 1]) ? 1 : 0;
}
4068
4069 static void
4070 L_getit ()
4071 {
4072 register char *cp;
4073
4074 if (*dbp == '\'') /* Skip prefix quote */
4075 dbp++;
4076 else if (*dbp == '(')
4077 {
4078 if (L_isquote (dbp))
4079 dbp += 7; /* Skip "(quote " */
4080 else
4081 dbp += 1; /* Skip "(" before name in (defstruct (foo)) */
4082 dbp = skip_spaces (dbp);
4083 }
4084
4085 for (cp = dbp /*+1*/;
4086 *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4087 cp++)
4088 continue;
4089 if (cp == dbp)
4090 return;
4091
4092 pfnote (savenstr (dbp, cp-dbp), TRUE,
4093 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4094 }
4095
4096 static void
4097 Lisp_functions (inf)
4098 FILE *inf;
4099 {
4100 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4101 {
4102 if (dbp[0] == '(')
4103 {
4104 if (L_isdef (dbp))
4105 {
4106 dbp = skip_non_spaces (dbp);
4107 dbp = skip_spaces (dbp);
4108 L_getit ();
4109 }
4110 else
4111 {
4112 /* Check for (foo::defmumble name-defined ... */
4113 do
4114 dbp++;
4115 while (*dbp != '\0' && !iswhite (*dbp)
4116 && *dbp != ':' && *dbp != '(' && *dbp != ')');
4117 if (*dbp == ':')
4118 {
4119 do
4120 dbp++;
4121 while (*dbp == ':');
4122
4123 if (L_isdef (dbp - 1))
4124 {
4125 dbp = skip_non_spaces (dbp);
4126 dbp = skip_spaces (dbp);
4127 L_getit ();
4128 }
4129 }
4130 }
4131 }
4132 }
4133 }
4134 \f
4135 /*
4136 * Postscript tag functions
4137 * Just look for lines where the first character is '/'
4138 * Richard Mlynarik <mly@adoc.xerox.com>
4139 * Also look at "defineps" for PSWrap
4140 * suggested by Masatake YAMATO <masata-y@is.aist-nara.ac.jp>
4141 */
4142 static void
4143 Postscript_functions (inf)
4144 FILE *inf;
4145 {
4146 register char *bp, *ep;
4147
4148 LOOP_ON_INPUT_LINES (inf, lb, bp)
4149 {
4150 if (bp[0] == '/')
4151 {
4152 for (ep = bp+1;
4153 *ep != '\0' && *ep != ' ' && *ep != '{';
4154 ep++)
4155 continue;
4156 pfnote (savenstr (bp, ep-bp), TRUE,
4157 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4158 }
4159 else if (strneq (bp, "defineps", 8))
4160 {
4161 bp = skip_non_spaces (bp);
4162 bp = skip_spaces (bp);
4163 get_tag (bp);
4164 }
4165 }
4166 }
4167
4168 \f
4169 /*
4170 * Scheme tag functions
4171 * look for (def... xyzzy
4172 * look for (def... (xyzzy
4173 * look for (def ... ((...(xyzzy ....
4174 * look for (set! xyzzy
4175 */
4176
4177 static void
4178 Scheme_functions (inf)
4179 FILE *inf;
4180 {
4181 register char *bp;
4182
4183 LOOP_ON_INPUT_LINES (inf, lb, bp)
4184 {
4185 if (bp[0] == '('
4186 && (bp[1] == 'D' || bp[1] == 'd')
4187 && (bp[2] == 'E' || bp[2] == 'e')
4188 && (bp[3] == 'F' || bp[3] == 'f'))
4189 {
4190 bp = skip_non_spaces (bp);
4191 /* Skip over open parens and white space */
4192 while (iswhite (*bp) || *bp == '(')
4193 bp++;
4194 get_tag (bp);
4195 }
4196 if (bp[0] == '('
4197 && (bp[1] == 'S' || bp[1] == 's')
4198 && (bp[2] == 'E' || bp[2] == 'e')
4199 && (bp[3] == 'T' || bp[3] == 't')
4200 && (bp[4] == '!' || bp[4] == '!')
4201 && (iswhite (bp[5])))
4202 {
4203 bp = skip_non_spaces (bp);
4204 bp = skip_spaces (bp);
4205 get_tag (bp);
4206 }
4207 }
4208 }
4209 \f
4210 /* Find tags in TeX and LaTeX input files. */
4211
/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* control sequence name, without the
                                   escape character; TEX_decode_env
                                   stores a savenstr copy here */
  int len;                      /* length of name; TEX_Token stops at
                                   the first entry whose len is not
                                   positive */
};
4220
4221 struct TEX_tabent *TEX_toktab = NULL; /* Table with tag tokens */
4222
4223 /* Default set of control sequences to put into TEX_toktab.
4224 The value of environment var TEXTAGS is prepended to this. */
4225
4226 char *TEX_defenv = "\
4227 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4228 :part:appendix:entry:index";
4229
4230 static void TEX_mode P_((FILE *));
4231 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4232 static int TEX_Token P_((char *));
4233
4234 char TEX_esc = '\\';
4235 char TEX_opgrp = '{';
4236 char TEX_clgrp = '}';
4237
4238 /*
4239 * TeX/LaTeX scanning loop.
4240 */
4241 static void
4242 TeX_functions (inf)
4243 FILE *inf;
4244 {
4245 char *cp, *lasthit;
4246 register int i;
4247
4248 /* Select either \ or ! as escape character. */
4249 TEX_mode (inf);
4250
4251 /* Initialize token table once from environment. */
4252 if (!TEX_toktab)
4253 TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4254
4255 LOOP_ON_INPUT_LINES (inf, lb, cp)
4256 {
4257 lasthit = cp;
4258 /* Look at each esc in line. */
4259 while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4260 {
4261 if (*++cp == '\0')
4262 break;
4263 linecharno += cp - lasthit;
4264 lasthit = cp;
4265 i = TEX_Token (lasthit);
4266 if (i >= 0)
4267 {
4268 /* We seem to include the TeX command in the tag name.
4269 register char *p;
4270 for (p = lasthit + TEX_toktab[i].len;
4271 *p != '\0' && *p != TEX_clgrp;
4272 p++)
4273 continue; */
4274 pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4275 lb.buffer, lb.len, lineno, linecharno);
4276 break; /* We only tag a line once */
4277 }
4278 }
4279 }
4280 }
4281
4282 #define TEX_LESC '\\'
4283 #define TEX_SESC '!'
4284 #define TEX_cmt '%'
4285
4286 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4287 chars accordingly. */
4288 static void
4289 TEX_mode (inf)
4290 FILE *inf;
4291 {
4292 int c;
4293
4294 while ((c = getc (inf)) != EOF)
4295 {
4296 /* Skip to next line if we hit the TeX comment char. */
4297 if (c == TEX_cmt)
4298 while (c != '\n')
4299 c = getc (inf);
4300 else if (c == TEX_LESC || c == TEX_SESC )
4301 break;
4302 }
4303
4304 if (c == TEX_LESC)
4305 {
4306 TEX_esc = TEX_LESC;
4307 TEX_opgrp = '{';
4308 TEX_clgrp = '}';
4309 }
4310 else
4311 {
4312 TEX_esc = TEX_SESC;
4313 TEX_opgrp = '<';
4314 TEX_clgrp = '>';
4315 }
4316 /* If the input file is compressed, inf is a pipe, and rewind may fail.
4317 No attempt is made to correct the situation. */
4318 rewind (inf);
4319 }
4320
4321 /* Read environment and prepend it to the default string.
4322 Build token table. */
4323 static struct TEX_tabent *
4324 TEX_decode_env (evarname, defenv)
4325 char *evarname;
4326 char *defenv;
4327 {
4328 register char *env, *p;
4329
4330 struct TEX_tabent *tab;
4331 int size, i;
4332
4333 /* Append default string to environment. */
4334 env = getenv (evarname);
4335 if (!env)
4336 env = defenv;
4337 else
4338 {
4339 char *oldenv = env;
4340 env = concat (oldenv, defenv, "");
4341 }
4342
4343 /* Allocate a token table */
4344 for (size = 1, p = env; p;)
4345 if ((p = etags_strchr (p, ':')) && *++p != '\0')
4346 size++;
4347 /* Add 1 to leave room for null terminator. */
4348 tab = xnew (size + 1, struct TEX_tabent);
4349
4350 /* Unpack environment string into token table. Be careful about */
4351 /* zero-length strings (leading ':', "::" and trailing ':') */
4352 for (i = 0; *env;)
4353 {
4354 p = etags_strchr (env, ':');
4355 if (!p) /* End of environment string. */
4356 p = env + strlen (env);
4357 if (p - env > 0)
4358 { /* Only non-zero strings. */
4359 tab[i].name = savenstr (env, p - env);
4360 tab[i].len = strlen (tab[i].name);
4361 i++;
4362 }
4363 if (*p)
4364 env = p + 1;
4365 else
4366 {
4367 tab[i].name = NULL; /* Mark end of table. */
4368 tab[i].len = 0;
4369 break;
4370 }
4371 }
4372 return tab;
4373 }
4374
4375 /* If the text at CP matches one of the tag-defining TeX command names,
4376 return the pointer to the first occurrence of that command in TEX_toktab.
4377 Otherwise return -1.
4378 Keep the capital `T' in `token' for dumb truncating compilers
4379 (this distinguishes it from `TEX_toktab' */
4380 static int
4381 TEX_Token (cp)
4382 char *cp;
4383 {
4384 int i;
4385
4386 for (i = 0; TEX_toktab[i].len > 0; i++)
4387 if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4388 return i;
4389 return -1;
4390 }
4391 \f
4392 /* Texinfo support. Dave Love, Mar. 2000. */
4393 static void
4394 Texinfo_functions (inf)
4395 FILE * inf;
4396 {
4397 char *cp, *start;
4398 LOOP_ON_INPUT_LINES (inf, lb, cp)
4399 {
4400 if ((*cp++ == '@' && *cp++ == 'n' && *cp++ == 'o' && *cp++ == 'd'
4401 && *cp++ == 'e' && iswhite (*cp++)))
4402 {
4403 while (iswhite (*cp))
4404 cp++;
4405 start = cp;
4406 while (*cp != '\0' && *cp != ',')
4407 cp++;
4408 pfnote (savenstr (start, cp - start), TRUE,
4409 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4410 }
4411 }
4412 }
4413 \f
4414 /*
4415 * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4416 *
4417 * Assumes that the predicate starts at column 0.
4418 * Only the first clause of a predicate is added.
4419 */
4420 static int prolog_pred P_((char *, char *));
4421 static void prolog_skip_comment P_((linebuffer *, FILE *));
4422 static int prolog_atom P_((char *, int));
4423
4424 static void
4425 Prolog_functions (inf)
4426 FILE *inf;
4427 {
4428 char *cp, *last;
4429 int len;
4430 int allocated;
4431
4432 allocated = 0;
4433 len = 0;
4434 last = NULL;
4435
4436 LOOP_ON_INPUT_LINES (inf, lb, cp)
4437 {
4438 if (cp[0] == '\0') /* Empty line */
4439 continue;
4440 else if (iswhite (cp[0])) /* Not a predicate */
4441 continue;
4442 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
4443 prolog_skip_comment (&lb, inf);
4444 else if ((len = prolog_pred (cp, last)) > 0)
4445 {
4446 /* Predicate. Store the function name so that we only
4447 generate a tag for the first clause. */
4448 if (last == NULL)
4449 last = xnew(len + 1, char);
4450 else if (len + 1 > allocated)
4451 last = xrnew (last, len + 1, char);
4452 allocated = len + 1;
4453 strncpy (last, cp, len);
4454 last[len] = '\0';
4455 }
4456 }
4457 }
4458
4459
4460 static void
4461 prolog_skip_comment (plb, inf)
4462 linebuffer *plb;
4463 FILE *inf;
4464 {
4465 char *cp;
4466
4467 do
4468 {
4469 for (cp = plb->buffer; *cp != '\0'; cp++)
4470 if (cp[0] == '*' && cp[1] == '/')
4471 return;
4472 lineno++;
4473 linecharno += readline (plb, inf);
4474 }
4475 while (!feof(inf));
4476 }
4477
4478 /*
4479 * A predicate definition is added if it matches:
4480 * <beginning of line><Prolog Atom><whitespace>(
4481 *
4482 * It is added to the tags database if it doesn't match the
4483 * name of the previous clause header.
4484 *
4485 * Return the size of the name of the predicate, or 0 if no header
4486 * was found.
4487 */
4488 static int
4489 prolog_pred (s, last)
4490 char *s;
4491 char *last; /* Name of last clause. */
4492 {
4493 int pos;
4494 int len;
4495
4496 pos = prolog_atom (s, 0);
4497 if (pos < 1)
4498 return 0;
4499
4500 len = pos;
4501 pos = skip_spaces (s + pos) - s;
4502
4503 if ((s[pos] == '(') || (s[pos] == '.'))
4504 {
4505 if (s[pos] == '(')
4506 pos++;
4507
4508 /* Save only the first clause. */
4509 if (last == NULL
4510 || len != (int)strlen (last)
4511 || !strneq (s, last, len))
4512 {
4513 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4514 return len;
4515 }
4516 }
4517 return 0;
4518 }
4519
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted.  */
  for (p++; ; )
    {
      if (*p == '\0')
        /* Multiline quoted atoms are ignored.  */
        return -1;
      if (*p == '\'')
        {
          p++;
          if (*p != '\'')
            return (int) (p - start);
          p++;                  /* A double quote */
        }
      else if (*p == '\\')
        {
          if (p[1] == '\0')
            return -1;
          p += 2;
        }
      else
        p++;
    }
}
4578 \f
4579 /*
4580 * Support for Erlang -- Anders Lindgren, Feb 1996.
4581 *
4582 * Generates tags for functions, defines, and records.
4583 *
4584 * Assumes that Erlang functions start at column 0.
4585 */
4586 static int erlang_func P_((char *, char *));
4587 static void erlang_attribute P_((char *));
4588 static int erlang_atom P_((char *, int));
4589
4590 static void
4591 Erlang_functions (inf)
4592 FILE *inf;
4593 {
4594 char *cp, *last;
4595 int len;
4596 int allocated;
4597
4598 allocated = 0;
4599 len = 0;
4600 last = NULL;
4601
4602 LOOP_ON_INPUT_LINES (inf, lb, cp)
4603 {
4604 if (cp[0] == '\0') /* Empty line */
4605 continue;
4606 else if (iswhite (cp[0])) /* Not function nor attribute */
4607 continue;
4608 else if (cp[0] == '%') /* comment */
4609 continue;
4610 else if (cp[0] == '"') /* Sometimes, strings start in column one */
4611 continue;
4612 else if (cp[0] == '-') /* attribute, e.g. "-define" */
4613 {
4614 erlang_attribute (cp);
4615 last = NULL;
4616 }
4617 else if ((len = erlang_func (cp, last)) > 0)
4618 {
4619 /*
4620 * Function. Store the function name so that we only
4621 * generates a tag for the first clause.
4622 */
4623 if (last == NULL)
4624 last = xnew (len + 1, char);
4625 else if (len + 1 > allocated)
4626 last = xrnew (last, len + 1, char);
4627 allocated = len + 1;
4628 strncpy (last, cp, len);
4629 last[len] = '\0';
4630 }
4631 }
4632 }
4633
4634
4635 /*
4636 * A function definition is added if it matches:
4637 * <beginning of line><Erlang Atom><whitespace>(
4638 *
4639 * It is added to the tags database if it doesn't match the
4640 * name of the previous clause header.
4641 *
4642 * Return the size of the name of the function, or 0 if no function
4643 * was found.
4644 */
4645 static int
4646 erlang_func (s, last)
4647 char *s;
4648 char *last; /* Name of last clause. */
4649 {
4650 int pos;
4651 int len;
4652
4653 pos = erlang_atom (s, 0);
4654 if (pos < 1)
4655 return 0;
4656
4657 len = pos;
4658 pos = skip_spaces (s + pos) - s;
4659
4660 /* Save only the first clause. */
4661 if (s[pos++] == '('
4662 && (last == NULL
4663 || len != (int)strlen (last)
4664 || !strneq (s, last, len)))
4665 {
4666 pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4667 return len;
4668 }
4669
4670 return 0;
4671 }
4672
4673
4674 /*
4675 * Handle attributes. Currently, tags are generated for defines
4676 * and records.
4677 *
4678 * They are on the form:
4679 * -define(foo, bar).
4680 * -define(Foo(M, N), M+N).
4681 * -record(graph, {vtab = notable, cyclic = true}).
4682 */
4683 static void
4684 erlang_attribute (s)
4685 char *s;
4686 {
4687 int pos;
4688 int len;
4689
4690 if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4691 {
4692 pos = skip_spaces (s + 7) - s;
4693 if (s[pos++] == '(')
4694 {
4695 pos = skip_spaces (s + pos) - s;
4696 len = erlang_atom (s, pos);
4697 if (len != 0)
4698 pfnote (savenstr (& s[pos], len), TRUE,
4699 s, pos + len, lineno, linecharno);
4700 }
4701 }
4702 return;
4703 }
4704
4705
/*
 * Consume an Erlang atom (or variable) starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * Accepted are an unquoted name (a letter or underscore followed by
 * letters, digits and underscores) and a single-quoted string in
 * which a backslash quotes the next character.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted.  */
  for (p++; *p != '\''; )
    {
      if (*p == '\0')
        /* Multiline quoted atoms are ignored.  */
        return -1;
      if (*p == '\\')
        {
          if (p[1] == '\0')
            return -1;
          p += 2;
        }
      else
        p++;
    }
  /* Count the closing quote as well.  */
  return (int) (p + 1 - start);
}
4755 \f
4756 #ifdef ETAGS_REGEXPS
4757
4758 static char *scan_separators P_((char *));
4759 static void analyse_regex P_((char *, bool));
4760 static void add_regex P_((char *, bool, language *));
4761 static char *substitute P_((char *, char *, struct re_registers *));
4762
/* Take a string like "/blah/" and turn it into "blah", handling
   quoted separator characters.  The first character of NAME is the
   separator; copying stops at the first unquoted occurrence of it
   (or at the end of the string).  A backslash followed by `t'
   becomes a Tab, a backslash followed by the separator becomes the
   separator, and any other backslash sequence is kept as is.
   Works in place: the result is stored, null terminated, at NAME.
   Returns a pointer to the terminating separator, or to the final
   NUL when the string is unterminated.  */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character; a trailing backslash is
             silently dropped.  */
          ++name;
          if (*name == '\0')
            break;
          if (*name == 't')
            *copyto++ = '\t';
          else if (*name == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote.  */
              *copyto++ = '\\';
              *copyto++ = *name;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string.  */
  *copyto = '\0';
  return name;
}
4805
4806 /* Look at the argument of --regex or --no-regex and do the right
4807 thing. Same for each line of a regexp file. */
4808 static void
4809 analyse_regex (regex_arg, ignore_case)
4810 char *regex_arg;
4811 bool ignore_case;
4812 {
4813 if (regex_arg == NULL)
4814 free_patterns (); /* --no-regex: remove existing regexps */
4815
4816 /* A real --regexp option or a line in a regexp file. */
4817 switch (regex_arg[0])
4818 {
4819 /* Comments in regexp file or null arg to --regex. */
4820 case '\0':
4821 case ' ':
4822 case '\t':
4823 break;
4824
4825 /* Read a regex file. This is recursive and may result in a
4826 loop, which will stop when the file descriptors are exhausted. */
4827 case '@':
4828 {
4829 FILE *regexfp;
4830 linebuffer regexbuf;
4831 char *regexfile = regex_arg + 1;
4832
4833 /* regexfile is a file containing regexps, one per line. */
4834 regexfp = fopen (regexfile, "r");
4835 if (regexfp == NULL)
4836 {
4837 pfatal (regexfile);
4838 return;
4839 }
4840 initbuffer (&regexbuf);
4841 while (readline_internal (&regexbuf, regexfp) > 0)
4842 analyse_regex (regexbuf.buffer, ignore_case);
4843 free (regexbuf.buffer);
4844 fclose (regexfp);
4845 }
4846 break;
4847
4848 /* Regexp to be used for a specific language only. */
4849 case '{':
4850 {
4851 language *lang;
4852 char *lang_name = regex_arg + 1;
4853 char *cp;
4854
4855 for (cp = lang_name; *cp != '}'; cp++)
4856 if (*cp == '\0')
4857 {
4858 error ("unterminated language name in regex: %s", regex_arg);
4859 return;
4860 }
4861 *cp = '\0';
4862 lang = get_language_from_name (lang_name);
4863 if (lang == NULL)
4864 return;
4865 add_regex (cp + 1, ignore_case, lang);
4866 }
4867 break;
4868
4869 /* Regexp to be used for any language. */
4870 default:
4871 add_regex (regex_arg, ignore_case, NULL);
4872 break;
4873 }
4874 }
4875
4876 /* Turn a name, which is an ed-style (but Emacs syntax) regular
4877 expression, into a real regular expression by compiling it. */
4878 static void
4879 add_regex (regexp_pattern, ignore_case, lang)
4880 char *regexp_pattern;
4881 bool ignore_case;
4882 language *lang;
4883 {
4884 char *name;
4885 const char *err;
4886 struct re_pattern_buffer *patbuf;
4887 pattern *pp;
4888
4889
4890 if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
4891 {
4892 error ("%s: unterminated regexp", regexp_pattern);
4893 return;
4894 }
4895 name = scan_separators (regexp_pattern);
4896 if (regexp_pattern[0] == '\0')
4897 {
4898 error ("null regexp", (char *)NULL);
4899 return;
4900 }
4901 (void) scan_separators (name);
4902
4903 patbuf = xnew (1, struct re_pattern_buffer);
4904 /* Translation table to fold case if appropriate. */
4905 patbuf->translate = (ignore_case) ? lc_trans : NULL;
4906 patbuf->fastmap = NULL;
4907 patbuf->buffer = NULL;
4908 patbuf->allocated = 0;
4909
4910 err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
4911 if (err != NULL)
4912 {
4913 error ("%s while compiling pattern", err);
4914 return;
4915 }
4916
4917 pp = p_head;
4918 p_head = xnew (1, pattern);
4919 p_head->regex = savestr (regexp_pattern);
4920 p_head->p_next = pp;
4921 p_head->language = lang;
4922 p_head->pattern = patbuf;
4923 p_head->name_pattern = savestr (name);
4924 p_head->error_signaled = FALSE;
4925 }
4926
4927 /*
4928 * Do the substitutions indicated by the regular expression and
4929 * arguments.
4930 */
4931 static char *
4932 substitute (in, out, regs)
4933 char *in, *out;
4934 struct re_registers *regs;
4935 {
4936 char *result, *t;
4937 int size, dig, diglen;
4938
4939 result = NULL;
4940 size = strlen (out);
4941
4942 /* Pass 1: figure out how much to allocate by finding all \N strings. */
4943 if (out[size - 1] == '\\')
4944 fatal ("pattern error in \"%s\"", out);
4945 for (t = etags_strchr (out, '\\');
4946 t != NULL;
4947 t = etags_strchr (t + 2, '\\'))
4948 if (ISDIGIT (t[1]))
4949 {
4950 dig = t[1] - '0';
4951 diglen = regs->end[dig] - regs->start[dig];
4952 size += diglen - 2;
4953 }
4954 else
4955 size -= 1;
4956
4957 /* Allocate space and do the substitutions. */
4958 result = xnew (size + 1, char);
4959
4960 for (t = result; *out != '\0'; out++)
4961 if (*out == '\\' && ISDIGIT (*++out))
4962 {
4963 /* Using "dig2" satisfies my debugger. Bleah. */
4964 dig = *out - '0';
4965 diglen = regs->end[dig] - regs->start[dig];
4966 strncpy (t, in + regs->start[dig], diglen);
4967 t += diglen;
4968 }
4969 else
4970 *t++ = *out;
4971 *t = '\0';
4972
4973 if (DEBUG && (t > result + size || t - result != (int)strlen (result)))
4974 abort ();
4975
4976 return result;
4977 }
4978
4979 /* Deallocate all patterns. */
4980 static void
4981 free_patterns ()
4982 {
4983 pattern *pp;
4984 while (p_head != NULL)
4985 {
4986 pp = p_head->p_next;
4987 free (p_head->regex);
4988 free (p_head->name_pattern);
4989 free (p_head);
4990 p_head = pp;
4991 }
4992 return;
4993 }
4994 \f
4995 static void
4996 get_tag (bp)
4997 register char *bp;
4998 {
4999 register char *cp;
5000
5001 if (*bp == '\0')
5002 return;
5003 /* Go till you get to white space or a syntactic break */
5004 for (cp = bp + 1;
5005 *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5006 cp++)
5007 continue;
5008 pfnote (savenstr (bp, cp-bp), TRUE,
5009 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5010 }
5011
5012 #endif /* ETAGS_REGEXPS */
5013 /* Initialize a linebuffer for use */
5014 static void
5015 initbuffer (lbp)
5016 linebuffer *lbp;
5017 {
5018 lbp->size = 200;
5019 lbp->buffer = xnew (200, char);
5020 }
5021
5022 /*
5023 * Read a line of text from `stream' into `lbp', excluding the
5024 * newline or CR-NL, if any. Return the number of characters read from
5025 * `stream', which is the length of the line including the newline.
5026 *
5027 * On DOS or Windows we do not count the CR character, if any, before the
5028 * NL, in the returned length; this mirrors the behavior of emacs on those
5029 * platforms (for text files, it translates CR-NL to NL as it reads in the
5030 * file).
5031 */
5032 static long
5033 readline_internal (lbp, stream)
5034 linebuffer *lbp;
5035 register FILE *stream;
5036 {
5037 char *buffer = lbp->buffer;
5038 register char *p = lbp->buffer;
5039 register char *pend;
5040 int chars_deleted;
5041
5042 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5043
5044 while (1)
5045 {
5046 register int c = getc (stream);
5047 if (p == pend)
5048 {
5049 /* We're at the end of linebuffer: expand it. */
5050 lbp->size *= 2;
5051 buffer = xrnew (buffer, lbp->size, char);
5052 p += buffer - lbp->buffer;
5053 pend = buffer + lbp->size;
5054 lbp->buffer = buffer;
5055 }
5056 if (c == EOF)
5057 {
5058 *p = '\0';
5059 chars_deleted = 0;
5060 break;
5061 }
5062 if (c == '\n')
5063 {
5064 if (p > buffer && p[-1] == '\r')
5065 {
5066 p -= 1;
5067 #ifdef DOS_NT
5068 /* Assume CRLF->LF translation will be performed by Emacs
5069 when loading this file, so CRs won't appear in the buffer.
5070 It would be cleaner to compensate within Emacs;
5071 however, Emacs does not know how many CRs were deleted
5072 before any given point in the file. */
5073 chars_deleted = 1;
5074 #else
5075 chars_deleted = 2;
5076 #endif
5077 }
5078 else
5079 {
5080 chars_deleted = 1;
5081 }
5082 *p = '\0';
5083 break;
5084 }
5085 *p++ = c;
5086 }
5087 lbp->len = p - buffer;
5088
5089 return lbp->len + chars_deleted;
5090 }
5091
5092 /*
5093 * Like readline_internal, above, but in addition try to match the
5094 * input line against relevant regular expressions.
5095 */
5096 static long
5097 readline (lbp, stream)
5098 linebuffer *lbp;
5099 FILE *stream;
5100 {
5101 /* Read new line. */
5102 long result = readline_internal (lbp, stream);
5103 #ifdef ETAGS_REGEXPS
5104 int match;
5105 pattern *pp;
5106
5107 /* Match against relevant patterns. */
5108 if (lbp->len > 0)
5109 for (pp = p_head; pp != NULL; pp = pp->p_next)
5110 {
5111 /* Only use generic regexps or those for the current language. */
5112 if (pp->language != NULL && pp->language != curlang)
5113 continue;
5114
5115 match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5116 switch (match)
5117 {
5118 case -2:
5119 /* Some error. */
5120 if (!pp->error_signaled)
5121 {
5122 error ("error while matching \"%s\"", pp->regex);
5123 pp->error_signaled = TRUE;
5124 }
5125 break;
5126 case -1:
5127 /* No match. */
5128 break;
5129 default:
5130 /* Match occurred. Construct a tag. */
5131 if (pp->name_pattern[0] != '\0')
5132 {
5133 /* Make a named tag. */
5134 char *name = substitute (lbp->buffer,
5135 pp->name_pattern, &pp->regs);
5136 if (name != NULL)
5137 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5138 }
5139 else
5140 {
5141 /* Make an unnamed tag. */
5142 pfnote ((char *)NULL, TRUE,
5143 lbp->buffer, match, lineno, linecharno);
5144 }
5145 break;
5146 }
5147 }
5148 #endif /* ETAGS_REGEXPS */
5149
5150 return result;
5151 }
5152 \f
/*
 * Return a newly allocated copy of the string CP.  The copy is made by
 * savenstr and so occupies strlen(cp)+1 characters.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5163
5164 /*
5165 * Return a pointer to a space of size LEN+1 allocated with xnew where
5166 * the string CP has been copied for at most the first LEN characters.
5167 */
5168 static char *
5169 savenstr (cp, len)
5170 char *cp;
5171 int len;
5172 {
5173 register char *dp;
5174
5175 dp = xnew (len + 1, char);
5176 strncpy (dp, cp, len);
5177 dp[len] = '\0';
5178 return dp;
5179 }
5180
/*
 * Return a pointer to the last place in the string SP where the
 * character C appears, or NULL if it does not appear.  The
 * terminating null is part of the string searched, so looking
 * for '\0' finds the end of SP.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* best candidate so far */
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
5202
5203
/*
 * Return a pointer to the first place in the string SP where the
 * character C appears, or NULL if it does not appear.  The
 * terminating null is part of the string searched, so looking
 * for '\0' finds the end of SP.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without finding C */
    }
}
5222
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5232
/* Return a pointer to the first character at or after CP that is
   either white, according to iswhite, or the null ending the string.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        break;
      cp++;
    }
  return cp;
}
5242
5243 /* Print error message and exit. */
5244 static void
5245 fatal (s1, s2)
5246 char *s1, *s2;
5247 {
5248 error (s1, s2);
5249 exit (BAD);
5250 }
5251
5252 static void
5253 pfatal (s1)
5254 char *s1;
5255 {
5256 perror (s1);
5257 exit (BAD);
5258 }
5259
5260 static void
5261 suggest_asking_for_help ()
5262 {
5263 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5264 progname,
5265 #ifdef LONG_OPTIONS
5266 "--help"
5267 #else
5268 "-h"
5269 #endif
5270 );
5271 exit (BAD);
5272 }
5273
5274 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
5275 static void
5276 error (s1, s2)
5277 const char *s1, *s2;
5278 {
5279 fprintf (stderr, "%s: ", progname);
5280 fprintf (stderr, s1, s2);
5281 fprintf (stderr, "\n");
5282 }
5283
5284 /* Return a newly-allocated string whose contents
5285 concatenate those of s1, s2, s3. */
5286 static char *
5287 concat (s1, s2, s3)
5288 char *s1, *s2, *s3;
5289 {
5290 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5291 char *result = xnew (len1 + len2 + len3 + 1, char);
5292
5293 strcpy (result, s1);
5294 strcpy (result + len1, s2);
5295 strcpy (result + len1 + len2, s3);
5296 result[len1 + len2 + len3] = '\0';
5297
5298 return result;
5299 }
5300 \f
5301 /* Does the same work as the system V getcwd, but does not need to
5302 guess the buffer size in advance. */
5303 static char *
5304 etags_getcwd ()
5305 {
5306 #ifdef HAVE_GETCWD
5307 int bufsize = 200;
5308 char *path = xnew (bufsize, char);
5309
5310 while (getcwd (path, bufsize) == NULL)
5311 {
5312 if (errno != ERANGE)
5313 pfatal ("getcwd");
5314 bufsize *= 2;
5315 free (path);
5316 path = xnew (bufsize, char);
5317 }
5318
5319 canonicalize_filename (path);
5320 return path;
5321
5322 #else /* not HAVE_GETCWD */
5323 #ifdef MSDOS
5324 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
5325
5326 getwd (path);
5327
5328 for (p = path; *p != '\0'; p++)
5329 if (*p == '\\')
5330 *p = '/';
5331 else
5332 *p = lowcase (*p);
5333
5334 return strdup (path);
5335 #else /* not MSDOS */
5336 linebuffer path;
5337 FILE *pipe;
5338
5339 initbuffer (&path);
5340 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5341 if (pipe == NULL || readline_internal (&path, pipe) == 0)
5342 pfatal ("pwd");
5343 pclose (pipe);
5344
5345 return path.buffer;
5346 #endif /* not MSDOS */
5347 #endif /* not HAVE_GETCWD */
5348 }
5349
5350 /* Return a newly allocated string containing the file name of FILE
5351 relative to the absolute directory DIR (which should end with a slash). */
5352 static char *
5353 relative_filename (file, dir)
5354 char *file, *dir;
5355 {
5356 char *fp, *dp, *afn, *res;
5357 int i;
5358
5359 /* Find the common root of file and dir (with a trailing slash). */
5360 afn = absolute_filename (file, cwd);
5361 fp = afn;
5362 dp = dir;
5363 while (*fp++ == *dp++)
5364 continue;
5365 fp--, dp--; /* back to the first differing char */
5366 #ifdef DOS_NT
5367 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5368 return afn;
5369 #endif
5370 do /* look at the equal chars until '/' */
5371 fp--, dp--;
5372 while (*fp != '/');
5373
5374 /* Build a sequence of "../" strings for the resulting relative file name. */
5375 i = 0;
5376 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5377 i += 1;
5378 res = xnew (3*i + strlen (fp + 1) + 1, char);
5379 res[0] = '\0';
5380 while (i-- > 0)
5381 strcat (res, "../");
5382
5383 /* Add the file name relative to the common root of file and dir. */
5384 strcat (res, fp + 1);
5385 free (afn);
5386
5387 return res;
5388 }
5389
/* Copy the null terminated string SRC to DST, which lies before SRC
   in the same buffer.  Characters are moved strictly front to back,
   one at a time, so the copy is well defined even though the two
   regions overlap, which is not the case for strcpy.  */
static void
shift_string_down (dst, src)
     register char *dst;
     register char *src;
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   FILE is used as it is if it is already absolute, otherwise it is
   appended to DIR.  The "/dirname/.." and "/." substrings are then
   removed from the result.  If nothing is left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/..": look backwards for the start of the component
                 that it cancels.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap: strcpy won't do.  */
              shift_string_down (cp, slashp + 3);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/.": drop it.  The regions overlap here as well.  */
              shift_string_down (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted.  RES is replaced by a fresh string,
         so release it first.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5450
5451 /* Return a newly allocated string containing the absolute
5452 file name of dir where FILE resides given DIR (which should
5453 end with a slash). */
5454 static char *
5455 absolute_dirname (file, dir)
5456 char *file, *dir;
5457 {
5458 char *slashp, *res;
5459 char save;
5460
5461 canonicalize_filename (file);
5462 slashp = etags_strrchr (file, '/');
5463 if (slashp == NULL)
5464 return savestr (dir);
5465 save = slashp[1];
5466 slashp[1] = '\0';
5467 res = absolute_filename (file, dir);
5468 slashp[1] = save;
5469
5470 return res;
5471 }
5472
/* Whether the string FN is an absolute file name, that is, whether it
   begins with a slash or, under DOS_NT, with a letter followed by a
   colon and a slash.  The argument string must have been
   canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
5485
/* Put the file name FN in canonical form.  Works in place.
   Under DOS_NT, a lower case drive letter is passed through upcase
   and every backslash becomes a slash.  Elsewhere FN is left alone.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action.  */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5504
5505 /* Increase the size of a linebuffer. */
5506 static void
5507 grow_linebuffer (lbp, toksize)
5508 linebuffer *lbp;
5509 int toksize;
5510 {
5511 while (lbp->size < toksize)
5512 lbp->size *= 2;
5513 lbp->buffer = xrnew (lbp->buffer, lbp->size, char);
5514 }
5515
/* Like malloc, but get a fatal error if memory is exhausted.
   Return a pointer to SIZE newly allocated bytes.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *result;

  result = (long *) malloc (size);
  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}
5526
/* Like realloc, but get a fatal error if memory is exhausted.
   Return a pointer to the block PTR resized to SIZE bytes.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *result;

  result = (long *) realloc (ptr, size);
  if (result != NULL)
    return result;

  fatal ("virtual memory exhausted", (char *)NULL);
  return result;
}