[EXIT_SUCCESS, EXIT_FAILURE]: Define them when no
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
3 Free Software Foundation, Inc. and Ken Arnold
4
5 This file is not considered part of GNU Emacs.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 /*
22 * Authors:
23 * Ctags originally by Ken Arnold.
24 * Fortran added by Jim Kleckner.
25 * Ed Pelegri-Llopart added C typedefs.
26 * Gnu Emacs TAGS format and modifications by RMS?
27 * 1989 Sam Kendall added C++.
28 * 1992 Joseph B. Wells improved C and C++ parsing.
29 * 1993 Francesco Potortì reorganised C and C++.
30 * 1994 Line-by-line regexp tags by Tom Tromey.
31 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
32 * 2002 #line directives by Francesco Potortì.
33 *
34 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
35 */
36
37 /*
38 * If you want to add support for a new language, start by looking at the LUA
39 * language, which is the simplest. Alternatively, consider shipping a
40 * configuration file containing regexp definitions for etags.
41 */
42
/* Revision identification string for this file.  It has external linkage
   and is not referenced anywhere in the part of the file shown here.
   NOTE(review): the leading "@(#)" looks like the marker searched for by
   the what(1) utility -- confirm.  */
char pot_etags_version[] = "@(#) pot revision number is 17.4";
44
/* Truth values used with the `bool' type of this file.  */
#define TRUE	1
#define FALSE	0

/* Normalise DEBUG: after this block it is always defined, as TRUE or
   FALSE, so that it can be tested with a plain `#if DEBUG'.  When
   debugging is off, NDEBUG is defined too, which disables assert.  */
#ifdef DEBUG
# undef DEBUG
# define DEBUG TRUE
#else
# define DEBUG  FALSE
# define NDEBUG		/* disable assert */
#endif
55
/* Build configuration.
   With config.h: regexp support and long options are both enabled, and
   PTR and __P are supplied only if config.h did not already define them.
   Without config.h (standalone compilation): __P, PTR and possibly const
   are chosen according to whether the compiler claims to be standard C.
   __P(args) wraps prototype argument lists so that they can be dropped
   for compilers without prototypes; PTR is the generic pointer type.  */
#ifdef HAVE_CONFIG_H
# include <config.h>
  /* On some systems, Emacs defines static as nothing for the sake
     of unexec.  We don't want that here since we don't use unexec. */
# undef static
# define ETAGS_REGEXPS		/* use the regexp features */
# define LONG_OPTIONS		/* accept long options */
# ifndef PTR			/* for Xemacs */
#   define PTR void *
# endif
# ifndef __P			/* for Xemacs */
#   define __P(args) args
# endif
#else  /* no config.h */
# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
#   define __P(args) args	/* use prototypes */
#   define PTR void *		/* for generic pointers */
# else /* not standard C */
#   define __P(args) ()		/* no prototypes */
#   define const		/* remove const for old compilers' sake */
#   define PTR long *		/* don't use void* */
# endif
#endif /* !HAVE_CONFIG_H */
79
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1		/* enables some compiler checks on GNU */
#endif

/* Normalise LONG_OPTIONS: after this block it is always defined, as TRUE
   or FALSE, so that it can be used both in `#if' and in C expressions.  */
#ifdef LONG_OPTIONS
# undef LONG_OPTIONS
# define LONG_OPTIONS TRUE
#else
# define LONG_OPTIONS  FALSE
#endif
90
/* WIN32_NATIVE is for Xemacs.
   MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
/* A WIN32_NATIVE build is mapped onto WINDOWSNT, with MSDOS undefined.  */
#ifdef WIN32_NATIVE
# undef MSDOS
# undef WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

/* Normalise MSDOS to TRUE or FALSE.  On MSDOS the DOS-specific headers
   are pulled in, and, when building without config.h, DOS_NT is defined
   and <sys/config.h> is included.  */
#ifdef MSDOS
# undef MSDOS
# define MSDOS TRUE
# include <fcntl.h>
# include <sys/param.h>
# include <io.h>
# ifndef HAVE_CONFIG_H
#   define DOS_NT
#   include <sys/config.h>
# endif
#else
# define MSDOS FALSE
#endif /* MSDOS */
112
/* Standard library declarations.
   On WINDOWSNT the needed headers are included directly, MAXPATHLEN is
   mapped to _MAX_PATH, and DOS_NT and HAVE_GETCWD are forced on.
   Elsewhere <stdlib.h> and <string.h> are used when STDC_HEADERS is
   defined; otherwise getenv is declared by hand and EXIT_SUCCESS and
   EXIT_FAILURE are defined here, with the values swapped on VMS.  */
#ifdef WINDOWSNT
# include <stdlib.h>
# include <fcntl.h>
# include <string.h>
# include <direct.h>
# include <io.h>
# define MAXPATHLEN _MAX_PATH
# undef HAVE_NTGUI
# undef  DOS_NT
# define DOS_NT
# ifndef HAVE_GETCWD
#   define HAVE_GETCWD
# endif /* undef HAVE_GETCWD */
#else /* not WINDOWSNT */
# ifdef STDC_HEADERS
#  include <stdlib.h>
#  include <string.h>
# else /* no standard C headers */
    extern char *getenv ();
#  ifdef VMS
#   define EXIT_SUCCESS	1
#   define EXIT_FAILURE	0
#  else /* no VMS */
#   define EXIT_SUCCESS	0
#   define EXIT_FAILURE	1
#  endif
# endif
#endif /* !WINDOWSNT */
141
142 #ifdef HAVE_UNISTD_H
143 # include <unistd.h>
144 #else
145 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
146 extern char *getcwd (char *buf, size_t size);
147 # endif
148 #endif /* HAVE_UNISTD_H */
149
150 #include <stdio.h>
151 #include <ctype.h>
152 #include <errno.h>
153 #ifndef errno
154 extern int errno;
155 #endif
156 #include <sys/types.h>
157 #include <sys/stat.h>
158
159 #include <assert.h>
160 #ifdef NDEBUG
161 # undef assert /* some systems have a buggy assert.h */
162 # define assert(x) ((void) 0)
163 #endif
164
165 #if !defined (S_ISREG) && defined (S_IFREG)
166 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
167 #endif
168
169 #if LONG_OPTIONS
170 # include <getopt.h>
171 #else
172 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
173 extern char *optarg;
174 extern int optind, opterr;
175 #endif /* LONG_OPTIONS */
176
177 #ifdef ETAGS_REGEXPS
178 # ifndef HAVE_CONFIG_H /* this is a standalone compilation */
179 # ifdef __CYGWIN__ /* compiling on Cygwin */
180 !!! NOTICE !!!
181 the regex.h distributed with Cygwin is not compatible with etags, alas!
182 If you want regular expression support, you should delete this notice and
183 arrange to use the GNU regex.h and regex.c.
184 # endif
185 # endif
186 # include <regex.h>
187 #endif /* ETAGS_REGEXPS */
188
/* Define CTAGS to make the program "ctags" compatible with the usual one.
 Leave it undefined to make the program "etags", which makes emacs-style
 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* CTAGS is normalised here: after this block it is always defined, as
   TRUE or FALSE, so that it can be used in `#if' and in C expressions.  */
#ifdef CTAGS
# undef  CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
#endif
198
/* String equality predicates.  Each one evaluates to nonzero when the two
   strings compare equal: exactly (streq), ignoring case (strcaseeq), over
   at most N characters (strneq), or both (strncaseeq).
   Both S and T are handed straight to the comparison function, so both
   must be non-null; the assertion checks exactly that.  Note that S and T
   are evaluated more than once.  */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
203
/* Character classification.
   CHARS is the size of the lookup tables below; CHAR(x) reduces X to an
   index in the range 0..CHARS-1, so that a negative char value can safely
   be used as a table index or passed to the <ctype.h> functions.  */
#define CHARS 256		/* number of table entries; one per 8-bit char value */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* Wrappers around <ctype.h> that first map the argument through CHAR.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
219
220
/*
 *	xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type, cast to `Type *'.
 * xrnew resizes the storage OP points to so that it holds N objects of
 * type Type, and assigns the result back to OP; OP must therefore be an
 * lvalue.  In DEBUG builds the tracing allocators from chkmalloc.h are
 * used and are given the calling file and line; otherwise xmalloc and
 * xrealloc are used.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
#endif
238
/* Boolean type of this file; its values are TRUE and FALSE.  */
#define bool int

/* Type of a language parse function: it is handed the stream to parse.  */
typedef void Lang_function __P((FILE *));
242
/* Associates a file name suffix with the command that decompresses files
   having that suffix.  */
typedef struct
{
  char *suffix;			/* file name suffix for this compressor */
  char *command;		/* takes one arg and decompresses to stdout */
} compressor;
248
/* Description of one supported language: its name and help text, the
   function that parses it, and the ways a file can be recognised as
   being written in it (suffix, file name, interpreter).  */
typedef struct
{
  char *name;			/* language name */
  char *help;			/* detailed help for the language */
  Lang_function *function;	/* parse function */
  char **suffixes;		/* name suffixes of this language's files */
  char **filenames;		/* names of this language's files */
  char **interpreters;		/* interpreters for this language */
  bool metasource;		/* source used to generate other sources */
} language;
259
/* Description of one input file.  The descriptions are chained in a
   singly linked list through NEXT.  */
typedef struct fdesc
{
  struct fdesc *next;		/* for the linked list */
  char *infname;		/* uncompressed input file name */
  char *infabsname;		/* absolute uncompressed input file name */
  char *infabsdir;		/* absolute dir of input file */
  char *taggedfname;		/* file name to write in tagfile */
  language *lang;		/* language of file */
  char *prop;			/* file properties to write in tagfile */
  bool usecharno;		/* etags tags shall contain char number */
  bool written;			/* entry written in the tags file */
} fdesc;
272
/* One tag.  Tags are kept in a binary tree linked through LEFT and
   RIGHT; FDP refers to the description of the file the tag was found in.  */
typedef struct node_st
{				/* sorting structure */
  struct node_st *left, *right;	/* left and right sons */
  fdesc *fdp;			/* description of file to whom tag belongs */
  char *name;			/* tag name */
  char *regex;			/* search regexp */
  bool valid;			/* write this tag on the tag file */
  bool is_func;			/* function tag: use regexp in CTAGS mode */
  bool been_warned;		/* warning already given for duplicated tag */
  int lno;			/* line number tag is on */
  long cno;			/* character number line starts on */
} node;
285
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
299
/* Used to support mixing of --lang and file names. */
/* One classified command line argument.  A list of these ends with an
   entry whose ARG_TYPE is at_end.  */
typedef struct
{
  enum {
    at_language,		/* a language specification */
    at_regexp,			/* a regular expression */
    at_filename,		/* a file name */
    at_stdin,			/* read from stdin here */
    at_end			/* stop parsing the list */
  } arg_type;			/* argument type */
  language *lang;		/* language associated with the argument */
  char *what;			/* the argument itself */
} argument;
313
#ifdef ETAGS_REGEXPS
/* Structure defining a regular expression. */
/* The regexps are chained in a singly linked list through P_NEXT.  */
typedef struct regexp
{
  struct regexp *p_next;	/* pointer to next in list */
  language *lang;		/* if set, use only for this language */
  char *pattern;		/* the regexp pattern */
  char *name;			/* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;	/* re registers */
  bool error_signaled;		/* already signaled for this regexp */
  bool force_explicit_name;	/* do not allow implicit tag name */
  bool ignore_case;		/* ignore case when matching */
  bool multi_line;		/* do a multi-line match on the whole file */
} regexp;
#endif /* ETAGS_REGEXPS */
330
331
/* Forward declarations.  Every argument list is wrapped in __P so that it
   disappears on compilers without prototype support.  */

/* Many compilers barf on this:
	Lang_function Ada_funcs;
   so let's write it this way */
/* Language parse functions.  All of them take the stream to parse;
   C_entries takes an additional leading int (c_ext).  */
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Fortran_functions __P((FILE *));
static void HTML_labels __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Lua_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
static void PS_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
static void Yacc_entries __P((FILE *));
static void just_read_file __P((FILE *));

/* Help and version output, and the program entry point.  */
static void print_language_names __P((void));
static void print_version __P((void));
static void print_help __P((argument *));
int main __P((int, char **));

/* Compressor and language lookup, line reading.  */
static compressor *get_compressor_from_suffix __P((char *, char **));
static language *get_language_from_langname __P((const char *));
static language *get_language_from_interpreter __P((char *));
static language *get_language_from_filename __P((char *, bool));
static void readline __P((linebuffer *, FILE *));
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
static void get_tag __P((char *, char **));

#ifdef ETAGS_REGEXPS
static void analyse_regex __P((char *));
static void free_regexps __P((void));
static void regex_tag_multiline __P((void));
#endif /* ETAGS_REGEXPS */
/* Error reporting and the tag tree.  Note that fatal is not static.  */
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
void fatal __P((char *, char *));
static void pfatal __P((char *));
static void add_node __P((node *, node **));

/* Initialisation, file processing and tag creation.  */
static void init __P((void));
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
static void find_entries __P((FILE *));
static void free_tree __P((node *));
static void free_fdesc __P((fdesc *));
static void pfnote __P((char *, bool, char *, int, int, long));
static void make_tag __P((char *, int, bool, char *, int, int, long));
static void invalidate_nodes __P((fdesc *, node **));
static void put_entries __P((node *));

/* String, file name, line buffer and allocation helpers.  */
static char *concat __P((char *, char *, char *));
static char *skip_spaces __P((char *));
static char *skip_non_spaces __P((char *));
static char *savenstr __P((char *, int));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
static int etags_strcasecmp __P((const char *, const char *));
static int etags_strncasecmp __P((const char *, const char *, int));
static char *etags_getcwd __P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
static void linebuffer_init __P((linebuffer *));
static void linebuffer_setlen __P((linebuffer *, int));
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));
417
418 \f
/* File-scope program state.  */

static char searchar = '/';	/* use /.../ searches */

static char *tagfile;		/* output file */
static char *progname;		/* name this program was invoked with */
static char *cwd;		/* current working directory */
static char *tagfiledir;	/* directory of tagfile */
static FILE *tagf;		/* ioptr for tags file */

static fdesc *fdhead;		/* head of file description list */
static fdesc *curfdp;		/* current file description */
static int lineno;		/* line number of current line */
static long charno;		/* current character number */
static long linecharno;		/* charno of start of current line */
static char *dbp;		/* pointer to start of current tag */

/* NOTE(review): presumably the marker for "no character number
   available"; its uses are outside the part of the file shown here.  */
static const int invalidcharno = -1;

static node *nodehead;		/* the head of the binary tree of tags */
static node *last_node;		/* the last node created */

static linebuffer lb;		/* the current line */
static linebuffer filebuf;	/* a buffer containing the whole file */
static linebuffer token_name;	/* a buffer containing a tag name */

/* boolean "functions" (see init) */
/* These tables are indexed through CHAR(c) by the iswhite, notinname,
   begtoken, intoken and endtoken macros.  The strings below list the
   characters that belong to each class.  */
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
static char
  /* white chars */
  *white = " \f\t\n\r\v",
  /* not in a name */
  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
  /* token ending chars */
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";

/* Option flags.  */
static bool append_to_tagfile;	/* -a: append to tags */
/* The next four default to TRUE for etags, but to FALSE for ctags.  */
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
				/* 0 struct/enum/union decls, and C++ */
				/* member functions. */
static bool constantypedefs;	/* -d: create tags for C #define, enum */
				/* constants and variables. */
				/* -D: opposite of -d.  Default under ctags. */
static bool globals;		/* create tags for global variables */
static bool declarations;	/* --declarations: tag them and extern in C&Co*/
static bool members;		/* create tags for C member variables */
static bool no_line_directive;	/* ignore #line directives (undocumented) */
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
static bool no_warnings;	/* -w: suppress warnings */
static bool cxref_style;	/* -x: create cxref style output */
static bool cplusplus;		/* .[hc] means C++, not C */
static bool ignoreindent;	/* -I: ignore indentation in C */
static bool packages_only;	/* --packages-only: in Ada, only tag packages*/

#define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;	/* --parse-stdin used */

/* Without regexp support need_filebuf is the constant FALSE, so code
   that tests it needs no conditional compilation of its own.  */
#ifdef ETAGS_REGEXPS
static regexp *p_head;		/* list of all regexps */
static bool need_filebuf;	/* some regexes are multi-line */
#else
# define need_filebuf FALSE
#endif /* ETAGS_REGEXPS */
487
#if LONG_OPTIONS
/* Long option table for getopt_long.
   An entry with a non-null flag pointer stores its last field into the
   pointed-to variable; the other entries make getopt_long return the
   last field, which is the equivalent short option letter (or STDIN).
   The table ends with an all-zero entry.

   Each long name appears once only.  A second "help" entry returning 'H'
   could never be selected for an exact match, and, because its value
   differed from the first, it made abbreviations of --help ambiguous
   with the GNU getopt_long.  */
static struct option longopts[] =
{
  { "packages-only",      no_argument,       &packages_only,     TRUE  },
  { "c++",                no_argument,       NULL,               'C'   },
  { "declarations",       no_argument,       &declarations,      TRUE  },
  { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  { "help",               no_argument,       NULL,               'h'   },
  { "ignore-indentation", no_argument,       NULL,               'I'   },
  { "language",           required_argument, NULL,               'l'   },
  { "members",            no_argument,       &members,           TRUE  },
  { "no-members",         no_argument,       &members,           FALSE },
  { "output",             required_argument, NULL,               'o'   },
#ifdef ETAGS_REGEXPS
  { "regex",              required_argument, NULL,               'r'   },
  { "no-regex",           no_argument,       NULL,               'R'   },
  { "ignore-case-regex",  required_argument, NULL,               'c'   },
#endif /* ETAGS_REGEXPS */
  { "parse-stdin",        required_argument, NULL,               STDIN },
  { "version",            no_argument,       NULL,               'V'   },

#if CTAGS /* Ctags options */
  { "backward-search",    no_argument,       NULL,               'B'   },
  { "cxref",              no_argument,       NULL,               'x'   },
  { "defines",            no_argument,       NULL,               'd'   },
  { "globals",            no_argument,       &globals,           TRUE  },
  { "typedefs",           no_argument,       NULL,               't'   },
  { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  { "update",             no_argument,       NULL,               'u'   },
  { "vgrind",             no_argument,       NULL,               'v'   },
  { "no-warn",            no_argument,       NULL,               'w'   },

#else /* Etags options */
  { "append",             no_argument,       NULL,               'a'   },
  { "no-defines",         no_argument,       NULL,               'D'   },
  { "no-globals",         no_argument,       &globals,           FALSE },
  { "include",            required_argument, NULL,               'i'   },
#endif
  { NULL }
};
#endif /* LONG_OPTIONS */
530
/* Table of known compressed-file suffixes and the command used to
   decompress each of them.  The table ends with an entry whose suffix
   is NULL.  */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { NULL }
};
540
541 /*
542 * Language stuff.
543 */
544
/* For each language: a NULL-terminated list of file name suffixes (given
   without the leading dot) and the detailed help text for the language.  */

/* Ada code */
static char *Ada_suffixes [] =
  { "ads", "adb", "ada", NULL };
static char Ada_help [] =
"In Ada code, functions, procedures, packages, tasks and types are\n\
tags. Use the `--packages-only' option to create tags for\n\
packages only.\n\
Ada tag names have suffixes indicating the type of entity:\n\
Entity type: Qualifier:\n\
------------ ----------\n\
function /f\n\
procedure /p\n\
package spec /s\n\
package body /b\n\
type /t\n\
task /k\n\
Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
will just search for any tag `bidule'.";

/* Assembly code */
static char *Asm_suffixes [] =
  { "a",	/* Unix assembler */
    "asm", /* Microcontroller assembly */
    "def", /* BSO/Tasking definition includes  */
    "inc", /* Microcontroller include files */
    "ins", /* Microcontroller include files */
    "s", "sa", /* Unix assembler */
    "S",   /* cpp-processed Unix assembler */
    "src", /* BSO/Tasking C compiler output */
    NULL
  };
static char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";
580
581
/* Suffix lists and help texts for the C family of languages.  */

/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
static char *default_C_suffixes [] =
  { "c", "h", NULL };
static char default_C_help [] =
"In C code, any C function or typedef is a tag, and so are\n\
definitions of `struct', `union' and `enum'. `#define' macro\n\
definitions and `enum' constants are tags unless you specify\n\
`--no-defines'. Global variables are tags unless you specify\n\
`--no-globals'. Use of `--no-globals' and `--no-defines'\n\
can make the tags table file much smaller.\n\
You can tag function declarations and external variables by\n\
using `--declarations', and struct members by using `--members'.";

static char *Cplusplus_suffixes [] =
  { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
    "M",			/* Objective C++ */
    "pdb",			/* Postscript with C syntax */
    NULL };
static char Cplusplus_help [] =
"In C++ code, all the tag constructs of C code are tagged. (Use\n\
--help --lang=c --lang=c++ for full help.)\n\
In addition to C tags, member functions are also recognized, and\n\
optionally member variables if you use the `--members' option.\n\
Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
`operator+'.";

static char *Cjava_suffixes [] =
  { "java", NULL };
static char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
616
617
/* Suffix lists and help texts for Cobol, C*, Erlang, Fortran, HTML, Lisp
   and Lua, and the list of file names recognised as makefiles.  C* has
   no help text of its own.  */
static char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";

static char *Cstar_suffixes [] =
  { "cs", "hs", NULL };

static char *Erlang_suffixes [] =
  { "erl", "hrl", NULL };
static char Erlang_help [] =
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";

static char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";

static char *HTML_suffixes [] =
  { "htm", "html", "shtml", NULL };
static char HTML_help [] =
"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
`h3' headers. Also, tags are `name=' in anchors and all\n\
occurrences of `id='.";

static char *Lisp_suffixes [] =
  { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
static char Lisp_help [] =
"In Lisp code, any function defined with `defun', any variable\n\
defined with `defvar' or `defconst', and in general the first\n\
argument of any expression that starts with `(def' in column zero\n\
is a tag.";

static char *Lua_suffixes [] =
  { "lua", "LUA", NULL };
static char Lua_help [] =
"In Lua scripts, all functions are tags.";

/* Makefiles are recognised by their whole file name, not by a suffix.  */
static char *Makefile_filenames [] =
  { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
static char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify `--no-globals'.";
663
/* Suffix lists and help texts for Objective C, Pascal, Perl, PHP, Pro*C,
   PostScript, Prolog, Python and Scheme.  Perl additionally has a list
   of interpreter names; Pro*C has no help text of its own.  */
static char *Objc_suffixes [] =
  { "lm",			/* Objective lex file */
    "m",			/* Objective C file */
     NULL };
static char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";

static char *Pascal_suffixes [] =
  { "p", "pas", NULL };
static char Pascal_help [] =
"In Pascal code, the tags are the functions and procedures defined\n\
in the file.";

static char *Perl_suffixes [] =
  { "pl", "pm", NULL };
static char *Perl_interpreters [] =
  { "perl", "@PERL@", NULL };
static char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the `package', `sub', `my' and `local' keywords. Use\n\
`--globals' if you want to tag global variables. Tags for\n\
subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
defined in the default package is `main::SUB'.";

static char *PHP_suffixes [] =
  { "php", "php3", "php4", NULL };
static char PHP_help [] =
"In PHP code, tags are functions, classes and defines. When using\n\
the `--members' option, vars are tags too.";

static char *plain_C_suffixes [] =
  { "pc",			/* Pro*C file */
     NULL };

static char *PS_suffixes [] =
  { "ps", "psw", NULL };	/* .psw is for PSWrap */
static char PS_help [] =
"In PostScript code, the tags are the functions.";

static char *Prolog_suffixes [] =
  { "prolog", NULL };
static char Prolog_help [] =
"In Prolog code, tags are predicates and rules at the beginning of\n\
line.";

static char *Python_suffixes [] =
  { "py", NULL };
static char Python_help [] =
"In Python code, `def' or `class' at the beginning of a line\n\
generate a tag.";

/* Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] =
  { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
static char Scheme_help [] =
"In Scheme code, tags include anything defined with `def' or with a\n\
construct whose name starts with `def'. They also include\n\
variables set with `set!' at top level in the file.";
724
/* Suffix lists and help texts for TeX, Texinfo and Yacc.  */
static char *TeX_suffixes [] =
  { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
static char TeX_help [] =
"In LaTeX text, the argument of any of the commands `\\chapter',\n\
`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
`\\newenvironment' or `\\renewenvironment' is a tag.\n\
\n\
Other commands can be specified by setting the environment variable\n\
`TEXTAGS' to a colon-separated list like, for example,\n\
TEXTAGS=\"mycommand:myothercommand\".";


static char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";

static char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";
751
/* Help text for the pseudo-language `auto' (see the table of languages).
   This is shown to the user, so it has to read correctly: the original
   wording "files base on" is corrected to "files based on".  */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
755
/* Help text for the pseudo-language `none'.  */
static char none_help [] =
"`none' is not a real language, it indicates to only do\n\
regexp processing on files.";

/* Help text used for the languages that have no help text of their own.  */
static char no_lang_help [] =
"No detailed help available for this language.";
762
763
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * The fields of each entry are, in order: name, help, function,
 * suffixes, filenames, interpreters, metasource.  Trailing fields that
 * are left out are zero.  The table ends with an entry whose name is
 * NULL.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
802
803 \f
804 static void
805 print_language_names ()
806 {
807 language *lang;
808 char **name, **ext;
809
810 puts ("\nThese are the currently supported languages, along with the\n\
811 default file names and dot suffixes:");
812 for (lang = lang_names; lang->name != NULL; lang++)
813 {
814 printf (" %-*s", 10, lang->name);
815 if (lang->filenames != NULL)
816 for (name = lang->filenames; *name != NULL; name++)
817 printf (" %s", *name);
818 if (lang->suffixes != NULL)
819 for (ext = lang->suffixes; *ext != NULL; ext++)
820 printf (" .%s", *ext);
821 puts ("");
822 }
823 puts ("where `auto' means use default language for files based on file\n\
824 name suffix, and `none' means only do regexp processing on files.\n\
825 If no language is specified and no matching suffix is found,\n\
826 the first line of the file is read for a sharp-bang (#!) sequence\n\
827 followed by the name of an interpreter. If no such sequence is found,\n\
828 Fortran is tried first; if no tags are found, C is tried next.\n\
829 When parsing any C file, a \"class\" or \"template\" keyword\n\
830 switches to C++.");
831 puts ("Compressed files are supported using gzip and bzip2.\n\
832 \n\
833 For detailed help on a given language use, for example,\n\
834 etags --help --lang=ada.");
835 }
836
837 #ifndef EMACS_NAME
838 # define EMACS_NAME "standalone"
839 #endif
840 #ifndef VERSION
841 # define VERSION "version"
842 #endif
843 static void
844 print_version ()
845 {
846 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
847 puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
848 puts ("This program is distributed under the same terms as Emacs");
849
850 exit (EXIT_SUCCESS);
851 }
852
853 static void
854 print_help (argbuffer)
855 argument *argbuffer;
856 {
857 bool help_for_lang = FALSE;
858
859 for (; argbuffer->arg_type != at_end; argbuffer++)
860 if (argbuffer->arg_type == at_language)
861 {
862 if (help_for_lang)
863 puts ("");
864 puts (argbuffer->lang->help);
865 help_for_lang = TRUE;
866 }
867
868 if (help_for_lang)
869 exit (EXIT_SUCCESS);
870
871 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
872 \n\
873 These are the options accepted by %s.\n", progname, progname);
874 if (LONG_OPTIONS)
875 puts ("You may use unambiguous abbreviations for the long option names.");
876 else
877 puts ("Long option names do not work with this executable, as it is not\n\
878 linked with GNU getopt.");
879 puts (" A - as file name means read names from stdin (one per line).\n\
880 Absolute names are stored in the output file as they are.\n\
881 Relative ones are stored relative to the output file's directory.\n");
882
883 if (!CTAGS)
884 puts ("-a, --append\n\
885 Append tag entries to existing tags file.");
886
887 puts ("--packages-only\n\
888 For Ada files, only generate tags for packages.");
889
890 if (CTAGS)
891 puts ("-B, --backward-search\n\
892 Write the search commands for the tag entries using '?', the\n\
893 backward-search command instead of '/', the forward-search command.");
894
895 /* This option is mostly obsolete, because etags can now automatically
896 detect C++. Retained for backward compatibility and for debugging and
897 experimentation. In principle, we could want to tag as C++ even
898 before any "class" or "template" keyword.
899 puts ("-C, --c++\n\
900 Treat files whose name suffix defaults to C language as C++ files.");
901 */
902
903 puts ("--declarations\n\
904 In C and derived languages, create tags for function declarations,");
905 if (CTAGS)
906 puts ("\tand create tags for extern variables if --globals is used.");
907 else
908 puts
909 ("\tand create tags for extern variables unless --no-globals is used.");
910
911 if (CTAGS)
912 puts ("-d, --defines\n\
913 Create tag entries for C #define constants and enum constants, too.");
914 else
915 puts ("-D, --no-defines\n\
916 Don't create tag entries for C #define constants and enum constants.\n\
917 This makes the tags file smaller.");
918
919 if (!CTAGS)
920 puts ("-i FILE, --include=FILE\n\
921 Include a note in tag file indicating that, when searching for\n\
922 a tag, one should also consult the tags file FILE after\n\
923 checking the current file.");
924
925 puts ("-l LANG, --language=LANG\n\
926 Force the following files to be considered as written in the\n\
927 named language up to the next --language=LANG option.");
928
929 if (CTAGS)
930 puts ("--globals\n\
931 Create tag entries for global variables in some languages.");
932 else
933 puts ("--no-globals\n\
934 Do not create tag entries for global variables in some\n\
935 languages. This makes the tags file smaller.");
936 puts ("--members\n\
937 Create tag entries for members of structures in some languages.");
938
939 #ifdef ETAGS_REGEXPS
940 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
941 Make a tag for each line matching a regular expression pattern\n\
942 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
943 files only. REGEXFILE is a file containing one REGEXP per line.\n\
944 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
945 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
946 puts (" If TAGNAME/ is present, the tags created are named.\n\
947 For example Tcl named tags can be created with:\n\
948 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
949 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
950 `m' means to allow multi-line matches, `s' implies `m' and\n\
951 causes dot to match any character, including newline.");
952 puts ("-R, --no-regex\n\
953 Don't create tags from regexps for the following files.");
954 #endif /* ETAGS_REGEXPS */
955 puts ("-I, --ignore-indentation\n\
956 In C and C++ do not assume that a closing brace in the first\n\
957 column is the final brace of a function or structure definition.");
958 puts ("-o FILE, --output=FILE\n\
959 Write the tags to FILE.");
960 puts ("--parse-stdin=NAME\n\
961 Read from standard input and record tags as belonging to file NAME.");
962
963 if (CTAGS)
964 {
965 puts ("-t, --typedefs\n\
966 Generate tag entries for C and Ada typedefs.");
967 puts ("-T, --typedefs-and-c++\n\
968 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
969 and C++ member functions.");
970 }
971
972 if (CTAGS)
973 puts ("-u, --update\n\
974 Update the tag entries for the given files, leaving tag\n\
975 entries for other files in place. Currently, this is\n\
976 implemented by deleting the existing entries for the given\n\
977 files and then rewriting the new entries at the end of the\n\
978 tags file. It is often faster to simply rebuild the entire\n\
979 tag file than to use this.");
980
981 if (CTAGS)
982 {
983 puts ("-v, --vgrind\n\
984 Generates an index of items intended for human consumption,\n\
985 similar to the output of vgrind. The index is sorted, and\n\
986 gives the page number of each item.");
987 puts ("-w, --no-warn\n\
988 Suppress warning messages about entries defined in multiple\n\
989 files.");
990 puts ("-x, --cxref\n\
991 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
992 The output uses line numbers instead of page numbers, but\n\
993 beyond that the differences are cosmetic; try both to see\n\
994 which you like.");
995 }
996
997 puts ("-V, --version\n\
998 Print the version of the program.\n\
999 -h, --help\n\
1000 Print this help message.\n\
1001 Followed by one or more `--language' options prints detailed\n\
1002 help about tag generation for the specified languages.");
1003
1004 print_language_names ();
1005
1006 puts ("");
1007 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1008
1009 exit (EXIT_SUCCESS);
1010 }
1011
1012 \f
1013 #ifdef VMS /* VMS specific functions */
1014
/* End-of-string marker. */
#define EOS '\0'

/* Longest file specification that fits in a vspec.  A fixed limit like
   this is a known shortcoming: any arbitrary limit is a bug waiting to
   be fixed. */
#define MAX_FILE_SPEC_LEN 255

/* A counted string: CURLEN holds the current length of the text in
   BODY, which has room for MAX_FILE_SPEC_LEN characters plus a
   terminating EOS. */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1024
1025 /*
1026 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1027 returning in each successive call the next file name matching the input
1028 spec. The function expects that each in_spec passed
1029 to it will be processed to completion; in particular, up to and
1030 including the call following that in which the last matching name
1031 is returned, the function ignores the value of in_spec, and will
1032 only start processing a new spec with the following call.
1033 If an error occurs, on return out_spec contains the value
1034 of in_spec when the error occurred.
1035
1036 With each successive file name returned in out_spec, the
1037 function's return value is one. When there are no more matching
1038 names the function returns zero. If on the first call no file
1039 matches in_spec, or there is any other error, -1 is returned.
1040 */
1041
1042 #include <rmsdef.h>
1043 #include <descrip.h>
1044 #define OUTSIZE MAX_FILE_SPEC_LEN
1045 static short
1046 fn_exp (out, in)
1047 vspec *out;
1048 char *in;
1049 {
1050 static long context = 0;
1051 static struct dsc$descriptor_s o;
1052 static struct dsc$descriptor_s i;
1053 static bool pass1 = TRUE;
1054 long status;
1055 short retval;
1056
1057 if (pass1)
1058 {
1059 pass1 = FALSE;
1060 o.dsc$a_pointer = (char *) out;
1061 o.dsc$w_length = (short)OUTSIZE;
1062 i.dsc$a_pointer = in;
1063 i.dsc$w_length = (short)strlen(in);
1064 i.dsc$b_dtype = DSC$K_DTYPE_T;
1065 i.dsc$b_class = DSC$K_CLASS_S;
1066 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1067 o.dsc$b_class = DSC$K_CLASS_VS;
1068 }
1069 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1070 {
1071 out->body[out->curlen] = EOS;
1072 return 1;
1073 }
1074 else if (status == RMS$_NMF)
1075 retval = 0;
1076 else
1077 {
1078 strcpy(out->body, in);
1079 retval = -1;
1080 }
1081 lib$find_file_end(&context);
1082 pass1 = TRUE;
1083 return retval;
1084 }
1085
1086 /*
1087 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1088 name of each file specified by the provided arg expanding wildcards.
1089 */
1090 static char *
1091 gfnames (arg, p_error)
1092 char *arg;
1093 bool *p_error;
1094 {
1095 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1096
1097 switch (fn_exp (&filename, arg))
1098 {
1099 case 1:
1100 *p_error = FALSE;
1101 return filename.body;
1102 case 0:
1103 *p_error = FALSE;
1104 return NULL;
1105 default:
1106 *p_error = TRUE;
1107 return filename.body;
1108 }
1109 }
1110
1111 #ifndef OLD /* Newer versions of VMS do provide `system'. */
1112 system (cmd)
1113 char *cmd;
1114 {
1115 error ("%s", "system() function not implemented under VMS");
1116 }
1117 #endif
1118
1119 #define VERSION_DELIM ';'
1120 char *massage_name (s)
1121 char *s;
1122 {
1123 char *start = s;
1124
1125 for ( ; *s; s++)
1126 if (*s == VERSION_DELIM)
1127 {
1128 *s = EOS;
1129 break;
1130 }
1131 else
1132 *s = lowcase (*s);
1133 return start;
1134 }
1135 #endif /* VMS */
1136
1137 \f
1138 int
1139 main (argc, argv)
1140 int argc;
1141 char *argv[];
1142 {
1143 int i;
1144 unsigned int nincluded_files;
1145 char **included_files;
1146 argument *argbuffer;
1147 int current_arg, file_count;
1148 linebuffer filename_lb;
1149 bool help_asked = FALSE;
1150 #ifdef VMS
1151 bool got_err;
1152 #endif
1153 char *optstring;
1154 int opt;
1155
1156
1157 #ifdef DOS_NT
1158 _fmode = O_BINARY; /* all of files are treated as binary files */
1159 #endif /* DOS_NT */
1160
1161 progname = argv[0];
1162 nincluded_files = 0;
1163 included_files = xnew (argc, char *);
1164 current_arg = 0;
1165 file_count = 0;
1166
1167 /* Allocate enough no matter what happens. Overkill, but each one
1168 is small. */
1169 argbuffer = xnew (argc, argument);
1170
1171 /*
1172 * If etags, always find typedefs and structure tags. Why not?
1173 * Also default to find macro constants, enum constants and
1174 * global variables.
1175 */
1176 if (!CTAGS)
1177 {
1178 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1179 globals = TRUE;
1180 }
1181
1182 optstring = "-";
1183 #ifdef ETAGS_REGEXPS
1184 optstring = "-r:Rc:";
1185 #endif /* ETAGS_REGEXPS */
1186 if (LONG_OPTIONS)
1187 optstring += 1;
1188 optstring = concat (optstring,
1189 "Cf:Il:o:SVhH",
1190 (CTAGS) ? "BxdtTuvw" : "aDi:");
1191
1192 while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1193 switch (opt)
1194 {
1195 case 0:
1196 /* If getopt returns 0, then it has already processed a
1197 long-named option. We should do nothing. */
1198 break;
1199
1200 case 1:
1201 /* This means that a file name has been seen. Record it. */
1202 argbuffer[current_arg].arg_type = at_filename;
1203 argbuffer[current_arg].what = optarg;
1204 ++current_arg;
1205 ++file_count;
1206 break;
1207
1208 case STDIN:
1209 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1210 argbuffer[current_arg].arg_type = at_stdin;
1211 argbuffer[current_arg].what = optarg;
1212 ++current_arg;
1213 ++file_count;
1214 if (parsing_stdin)
1215 fatal ("cannot parse standard input more than once", (char *)NULL);
1216 parsing_stdin = TRUE;
1217 break;
1218
1219 /* Common options. */
1220 case 'C': cplusplus = TRUE; break;
1221 case 'f': /* for compatibility with old makefiles */
1222 case 'o':
1223 if (tagfile)
1224 {
1225 error ("-o option may only be given once.", (char *)NULL);
1226 suggest_asking_for_help ();
1227 /* NOTREACHED */
1228 }
1229 tagfile = optarg;
1230 break;
1231 case 'I':
1232 case 'S': /* for backward compatibility */
1233 ignoreindent = TRUE;
1234 break;
1235 case 'l':
1236 {
1237 language *lang = get_language_from_langname (optarg);
1238 if (lang != NULL)
1239 {
1240 argbuffer[current_arg].lang = lang;
1241 argbuffer[current_arg].arg_type = at_language;
1242 ++current_arg;
1243 }
1244 }
1245 break;
1246 case 'c':
1247 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1248 optarg = concat (optarg, "i", ""); /* memory leak here */
1249 /* FALLTHRU */
1250 case 'r':
1251 argbuffer[current_arg].arg_type = at_regexp;
1252 argbuffer[current_arg].what = optarg;
1253 ++current_arg;
1254 break;
1255 case 'R':
1256 argbuffer[current_arg].arg_type = at_regexp;
1257 argbuffer[current_arg].what = NULL;
1258 ++current_arg;
1259 break;
1260 case 'V':
1261 print_version ();
1262 break;
1263 case 'h':
1264 case 'H':
1265 help_asked = TRUE;
1266 break;
1267
1268 /* Etags options */
1269 case 'a': append_to_tagfile = TRUE; break;
1270 case 'D': constantypedefs = FALSE; break;
1271 case 'i': included_files[nincluded_files++] = optarg; break;
1272
1273 /* Ctags options. */
1274 case 'B': searchar = '?'; break;
1275 case 'd': constantypedefs = TRUE; break;
1276 case 't': typedefs = TRUE; break;
1277 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1278 case 'u': update = TRUE; break;
1279 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1280 case 'x': cxref_style = TRUE; break;
1281 case 'w': no_warnings = TRUE; break;
1282 default:
1283 suggest_asking_for_help ();
1284 /* NOTREACHED */
1285 }
1286
1287 for (; optind < argc; optind++)
1288 {
1289 argbuffer[current_arg].arg_type = at_filename;
1290 argbuffer[current_arg].what = argv[optind];
1291 ++current_arg;
1292 ++file_count;
1293 }
1294
1295 argbuffer[current_arg].arg_type = at_end;
1296
1297 if (help_asked)
1298 print_help (argbuffer);
1299 /* NOTREACHED */
1300
1301 if (nincluded_files == 0 && file_count == 0)
1302 {
1303 error ("no input files specified.", (char *)NULL);
1304 suggest_asking_for_help ();
1305 /* NOTREACHED */
1306 }
1307
1308 if (tagfile == NULL)
1309 tagfile = CTAGS ? "tags" : "TAGS";
1310 cwd = etags_getcwd (); /* the current working directory */
1311 if (cwd[strlen (cwd) - 1] != '/')
1312 {
1313 char *oldcwd = cwd;
1314 cwd = concat (oldcwd, "/", "");
1315 free (oldcwd);
1316 }
1317 if (streq (tagfile, "-"))
1318 tagfiledir = cwd;
1319 else
1320 tagfiledir = absolute_dirname (tagfile, cwd);
1321
1322 init (); /* set up boolean "functions" */
1323
1324 linebuffer_init (&lb);
1325 linebuffer_init (&filename_lb);
1326 linebuffer_init (&filebuf);
1327 linebuffer_init (&token_name);
1328
1329 if (!CTAGS)
1330 {
1331 if (streq (tagfile, "-"))
1332 {
1333 tagf = stdout;
1334 #ifdef DOS_NT
1335 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1336 doesn't take effect until after `stdout' is already open). */
1337 if (!isatty (fileno (stdout)))
1338 setmode (fileno (stdout), O_BINARY);
1339 #endif /* DOS_NT */
1340 }
1341 else
1342 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1343 if (tagf == NULL)
1344 pfatal (tagfile);
1345 }
1346
1347 /*
1348 * Loop through files finding functions.
1349 */
1350 for (i = 0; i < current_arg; i++)
1351 {
1352 static language *lang; /* non-NULL if language is forced */
1353 char *this_file;
1354
1355 switch (argbuffer[i].arg_type)
1356 {
1357 case at_language:
1358 lang = argbuffer[i].lang;
1359 break;
1360 #ifdef ETAGS_REGEXPS
1361 case at_regexp:
1362 analyse_regex (argbuffer[i].what);
1363 break;
1364 #endif
1365 case at_filename:
1366 #ifdef VMS
1367 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1368 {
1369 if (got_err)
1370 {
1371 error ("can't find file %s\n", this_file);
1372 argc--, argv++;
1373 }
1374 else
1375 {
1376 this_file = massage_name (this_file);
1377 }
1378 #else
1379 this_file = argbuffer[i].what;
1380 #endif
1381 /* Input file named "-" means read file names from stdin
1382 (one per line) and use them. */
1383 if (streq (this_file, "-"))
1384 {
1385 if (parsing_stdin)
1386 fatal ("cannot parse standard input AND read file names from it",
1387 (char *)NULL);
1388 while (readline_internal (&filename_lb, stdin) > 0)
1389 process_file_name (filename_lb.buffer, lang);
1390 }
1391 else
1392 process_file_name (this_file, lang);
1393 #ifdef VMS
1394 }
1395 #endif
1396 break;
1397 case at_stdin:
1398 this_file = argbuffer[i].what;
1399 process_file (stdin, this_file, lang);
1400 break;
1401 }
1402 }
1403
1404 #ifdef ETAGS_REGEXPS
1405 free_regexps ();
1406 #endif /* ETAGS_REGEXPS */
1407 free (lb.buffer);
1408 free (filebuf.buffer);
1409 free (token_name.buffer);
1410
1411 if (!CTAGS || cxref_style)
1412 {
1413 put_entries (nodehead); /* write the remainig tags (ETAGS) */
1414 free_tree (nodehead);
1415 nodehead = NULL;
1416 if (!CTAGS)
1417 {
1418 fdesc *fdp;
1419
1420 /* Output file entries that have no tags. */
1421 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1422 if (!fdp->written)
1423 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1424
1425 while (nincluded_files-- > 0)
1426 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1427 }
1428
1429 if (fclose (tagf) == EOF)
1430 pfatal (tagfile);
1431 exit (EXIT_SUCCESS);
1432 }
1433
1434 if (update)
1435 {
1436 char cmd[BUFSIZ];
1437 for (i = 0; i < current_arg; ++i)
1438 {
1439 switch (argbuffer[i].arg_type)
1440 {
1441 case at_filename:
1442 case at_stdin:
1443 break;
1444 default:
1445 continue; /* the for loop */
1446 }
1447 sprintf (cmd,
1448 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1449 tagfile, argbuffer[i].what, tagfile);
1450 if (system (cmd) != EXIT_SUCCESS)
1451 fatal ("failed to execute shell command", (char *)NULL);
1452 }
1453 append_to_tagfile = TRUE;
1454 }
1455
1456 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1457 if (tagf == NULL)
1458 pfatal (tagfile);
1459 put_entries (nodehead); /* write all the tags (CTAGS) */
1460 free_tree (nodehead);
1461 nodehead = NULL;
1462 if (fclose (tagf) == EOF)
1463 pfatal (tagfile);
1464
1465 if (update)
1466 {
1467 char cmd[2*BUFSIZ+10];
1468 sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1469 exit (system (cmd));
1470 }
1471 return EXIT_SUCCESS;
1472 }
1473
1474
1475 /*
1476 * Return a compressor given the file name. If EXTPTR is non-zero,
1477 * return a pointer into FILE where the compressor-specific
1478 * extension begins. If no compressor is found, NULL is returned
1479 * and EXTPTR is not significant.
1480 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1481 */
1482 static compressor *
1483 get_compressor_from_suffix (file, extptr)
1484 char *file;
1485 char **extptr;
1486 {
1487 compressor *compr;
1488 char *slash, *suffix;
1489
1490 /* This relies on FN to be after canonicalize_filename,
1491 so we don't need to consider backslashes on DOS_NT. */
1492 slash = etags_strrchr (file, '/');
1493 suffix = etags_strrchr (file, '.');
1494 if (suffix == NULL || suffix < slash)
1495 return NULL;
1496 if (extptr != NULL)
1497 *extptr = suffix;
1498 suffix += 1;
1499 /* Let those poor souls who live with DOS 8+3 file name limits get
1500 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1501 Only the first do loop is run if not MSDOS */
1502 do
1503 {
1504 for (compr = compressors; compr->suffix != NULL; compr++)
1505 if (streq (compr->suffix, suffix))
1506 return compr;
1507 if (!MSDOS)
1508 break; /* do it only once: not really a loop */
1509 if (extptr != NULL)
1510 *extptr = ++suffix;
1511 } while (*suffix != '\0');
1512 return NULL;
1513 }
1514
1515
1516
1517 /*
1518 * Return a language given the name.
1519 */
1520 static language *
1521 get_language_from_langname (name)
1522 const char *name;
1523 {
1524 language *lang;
1525
1526 if (name == NULL)
1527 error ("empty language name", (char *)NULL);
1528 else
1529 {
1530 for (lang = lang_names; lang->name != NULL; lang++)
1531 if (streq (name, lang->name))
1532 return lang;
1533 error ("unknown language \"%s\"", name);
1534 }
1535
1536 return NULL;
1537 }
1538
1539
1540 /*
1541 * Return a language given the interpreter name.
1542 */
1543 static language *
1544 get_language_from_interpreter (interpreter)
1545 char *interpreter;
1546 {
1547 language *lang;
1548 char **iname;
1549
1550 if (interpreter == NULL)
1551 return NULL;
1552 for (lang = lang_names; lang->name != NULL; lang++)
1553 if (lang->interpreters != NULL)
1554 for (iname = lang->interpreters; *iname != NULL; iname++)
1555 if (streq (*iname, interpreter))
1556 return lang;
1557
1558 return NULL;
1559 }
1560
1561
1562
1563 /*
1564 * Return a language given the file name.
1565 */
1566 static language *
1567 get_language_from_filename (file, case_sensitive)
1568 char *file;
1569 bool case_sensitive;
1570 {
1571 language *lang;
1572 char **name, **ext, *suffix;
1573
1574 /* Try whole file name first. */
1575 for (lang = lang_names; lang->name != NULL; lang++)
1576 if (lang->filenames != NULL)
1577 for (name = lang->filenames; *name != NULL; name++)
1578 if ((case_sensitive)
1579 ? streq (*name, file)
1580 : strcaseeq (*name, file))
1581 return lang;
1582
1583 /* If not found, try suffix after last dot. */
1584 suffix = etags_strrchr (file, '.');
1585 if (suffix == NULL)
1586 return NULL;
1587 suffix += 1;
1588 for (lang = lang_names; lang->name != NULL; lang++)
1589 if (lang->suffixes != NULL)
1590 for (ext = lang->suffixes; *ext != NULL; ext++)
1591 if ((case_sensitive)
1592 ? streq (*ext, suffix)
1593 : strcaseeq (*ext, suffix))
1594 return lang;
1595 return NULL;
1596 }
1597
1598 \f
1599 /*
1600 * This routine is called on each file argument.
1601 */
1602 static void
1603 process_file_name (file, lang)
1604 char *file;
1605 language *lang;
1606 {
1607 struct stat stat_buf;
1608 FILE *inf;
1609 fdesc *fdp;
1610 compressor *compr;
1611 char *compressed_name, *uncompressed_name;
1612 char *ext, *real_name;
1613 int retval;
1614
1615 canonicalize_filename (file);
1616 if (streq (file, tagfile) && !streq (tagfile, "-"))
1617 {
1618 error ("skipping inclusion of %s in self.", file);
1619 return;
1620 }
1621 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1622 {
1623 compressed_name = NULL;
1624 real_name = uncompressed_name = savestr (file);
1625 }
1626 else
1627 {
1628 real_name = compressed_name = savestr (file);
1629 uncompressed_name = savenstr (file, ext - file);
1630 }
1631
1632 /* If the canonicalized uncompressed name
1633 has already been dealt with, skip it silently. */
1634 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1635 {
1636 assert (fdp->infname != NULL);
1637 if (streq (uncompressed_name, fdp->infname))
1638 goto cleanup;
1639 }
1640
1641 if (stat (real_name, &stat_buf) != 0)
1642 {
1643 /* Reset real_name and try with a different name. */
1644 real_name = NULL;
1645 if (compressed_name != NULL) /* try with the given suffix */
1646 {
1647 if (stat (uncompressed_name, &stat_buf) == 0)
1648 real_name = uncompressed_name;
1649 }
1650 else /* try all possible suffixes */
1651 {
1652 for (compr = compressors; compr->suffix != NULL; compr++)
1653 {
1654 compressed_name = concat (file, ".", compr->suffix);
1655 if (stat (compressed_name, &stat_buf) != 0)
1656 {
1657 if (MSDOS)
1658 {
1659 char *suf = compressed_name + strlen (file);
1660 size_t suflen = strlen (compr->suffix) + 1;
1661 for ( ; suf[1]; suf++, suflen--)
1662 {
1663 memmove (suf, suf + 1, suflen);
1664 if (stat (compressed_name, &stat_buf) == 0)
1665 {
1666 real_name = compressed_name;
1667 break;
1668 }
1669 }
1670 if (real_name != NULL)
1671 break;
1672 } /* MSDOS */
1673 free (compressed_name);
1674 compressed_name = NULL;
1675 }
1676 else
1677 {
1678 real_name = compressed_name;
1679 break;
1680 }
1681 }
1682 }
1683 if (real_name == NULL)
1684 {
1685 perror (file);
1686 goto cleanup;
1687 }
1688 } /* try with a different name */
1689
1690 if (!S_ISREG (stat_buf.st_mode))
1691 {
1692 error ("skipping %s: it is not a regular file.", real_name);
1693 goto cleanup;
1694 }
1695 if (real_name == compressed_name)
1696 {
1697 char *cmd = concat (compr->command, " ", real_name);
1698 inf = (FILE *) popen (cmd, "r");
1699 free (cmd);
1700 }
1701 else
1702 inf = fopen (real_name, "r");
1703 if (inf == NULL)
1704 {
1705 perror (real_name);
1706 goto cleanup;
1707 }
1708
1709 process_file (inf, uncompressed_name, lang);
1710
1711 if (real_name == compressed_name)
1712 retval = pclose (inf);
1713 else
1714 retval = fclose (inf);
1715 if (retval < 0)
1716 pfatal (file);
1717
1718 cleanup:
1719 if (compressed_name) free (compressed_name);
1720 if (uncompressed_name) free (uncompressed_name);
1721 last_node = NULL;
1722 curfdp = NULL;
1723 return;
1724 }
1725
1726 static void
1727 process_file (fh, fn, lang)
1728 FILE *fh;
1729 char *fn;
1730 language *lang;
1731 {
1732 static const fdesc emptyfdesc;
1733 fdesc *fdp;
1734
1735 /* Create a new input file description entry. */
1736 fdp = xnew (1, fdesc);
1737 *fdp = emptyfdesc;
1738 fdp->next = fdhead;
1739 fdp->infname = savestr (fn);
1740 fdp->lang = lang;
1741 fdp->infabsname = absolute_filename (fn, cwd);
1742 fdp->infabsdir = absolute_dirname (fn, cwd);
1743 if (filename_is_absolute (fn))
1744 {
1745 /* An absolute file name. Canonicalize it. */
1746 fdp->taggedfname = absolute_filename (fn, NULL);
1747 }
1748 else
1749 {
1750 /* A file name relative to cwd. Make it relative
1751 to the directory of the tags file. */
1752 fdp->taggedfname = relative_filename (fn, tagfiledir);
1753 }
1754 fdp->usecharno = TRUE; /* use char position when making tags */
1755 fdp->prop = NULL;
1756 fdp->written = FALSE; /* not written on tags file yet */
1757
1758 fdhead = fdp;
1759 curfdp = fdhead; /* the current file description */
1760
1761 find_entries (fh);
1762
1763 /* If not Ctags, and if this is not metasource and if it contained no #line
1764 directives, we can write the tags and free all nodes pointing to
1765 curfdp. */
1766 if (!CTAGS
1767 && curfdp->usecharno /* no #line directives in this file */
1768 && !curfdp->lang->metasource)
1769 {
1770 node *np, *prev;
1771
1772 /* Look for the head of the sublist relative to this file. See add_node
1773 for the structure of the node tree. */
1774 prev = NULL;
1775 for (np = nodehead; np != NULL; prev = np, np = np->left)
1776 if (np->fdp == curfdp)
1777 break;
1778
1779 /* If we generated tags for this file, write and delete them. */
1780 if (np != NULL)
1781 {
1782 /* This is the head of the last sublist, if any. The following
1783 instructions depend on this being true. */
1784 assert (np->left == NULL);
1785
1786 assert (fdhead == curfdp);
1787 assert (last_node->fdp == curfdp);
1788 put_entries (np); /* write tags for file curfdp->taggedfname */
1789 free_tree (np); /* remove the written nodes */
1790 if (prev == NULL)
1791 nodehead = NULL; /* no nodes left */
1792 else
1793 prev->left = NULL; /* delete the pointer to the sublist */
1794 }
1795 }
1796 }
1797
1798 /*
1799 * This routine sets up the boolean pseudo-functions which work
1800 * by setting boolean flags dependent upon the corresponding character.
1801 * Every char which is NOT in that string is not a white char. Therefore,
1802 * all of the array "_wht" is set to FALSE, and then the elements
1803 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1804 * of a char is TRUE if it is the string "white", else FALSE.
1805 */
1806 static void
1807 init ()
1808 {
1809 register char *sp;
1810 register int i;
1811
1812 for (i = 0; i < CHARS; i++)
1813 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1814 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1815 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1816 notinname('\0') = notinname('\n');
1817 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1818 begtoken('\0') = begtoken('\n');
1819 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1820 intoken('\0') = intoken('\n');
1821 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1822 endtoken('\0') = endtoken('\n');
1823 }
1824
1825 /*
1826 * This routine opens the specified file and calls the function
1827 * which finds the function and type definitions.
1828 */
1829 static void
1830 find_entries (inf)
1831 FILE *inf;
1832 {
1833 char *cp;
1834 language *lang = curfdp->lang;
1835 Lang_function *parser = NULL;
1836
1837 /* If user specified a language, use it. */
1838 if (lang != NULL && lang->function != NULL)
1839 {
1840 parser = lang->function;
1841 }
1842
1843 /* Else try to guess the language given the file name. */
1844 if (parser == NULL)
1845 {
1846 lang = get_language_from_filename (curfdp->infname, TRUE);
1847 if (lang != NULL && lang->function != NULL)
1848 {
1849 curfdp->lang = lang;
1850 parser = lang->function;
1851 }
1852 }
1853
1854 /* Else look for sharp-bang as the first two characters. */
1855 if (parser == NULL
1856 && readline_internal (&lb, inf) > 0
1857 && lb.len >= 2
1858 && lb.buffer[0] == '#'
1859 && lb.buffer[1] == '!')
1860 {
1861 char *lp;
1862
1863 /* Set lp to point at the first char after the last slash in the
1864 line or, if no slashes, at the first nonblank. Then set cp to
1865 the first successive blank and terminate the string. */
1866 lp = etags_strrchr (lb.buffer+2, '/');
1867 if (lp != NULL)
1868 lp += 1;
1869 else
1870 lp = skip_spaces (lb.buffer + 2);
1871 cp = skip_non_spaces (lp);
1872 *cp = '\0';
1873
1874 if (strlen (lp) > 0)
1875 {
1876 lang = get_language_from_interpreter (lp);
1877 if (lang != NULL && lang->function != NULL)
1878 {
1879 curfdp->lang = lang;
1880 parser = lang->function;
1881 }
1882 }
1883 }
1884
1885 /* We rewind here, even if inf may be a pipe. We fail if the
1886 length of the first line is longer than the pipe block size,
1887 which is unlikely. */
1888 rewind (inf);
1889
1890 /* Else try to guess the language given the case insensitive file name. */
1891 if (parser == NULL)
1892 {
1893 lang = get_language_from_filename (curfdp->infname, FALSE);
1894 if (lang != NULL && lang->function != NULL)
1895 {
1896 curfdp->lang = lang;
1897 parser = lang->function;
1898 }
1899 }
1900
1901 /* Else try Fortran or C. */
1902 if (parser == NULL)
1903 {
1904 node *old_last_node = last_node;
1905
1906 curfdp->lang = get_language_from_langname ("fortran");
1907 find_entries (inf);
1908
1909 if (old_last_node == last_node)
1910 /* No Fortran entries found. Try C. */
1911 {
1912 /* We do not tag if rewind fails.
1913 Only the file name will be recorded in the tags file. */
1914 rewind (inf);
1915 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1916 find_entries (inf);
1917 }
1918 return;
1919 }
1920
1921 if (!no_line_directive
1922 && curfdp->lang != NULL && curfdp->lang->metasource)
1923 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1924 file, or anyway we parsed a file that is automatically generated from
1925 this one. If this is the case, the bingo.c file contained #line
1926 directives that generated tags pointing to this file. Let's delete
1927 them all before parsing this file, which is the real source. */
1928 {
1929 fdesc **fdpp = &fdhead;
1930 while (*fdpp != NULL)
1931 if (*fdpp != curfdp
1932 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1933 /* We found one of those! We must delete both the file description
1934 and all tags referring to it. */
1935 {
1936 fdesc *badfdp = *fdpp;
1937
1938 /* Delete the tags referring to badfdp->taggedfname
1939 that were obtained from badfdp->infname. */
1940 invalidate_nodes (badfdp, &nodehead);
1941
1942 *fdpp = badfdp->next; /* remove the bad description from the list */
1943 free_fdesc (badfdp);
1944 }
1945 else
1946 fdpp = &(*fdpp)->next; /* advance the list pointer */
1947 }
1948
1949 assert (parser != NULL);
1950
1951 /* Generic initialisations before reading from file. */
1952 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1953
1954 /* Generic initialisations before parsing file with readline. */
1955 lineno = 0; /* reset global line number */
1956 charno = 0; /* reset global char number */
1957 linecharno = 0; /* reset global char number of line start */
1958
1959 parser (inf);
1960
1961 #ifdef ETAGS_REGEXPS
1962 regex_tag_multiline ();
1963 #endif /* ETAGS_REGEXPS */
1964 }
1965
1966 \f
1967 /*
1968 * Check whether an implicitly named tag should be created,
1969 * then call `pfnote'.
1970 * NAME is a string that is internally copied by this function.
1971 *
1972 * TAGS format specification
1973 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1974 * The following is explained in some more detail in etc/ETAGS.EBNF.
1975 *
1976 * make_tag creates tags with "implicit tag names" (unnamed tags)
1977 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1978 * 1. NAME does not contain any of the characters in NONAM;
1979 * 2. LINESTART contains name as either a rightmost, or rightmost but
1980 * one character, substring;
1981 * 3. the character, if any, immediately before NAME in LINESTART must
1982 * be a character in NONAM;
1983 * 4. the character, if any, immediately after NAME in LINESTART must
1984 * also be a character in NONAM.
1985 *
1986 * The implementation uses the notinname() macro, which recognises the
1987 * characters stored in the string `nonam'.
1988 * etags.el needs to use the same characters that are in NONAM.
1989 */
1990 static void
1991 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1992 char *name; /* tag name, or NULL if unnamed */
1993 int namelen; /* tag length */
1994 bool is_func; /* tag is a function */
1995 char *linestart; /* start of the line where tag is */
1996 int linelen; /* length of the line where tag is */
1997 int lno; /* line number */
1998 long cno; /* character number */
1999 {
2000 bool named = (name != NULL && namelen > 0);
2001
2002 if (!CTAGS && named) /* maybe set named to false */
2003 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2004 such that etags.el can guess a name from it. */
2005 {
2006 int i;
2007 register char *cp = name;
2008
2009 for (i = 0; i < namelen; i++)
2010 if (notinname (*cp++))
2011 break;
2012 if (i == namelen) /* rule #1 */
2013 {
2014 cp = linestart + linelen - namelen;
2015 if (notinname (linestart[linelen-1]))
2016 cp -= 1; /* rule #4 */
2017 if (cp >= linestart /* rule #2 */
2018 && (cp == linestart
2019 || notinname (cp[-1])) /* rule #3 */
2020 && strneq (name, cp, namelen)) /* rule #2 */
2021 named = FALSE; /* use implicit tag name */
2022 }
2023 }
2024
2025 if (named)
2026 name = savenstr (name, namelen);
2027 else
2028 name = NULL;
2029 pfnote (name, is_func, linestart, linelen, lno, cno);
2030 }
2031
2032 /* Record a tag. */
2033 static void
2034 pfnote (name, is_func, linestart, linelen, lno, cno)
2035 char *name; /* tag name, or NULL if unnamed */
2036 bool is_func; /* tag is a function */
2037 char *linestart; /* start of the line where tag is */
2038 int linelen; /* length of the line where tag is */
2039 int lno; /* line number */
2040 long cno; /* character number */
2041 {
2042 register node *np;
2043
2044 assert (name == NULL || name[0] != '\0');
2045 if (CTAGS && name == NULL)
2046 return;
2047
2048 np = xnew (1, node);
2049
2050 /* If ctags mode, change name "main" to M<thisfilename>. */
2051 if (CTAGS && !cxref_style && streq (name, "main"))
2052 {
2053 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2054 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2055 fp = etags_strrchr (np->name, '.');
2056 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2057 fp[0] = '\0';
2058 }
2059 else
2060 np->name = name;
2061 np->valid = TRUE;
2062 np->been_warned = FALSE;
2063 np->fdp = curfdp;
2064 np->is_func = is_func;
2065 np->lno = lno;
2066 if (np->fdp->usecharno)
2067 /* Our char numbers are 0-base, because of C language tradition?
2068 ctags compatibility? old versions compatibility? I don't know.
2069 Anyway, since emacs's are 1-base we expect etags.el to take care
2070 of the difference. If we wanted to have 1-based numbers, we would
2071 uncomment the +1 below. */
2072 np->cno = cno /* + 1 */ ;
2073 else
2074 np->cno = invalidcharno;
2075 np->left = np->right = NULL;
2076 if (CTAGS && !cxref_style)
2077 {
2078 if (strlen (linestart) < 50)
2079 np->regex = concat (linestart, "$", "");
2080 else
2081 np->regex = savenstr (linestart, 50);
2082 }
2083 else
2084 np->regex = savenstr (linestart, linelen);
2085
2086 add_node (np, &nodehead);
2087 }
2088
2089 /*
2090 * free_tree ()
2091 * recurse on left children, iterate on right children.
2092 */
2093 static void
2094 free_tree (np)
2095 register node *np;
2096 {
2097 while (np)
2098 {
2099 register node *node_right = np->right;
2100 free_tree (np->left);
2101 if (np->name != NULL)
2102 free (np->name);
2103 free (np->regex);
2104 free (np);
2105 np = node_right;
2106 }
2107 }
2108
2109 /*
2110 * free_fdesc ()
2111 * delete a file description
2112 */
2113 static void
2114 free_fdesc (fdp)
2115 register fdesc *fdp;
2116 {
2117 if (fdp->infname != NULL) free (fdp->infname);
2118 if (fdp->infabsname != NULL) free (fdp->infabsname);
2119 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2120 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2121 if (fdp->prop != NULL) free (fdp->prop);
2122 free (fdp);
2123 }
2124
2125 /*
2126 * add_node ()
2127 * Adds a node to the tree of nodes. In etags mode, sort by file
2128 * name. In ctags mode, sort by tag name. Make no attempt at
2129 * balancing.
2130 *
2131 * add_node is the only function allowed to add nodes, so it can
2132 * maintain state.
2133 */
2134 static void
2135 add_node (np, cur_node_p)
2136 node *np, **cur_node_p;
2137 {
2138 register int dif;
2139 register node *cur_node = *cur_node_p;
2140
2141 if (cur_node == NULL)
2142 {
2143 *cur_node_p = np;
2144 last_node = np;
2145 return;
2146 }
2147
2148 if (!CTAGS)
2149 /* Etags Mode */
2150 {
2151 /* For each file name, tags are in a linked sublist on the right
2152 pointer. The first tags of different files are a linked list
2153 on the left pointer. last_node points to the end of the last
2154 used sublist. */
2155 if (last_node != NULL && last_node->fdp == np->fdp)
2156 {
2157 /* Let's use the same sublist as the last added node. */
2158 assert (last_node->right == NULL);
2159 last_node->right = np;
2160 last_node = np;
2161 }
2162 else if (cur_node->fdp == np->fdp)
2163 {
2164 /* Scanning the list we found the head of a sublist which is
2165 good for us. Let's scan this sublist. */
2166 add_node (np, &cur_node->right);
2167 }
2168 else
2169 /* The head of this sublist is not good for us. Let's try the
2170 next one. */
2171 add_node (np, &cur_node->left);
2172 } /* if ETAGS mode */
2173
2174 else
2175 {
2176 /* Ctags Mode */
2177 dif = strcmp (np->name, cur_node->name);
2178
2179 /*
2180 * If this tag name matches an existing one, then
2181 * do not add the node, but maybe print a warning.
2182 */
2183 if (!dif)
2184 {
2185 if (np->fdp == cur_node->fdp)
2186 {
2187 if (!no_warnings)
2188 {
2189 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2190 np->fdp->infname, lineno, np->name);
2191 fprintf (stderr, "Second entry ignored\n");
2192 }
2193 }
2194 else if (!cur_node->been_warned && !no_warnings)
2195 {
2196 fprintf
2197 (stderr,
2198 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2199 np->fdp->infname, cur_node->fdp->infname, np->name);
2200 cur_node->been_warned = TRUE;
2201 }
2202 return;
2203 }
2204
2205 /* Actually add the node */
2206 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2207 } /* if CTAGS mode */
2208 }
2209
2210 /*
2211 * invalidate_nodes ()
2212 * Scan the node tree and invalidate all nodes pointing to the
2213 * given file description (CTAGS case) or free them (ETAGS case).
2214 */
2215 static void
2216 invalidate_nodes (badfdp, npp)
2217 fdesc *badfdp;
2218 node **npp;
2219 {
2220 node *np = *npp;
2221
2222 if (np == NULL)
2223 return;
2224
2225 if (CTAGS)
2226 {
2227 if (np->left != NULL)
2228 invalidate_nodes (badfdp, &np->left);
2229 if (np->fdp == badfdp)
2230 np->valid = FALSE;
2231 if (np->right != NULL)
2232 invalidate_nodes (badfdp, &np->right);
2233 }
2234 else
2235 {
2236 assert (np->fdp != NULL);
2237 if (np->fdp == badfdp)
2238 {
2239 *npp = np->left; /* detach the sublist from the list */
2240 np->left = NULL; /* isolate it */
2241 free_tree (np); /* free it */
2242 invalidate_nodes (badfdp, npp);
2243 }
2244 else
2245 invalidate_nodes (badfdp, &np->left);
2246 }
2247 }
2248
2249 \f
2250 static int total_size_of_entries __P((node *));
2251 static int number_len __P((long));
2252
/* Number of decimal digits needed to print NUM, which is expected to be
   non-negative.  Any value below 10 yields 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2263
2264 /*
2265 * Return total number of characters that put_entries will output for
2266 * the nodes in the linked list at the right of the specified node.
2267 * This count is irrelevant with etags.el since emacs 19.34 at least,
2268 * but is still supplied for backward compatibility.
2269 */
2270 static int
2271 total_size_of_entries (np)
2272 register node *np;
2273 {
2274 register int total = 0;
2275
2276 for (; np != NULL; np = np->right)
2277 if (np->valid)
2278 {
2279 total += strlen (np->regex) + 1; /* pat\177 */
2280 if (np->name != NULL)
2281 total += strlen (np->name) + 1; /* name\001 */
2282 total += number_len ((long) np->lno) + 1; /* lno, */
2283 if (np->cno != invalidcharno) /* cno */
2284 total += number_len (np->cno);
2285 total += 1; /* newline */
2286 }
2287
2288 return total;
2289 }
2290
2291 static void
2292 put_entries (np)
2293 register node *np;
2294 {
2295 register char *sp;
2296 static fdesc *fdp = NULL;
2297
2298 if (np == NULL)
2299 return;
2300
2301 /* Output subentries that precede this one */
2302 if (CTAGS)
2303 put_entries (np->left);
2304
2305 /* Output this entry */
2306 if (np->valid)
2307 {
2308 if (!CTAGS)
2309 {
2310 /* Etags mode */
2311 if (fdp != np->fdp)
2312 {
2313 fdp = np->fdp;
2314 fprintf (tagf, "\f\n%s,%d\n",
2315 fdp->taggedfname, total_size_of_entries (np));
2316 fdp->written = TRUE;
2317 }
2318 fputs (np->regex, tagf);
2319 fputc ('\177', tagf);
2320 if (np->name != NULL)
2321 {
2322 fputs (np->name, tagf);
2323 fputc ('\001', tagf);
2324 }
2325 fprintf (tagf, "%d,", np->lno);
2326 if (np->cno != invalidcharno)
2327 fprintf (tagf, "%ld", np->cno);
2328 fputs ("\n", tagf);
2329 }
2330 else
2331 {
2332 /* Ctags mode */
2333 if (np->name == NULL)
2334 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2335
2336 if (cxref_style)
2337 {
2338 if (vgrind_style)
2339 fprintf (stdout, "%s %s %d\n",
2340 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2341 else
2342 fprintf (stdout, "%-16s %3d %-16s %s\n",
2343 np->name, np->lno, np->fdp->taggedfname, np->regex);
2344 }
2345 else
2346 {
2347 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2348
2349 if (np->is_func)
2350 { /* function or #define macro with args */
2351 putc (searchar, tagf);
2352 putc ('^', tagf);
2353
2354 for (sp = np->regex; *sp; sp++)
2355 {
2356 if (*sp == '\\' || *sp == searchar)
2357 putc ('\\', tagf);
2358 putc (*sp, tagf);
2359 }
2360 putc (searchar, tagf);
2361 }
2362 else
2363 { /* anything else; text pattern inadequate */
2364 fprintf (tagf, "%d", np->lno);
2365 }
2366 putc ('\n', tagf);
2367 }
2368 }
2369 } /* if this node contains a valid tag */
2370
2371 /* Output subentries that follow this one */
2372 put_entries (np->right);
2373 if (!CTAGS)
2374 put_entries (np->left);
2375 }
2376
2377 \f
/* C extensions.
   These are bit masks: C_STAR and C_JAVA both include the C_PLPL bit,
   so a keyword marked C_PLPL is also recognised in C* and Java.  A
   keyword that must be recognised in Java only is marked with the
   Java-specific bit alone, written (C_JAVA & ~C_PLPL). */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};

static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));

/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline.

The Java-only masks are written with the bitwise complement `~'.
Using the logical negation `!' instead makes the mask evaluate to 0,
which means "keyword in every language". */
/*%<*/
/* C code produced by gperf version 3.0.1 */
/* Command-line: gperf -m 5  */
/* Computed positions: -k'1-2' */

struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
/* maximum key range = 31, duplicates = 0 */

/* Perfect hash of a keyword candidate: its length plus the values
   associated with its first two characters.  STR must have at least
   two characters; in_word_set guarantees that before calling. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 1, 34, 34, 34, 14, 14,
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      34, 34, 34, 34, 34, 8, 34, 11, 34, 12,
      11, 0, 1, 34, 7, 0, 34, 34, 11, 9,
      0, 4, 0, 34, 7, 4, 14, 21, 34, 15,
      0, 2, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34
    };
  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
}

/* Return the keyword table entry whose name is exactly the first LEN
   characters of STR, or 0 when there is none. */
static struct C_stab_entry *
in_word_set (str, len)
     register const char *str;
     register unsigned int len;
{
  enum
    {
      TOTAL_KEYWORDS = 31,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 15,
      MIN_HASH_VALUE = 3,
      MAX_HASH_VALUE = 33
    };

  static struct C_stab_entry wordlist[] =
    {
      {""}, {""}, {""},
      {"if",            0,                      st_C_ignore},
      {"enum",          0,                      st_C_enum},
      {"@end",          0,                      st_C_objend},
      {"extern",        0,                      st_C_extern},
      {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"for",           0,                      st_C_ignore},
      {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
      {"@protocol",     0,                      st_C_objprot},
      {"@interface",    0,                      st_C_objprot},
      {"operator",      C_PLPL,                 st_C_operator},
      {"return",        0,                      st_C_ignore},
      {"friend",        C_PLPL,                 st_C_ignore},
      {"import",        (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"@implementation",0,                     st_C_objimpl},
      {"define",        0,                      st_C_define},
      {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"namespace",     C_PLPL,                 st_C_struct},
      {"domain",        C_STAR,                 st_C_struct},
      {"template",      0,                      st_C_template},
      {"typedef",       0,                      st_C_typedef},
      {"struct",        0,                      st_C_struct},
      {"switch",        0,                      st_C_ignore},
      {"union",         0,                      st_C_struct},
      {"while",         0,                      st_C_ignore},
      {"class",         0,                      st_C_class},
      {"__attribute__", 0,                      st_C_attribute},
      {"SYSCALL",       0,                      st_C_gnumacro},
      {"PSEUDO",        0,                      st_C_gnumacro},
      {"ENTRY",         0,                      st_C_gnumacro},
      {"DEFUN",         0,                      st_C_gnumacro}
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
/*%>*/

/* Return the symbol type of the LEN characters long token STR when read
   in the language described by the C_EXT mask.  The result is st_none
   when the token is not in the keyword table, or when it is a keyword
   restricted to languages whose bits are all absent from C_EXT. */
static enum sym_type
C_symtype (str, len, c_ext)
     char *str;
     int len;
     int c_ext;
{
  register struct C_stab_entry *se = in_word_set (str, len);

  if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
    return st_none;
  return se->type;
}
2584
2585 \f
2586 /*
2587 * Ignoring __attribute__ ((list))
/* Set to TRUE by consider_token when it meets the __attribute__ keyword. */
2588 */
2589 static bool inattribute; /* looking at an __attribute__ construct */
2590
2591 /*
2592 * C functions and variables are recognized using a simple
2593 * finite automaton. fvdef is its state variable.
2594 */
2595 static enum
2596 {
2597 fvnone, /* nothing seen */
2598 fdefunkey, /* Emacs DEFUN keyword seen */
2599 fdefunname, /* Emacs DEFUN name seen */
2600 foperator, /* func: operator keyword seen (cplpl) */
2601 fvnameseen, /* function or variable name seen */
2602 fstartlist, /* func: just after open parenthesis */
2603 finlist, /* func: in parameter list */
2604 flistseen, /* func: after parameter list */
2605 fignore, /* func: before open brace */
2606 vignore /* var-like: ignore until ';' */
2607 } fvdef;
2608
2609 static bool fvextern; /* func or var: extern keyword seen */
2610
2611 /*
2612 * typedefs are recognized using a simple finite automaton.
2613 * typdef is its state variable.
2614 */
2615 static enum
2616 {
2617 tnone, /* nothing seen */
2618 tkeyseen, /* typedef keyword seen */
2619 ttypeseen, /* defined type seen */
2620 tinbody, /* inside typedef body */
2621 tend, /* just before typedef tag */
2622 tignore /* junk after typedef tag */
2623 } typdef;
2624
2625 /*
2626 * struct-like structures (enum, struct and union) are recognized
2627 * using another simple finite automaton. `structdef' is its state
2628 * variable.
2629 */
2630 static enum
2631 {
2632 snone, /* nothing seen yet,
2633 or in struct body if bracelev > 0 */
2634 skeyseen, /* struct-like keyword seen */
2635 stagseen, /* struct-like tag seen */
2636 scolonseen /* colon seen after struct-like tag */
2637 } structdef;
2638
2639 /*
2640 * When objdef is different from onone, objtag is the name of the class.
/* consider_token stores a freshly allocated copy of the class name here
   and never frees the previous one. */
2641 */
2642 static char *objtag = "<uninited>";
2643
2644 /*
2645 * Yet another little state machine to deal with preprocessor lines.
2646 */
2647 static enum
2648 {
2649 dnone, /* nothing seen */
2650 dsharpseen, /* '#' seen as first char on line */
2651 ddefineseen, /* '#' and 'define' seen */
2652 dignorerest /* ignore rest of line */
2653 } definedef;
2654
2655 /*
2656 * State machine for Objective C protocols and implementations.
2657 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2658 */
2659 static enum
2660 {
2661 onone, /* nothing seen */
2662 oprotocol, /* @interface or @protocol seen */
2663 oimplementation, /* @implementation seen */
2664 otagseen, /* class name seen */
2665 oparenseen, /* parenthesis before category seen */
2666 ocatseen, /* category name seen */
2667 oinbody, /* in @implementation body */
2668 omethodsign, /* in @implementation body, after +/- */
2669 omethodtag, /* after method name */
2670 omethodcolon, /* after method colon */
2671 omethodparm, /* after method parameter */
2672 oignore /* wait for @end */
2673 } objdef;
2674
2675
2676 /*
2677 * Use this structure to keep info about the token read, and how it
2678 * should be tagged. Used by the make_C_tag function to build a tag.
2679 */
2680 static struct tok
2681 {
2682 char *line; /* string containing the token */
2683 int offset; /* where the token starts in LINE */
2684 int length; /* token length */
2685 /*
2686 The previous members can be used to pass strings around for generic
2687 purposes. The following ones specifically refer to creating tags. In this
2688 case the token contained here is the pattern that will be used to create a
2689 tag.
2690 */
2691 bool valid; /* when FALSE, do not create a tag; the token
2692 should be invalidated whenever a state machine
2693 is reset prematurely */
2694 bool named; /* create a named tag */
2695 int lineno; /* source line number of tag */
2696 long linepos; /* source char number of tag */
2697 } token; /* latest token read */
2698
2699 /*
2700 * Variables and functions for dealing with nested structures.
2701 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2702 */
2703 static void pushclass_above __P((int, char *, int));
2704 static void popclass_above __P((int));
2705 static void write_classname __P((linebuffer *, char *qualifier));
2706
/* The two arrays are parallel: entry I holds the name (NULL if unnamed)
   and the brace level of the I-th enclosing declaration.  Entries are
   kept in strictly increasing brace level order by pushclass_above. */
2707 static struct {
2708 char **cname; /* nested class names */
2709 int *bracelev; /* nested class brace level */
2710 int nl; /* class nesting level (elements used) */
2711 int size; /* length of the array */
2712 } cstack; /* stack for nested declaration tags */
2713 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2714 #define nestlev (cstack.nl)
2715 /* After struct keyword or in struct body, not inside a nested function. */
/* This macro uses a variable named bracelev, which must be in scope
   where the macro is expanded. */
2716 #define instruct (structdef == snone && nestlev > 0 \
2717 && bracelev == cstack.bracelev[nestlev-1] + 1)
2718
2719 static void
2720 pushclass_above (bracelev, str, len)
2721 int bracelev;
2722 char *str;
2723 int len;
2724 {
2725 int nl;
2726
2727 popclass_above (bracelev);
2728 nl = cstack.nl;
2729 if (nl >= cstack.size)
2730 {
2731 int size = cstack.size *= 2;
2732 xrnew (cstack.cname, size, char *);
2733 xrnew (cstack.bracelev, size, int);
2734 }
2735 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2736 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2737 cstack.bracelev[nl] = bracelev;
2738 cstack.nl = nl + 1;
2739 }
2740
2741 static void
2742 popclass_above (bracelev)
2743 int bracelev;
2744 {
2745 int nl;
2746
2747 for (nl = cstack.nl - 1;
2748 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2749 nl--)
2750 {
2751 if (cstack.cname[nl] != NULL)
2752 free (cstack.cname[nl]);
2753 cstack.nl = nl;
2754 }
2755 }
2756
2757 static void
2758 write_classname (cn, qualifier)
2759 linebuffer *cn;
2760 char *qualifier;
2761 {
2762 int i, len;
2763 int qlen = strlen (qualifier);
2764
2765 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2766 {
2767 len = 0;
2768 cn->len = 0;
2769 cn->buffer[0] = '\0';
2770 }
2771 else
2772 {
2773 len = strlen (cstack.cname[0]);
2774 linebuffer_setlen (cn, len);
2775 strcpy (cn->buffer, cstack.cname[0]);
2776 }
2777 for (i = 1; i < cstack.nl; i++)
2778 {
2779 char *s;
2780 int slen;
2781
2782 s = cstack.cname[i];
2783 if (s == NULL)
2784 continue;
2785 slen = strlen (s);
2786 len += slen + qlen;
2787 linebuffer_setlen (cn, len);
2788 strncat (cn->buffer, qualifier, qlen);
2789 strncat (cn->buffer, s, slen);
2790 }
2791 }
2792
2793 \f
2794 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2795 static void make_C_tag __P((bool));
2796
2797 /*
2798 * consider_token ()
2799 * checks to see if the current token is at the start of a
2800 * function or variable, or corresponds to a typedef, or
2801 * is a struct/union/enum tag, or #define, or an enum constant.
2802 *
2803 * *IS_FUNC_OR_VAR gets TRUE iff the token is a function or #define macro
2804 * with args. C_EXTP points to which language we are looking at.
 *
 * Returns TRUE when the token is one of the above, FALSE otherwise.
 * Note that *IS_FUNC_OR_VAR is assigned only on some of the paths that
 * return TRUE; on the others it keeps the value set by the caller.
 * *C_EXTP may be switched from automatic detection (C_AUTO) to C++.
2805 *
2806 * Globals
2807 * fvdef IN OUT
2808 * structdef IN OUT
2809 * definedef IN OUT
2810 * typdef IN OUT
2811 * objdef IN OUT
 * The code below also writes inattribute, fvextern, objtag and token_name.
2812 */
2813
2814 static bool
2815 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2816 register char *str; /* IN: token pointer */
2817 register int len; /* IN: token length */
2818 register int c; /* IN: first char after the token */
2819 int *c_extp; /* IN, OUT: C extensions mask */
2820 int bracelev; /* IN: brace level */
2821 int parlev; /* IN: parenthesis level */
2822 bool *is_func_or_var; /* OUT: function or variable found */
2823 {
2824 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2825 structtype is the type of the preceding struct-like keyword, and
2826 structbracelev is the brace level where it has been seen. */
2827 static enum sym_type structtype;
2828 static int structbracelev;
2829 static enum sym_type toktype;
2830
2831
/* Classify the token as a keyword of the language in *C_EXTP, or st_none. */
2832 toktype = C_symtype (str, len, *c_extp);
2833
2834 /*
2835 * Skip __attribute__
2836 */
2837 if (toktype == st_C_attribute)
2838 {
2839 inattribute = TRUE;
2840 return FALSE;
2841 }
2842
2843 /*
2844 * Advance the definedef state machine.
2845 */
2846 switch (definedef)
2847 {
2848 case dnone:
2849 /* We're not on a preprocessor line. */
2850 if (toktype == st_C_gnumacro)
2851 {
2852 fvdef = fdefunkey;
2853 return FALSE;
2854 }
2855 break;
2856 case dsharpseen:
/* Only `define' is of interest after '#'; after any other token the
   rest of the line is ignored. */
2857 if (toktype == st_C_define)
2858 {
2859 definedef = ddefineseen;
2860 }
2861 else
2862 {
2863 definedef = dignorerest;
2864 }
2865 return FALSE;
2866 case ddefineseen:
2867 /*
2868 * Make a tag for any macro, unless it is a constant
2869 * and constantypedefs is FALSE.
2870 */
2871 definedef = dignorerest;
2872 *is_func_or_var = (c == '(');
2873 if (!*is_func_or_var && !constantypedefs)
2874 return FALSE;
2875 else
2876 return TRUE;
2877 case dignorerest:
2878 return FALSE;
2879 default:
2880 error ("internal error: definedef value.", (char *)NULL);
2881 }
2882
2883 /*
2884 * Now typedefs
2885 */
2886 switch (typdef)
2887 {
2888 case tnone:
2889 if (toktype == st_C_typedef)
2890 {
2891 if (typedefs)
2892 typdef = tkeyseen;
2893 fvextern = FALSE;
2894 fvdef = fvnone;
2895 return FALSE;
2896 }
2897 break;
2898 case tkeyseen:
2899 switch (toktype)
2900 {
2901 case st_none:
2902 case st_C_class:
2903 case st_C_struct:
2904 case st_C_enum:
2905 typdef = ttypeseen;
2906 }
2907 break;
2908 case ttypeseen:
2909 if (structdef == snone && fvdef == fvnone)
2910 {
2911 fvdef = fvnameseen;
2912 return TRUE;
2913 }
2914 break;
2915 case tend:
2916 switch (toktype)
2917 {
2918 case st_C_class:
2919 case st_C_struct:
2920 case st_C_enum:
2921 return FALSE;
2922 }
2923 return TRUE;
2924 }
2925
2926 /*
2927 * This structdef business is NOT invoked when we are ctags and the
2928 * file is plain C. This is because a struct tag may have the same
2929 * name as another tag, and this loses with ctags.
2930 */
2931 switch (toktype)
2932 {
2933 case st_C_javastruct:
2934 if (structdef == stagseen)
2935 structdef = scolonseen;
2936 return FALSE;
2937 case st_C_template:
2938 case st_C_class:
2939 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2940 && bracelev == 0
2941 && definedef == dnone && structdef == snone
2942 && typdef == tnone && fvdef == fvnone)
2943 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2944 if (toktype == st_C_template)
2945 break;
2946 /* FALLTHRU */
2947 case st_C_struct:
2948 case st_C_enum:
2949 if (parlev == 0
2950 && fvdef != vignore
2951 && (typdef == tkeyseen
2952 || (typedefs_or_cplusplus && structdef == snone)))
2953 {
2954 structdef = skeyseen;
2955 structtype = toktype;
2956 structbracelev = bracelev;
2957 if (fvdef == fvnameseen)
2958 fvdef = fvnone;
2959 }
2960 return FALSE;
2961 }
2962
/* The first token that gets here after a struct-like keyword is taken
   as the tag of the struct-like construct. */
2963 if (structdef == skeyseen)
2964 {
2965 structdef = stagseen;
2966 return TRUE;
2967 }
2968
2969 if (typdef != tnone)
2970 definedef = dnone;
2971
2972 /* Detect Objective C constructs. */
2973 switch (objdef)
2974 {
2975 case onone:
2976 switch (toktype)
2977 {
2978 case st_C_objprot:
2979 objdef = oprotocol;
2980 return FALSE;
2981 case st_C_objimpl:
2982 objdef = oimplementation;
2983 return FALSE;
2984 }
2985 break;
2986 case oimplementation:
2987 /* Save the class tag for functions or variables defined inside. */
2988 objtag = savenstr (str, len);
2989 objdef = oinbody;
2990 return FALSE;
2991 case oprotocol:
2992 /* Save the class tag for categories. */
2993 objtag = savenstr (str, len);
2994 objdef = otagseen;
2995 *is_func_or_var = TRUE;
2996 return TRUE;
2997 case oparenseen:
2998 objdef = ocatseen;
2999 *is_func_or_var = TRUE;
3000 return TRUE;
3001 case oinbody:
3002 break;
3003 case omethodsign:
3004 if (parlev == 0)
3005 {
3006 fvdef = fvnone;
3007 objdef = omethodtag;
/* Start the method name in token_name with this token. */
3008 linebuffer_setlen (&token_name, len);
3009 strncpy (token_name.buffer, str, len);
3010 token_name.buffer[len] = '\0';
3011 return TRUE;
3012 }
3013 return FALSE;
3014 case omethodcolon:
3015 if (parlev == 0)
3016 objdef = omethodparm;
3017 return FALSE;
3018 case omethodparm:
3019 if (parlev == 0)
3020 {
3021 fvdef = fvnone;
3022 objdef = omethodtag;
/* Append this token to the method name collected so far. */
3023 linebuffer_setlen (&token_name, token_name.len + len);
3024 strncat (token_name.buffer, str, len);
3025 return TRUE;
3026 }
3027 return FALSE;
3028 case oignore:
3029 if (toktype == st_C_objend)
3030 {
3031 /* Memory leakage here: the string pointed by objtag is
3032 never released, because many tests would be needed to
3033 avoid breaking on incorrect input code. The amount of
3034 memory leaked here is the sum of the lengths of the
3035 class tags.
3036 free (objtag); */
3037 objdef = onone;
3038 }
3039 return FALSE;
3040 }
3041
3042 /* A function, variable or enum constant? */
3043 switch (toktype)
3044 {
3045 case st_C_extern:
3046 fvextern = TRUE;
3047 switch (fvdef)
3048 {
3049 case finlist:
3050 case flistseen:
3051 case fignore:
3052 case vignore:
3053 break;
3054 default:
3055 fvdef = fvnone;
3056 }
3057 return FALSE;
3058 case st_C_ignore:
3059 fvextern = FALSE;
3060 fvdef = vignore;
3061 return FALSE;
3062 case st_C_operator:
3063 fvdef = foperator;
3064 *is_func_or_var = TRUE;
3065 return TRUE;
3066 case st_none:
3067 if (constantypedefs
3068 && structdef == snone
3069 && structtype == st_C_enum && bracelev > structbracelev)
3070 return TRUE; /* enum constant */
3071 switch (fvdef)
3072 {
3073 case fdefunkey:
3074 if (bracelev > 0)
3075 break;
3076 fvdef = fdefunname; /* GNU macro */
3077 *is_func_or_var = TRUE;
3078 return TRUE;
3079 case fvnone:
3080 switch (typdef)
3081 {
3082 case ttypeseen:
3083 return FALSE;
3084 case tnone:
3085 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3086 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3087 {
3088 fvdef = vignore;
3089 return FALSE;
3090 }
3091 break;
3092 }
3093 /* FALLTHRU */
3094 case fvnameseen:
/* A token ending in "::operator" starts a C++ operator definition. */
3095 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3096 {
3097 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3098 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3099 fvdef = foperator;
3100 *is_func_or_var = TRUE;
3101 return TRUE;
3102 }
3103 if (bracelev > 0 && !instruct)
3104 break;
3105 fvdef = fvnameseen; /* function or variable */
3106 *is_func_or_var = TRUE;
3107 return TRUE;
3108 }
3109 break;
3110 }
3111
3112 return FALSE;
3113 }
3114
3115 \f
3116 /*
3117 * C_entries often keeps pointers to tokens or lines which are older than
3118 * the line currently read. By keeping two line buffers, and switching
3119 * them at end of line, it is possible to use those pointers.
3120 */
3121 static struct
3122 {
3123 long linepos;
3124 linebuffer lb;
3125 } lbs[2];
3126
3127 #define current_lb_is_new (newndx == curndx)
3128 #define switch_line_buffers() (curndx = 1 - curndx)
3129
3130 #define curlb (lbs[curndx].lb)
3131 #define newlb (lbs[newndx].lb)
3132 #define curlinepos (lbs[curndx].linepos)
3133 #define newlinepos (lbs[newndx].linepos)
3134
3135 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3136 #define cplpl (c_ext & C_PLPL)
3137 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3138
3139 #define CNL_SAVE_DEFINEDEF() \
3140 do { \
3141 curlinepos = charno; \
3142 readline (&curlb, inf); \
3143 lp = curlb.buffer; \
3144 quotednl = FALSE; \
3145 newndx = curndx; \
3146 } while (0)
3147
3148 #define CNL() \
3149 do { \
3150 CNL_SAVE_DEFINEDEF(); \
3151 if (savetoken.valid) \
3152 { \
3153 token = savetoken; \
3154 savetoken.valid = FALSE; \
3155 } \
3156 definedef = dnone; \
3157 } while (0)
3158
3159
3160 static void
3161 make_C_tag (isfun)
3162 bool isfun;
3163 {
3164 /* This function should never be called when token.valid is FALSE, but
3165 we must protect against invalid input or internal errors. */
3166 if (!DEBUG && !token.valid)
3167 return;
3168
3169 if (token.valid)
3170 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3171 token.offset+token.length+1, token.lineno, token.linepos);
3172 else /* this case is optimised away if !DEBUG */
3173 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3174 token_name.len + 17, isfun, token.line,
3175 token.offset+token.length+1, token.lineno, token.linepos);
3176
3177 token.valid = FALSE;
3178 }
3179
3180
3181 /*
3182 * C_entries ()
3183 * This routine finds functions, variables, typedefs,
3184 * #define's, enum constants and struct/union/enum definitions in
3185 * C syntax and adds them to the list.
3186 */
3187 static void
3188 C_entries (c_ext, inf)
3189 int c_ext; /* extension of C */
3190 FILE *inf; /* input file */
3191 {
3192 register char c; /* latest char read; '\0' for end of line */
3193 register char *lp; /* pointer one beyond the character `c' */
3194 int curndx, newndx; /* indices for current and new lb */
3195 register int tokoff; /* offset in line of start of current token */
3196 register int toklen; /* length of current token */
3197 char *qualifier; /* string used to qualify names */
3198 int qlen; /* length of qualifier */
3199 int bracelev; /* current brace level */
3200 int bracketlev; /* current bracket level */
3201 int parlev; /* current parenthesis level */
3202 int attrparlev; /* __attribute__ parenthesis level */
3203 int templatelev; /* current template level */
3204 int typdefbracelev; /* bracelev where a typedef struct body begun */
3205 bool incomm, inquote, inchar, quotednl, midtoken;
3206 bool yacc_rules; /* in the rules part of a yacc file */
3207 struct tok savetoken; /* token saved during preprocessor handling */
3208
3209
3210 linebuffer_init (&lbs[0].lb);
3211 linebuffer_init (&lbs[1].lb);
3212 if (cstack.size == 0)
3213 {
3214 cstack.size = (DEBUG) ? 1 : 4;
3215 cstack.nl = 0;
3216 cstack.cname = xnew (cstack.size, char *);
3217 cstack.bracelev = xnew (cstack.size, int);
3218 }
3219
3220 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3221 curndx = newndx = 0;
3222 lp = curlb.buffer;
3223 *lp = 0;
3224
3225 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3226 structdef = snone; definedef = dnone; objdef = onone;
3227 yacc_rules = FALSE;
3228 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3229 token.valid = savetoken.valid = FALSE;
3230 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3231 if (cjava)
3232 { qualifier = "."; qlen = 1; }
3233 else
3234 { qualifier = "::"; qlen = 2; }
3235
3236
3237 while (!feof (inf))
3238 {
3239 c = *lp++;
3240 if (c == '\\')
3241 {
3242 /* If we are at the end of the line, the next character is a
3243 '\0'; do not skip it, because it is what tells us
3244 to read the next line. */
3245 if (*lp == '\0')
3246 {
3247 quotednl = TRUE;
3248 continue;
3249 }
3250 lp++;
3251 c = ' ';
3252 }
3253 else if (incomm)
3254 {
3255 switch (c)
3256 {
3257 case '*':
3258 if (*lp == '/')
3259 {
3260 c = *lp++;
3261 incomm = FALSE;
3262 }
3263 break;
3264 case '\0':
3265 /* Newlines inside comments do not end macro definitions in
3266 traditional cpp. */
3267 CNL_SAVE_DEFINEDEF ();
3268 break;
3269 }
3270 continue;
3271 }
3272 else if (inquote)
3273 {
3274 switch (c)
3275 {
3276 case '"':
3277 inquote = FALSE;
3278 break;
3279 case '\0':
3280 /* Newlines inside strings do not end macro definitions
3281 in traditional cpp, even though compilers don't
3282 usually accept them. */
3283 CNL_SAVE_DEFINEDEF ();
3284 break;
3285 }
3286 continue;
3287 }
3288 else if (inchar)
3289 {
3290 switch (c)
3291 {
3292 case '\0':
3293 /* Hmmm, something went wrong. */
3294 CNL ();
3295 /* FALLTHRU */
3296 case '\'':
3297 inchar = FALSE;
3298 break;
3299 }
3300 continue;
3301 }
3302 else if (bracketlev > 0)
3303 {
3304 switch (c)
3305 {
3306 case ']':
3307 if (--bracketlev > 0)
3308 continue;
3309 break;
3310 case '\0':
3311 CNL_SAVE_DEFINEDEF ();
3312 break;
3313 }
3314 continue;
3315 }
3316 else switch (c)
3317 {
3318 case '"':
3319 inquote = TRUE;
3320 if (inattribute)
3321 break;
3322 switch (fvdef)
3323 {
3324 case fdefunkey:
3325 case fstartlist:
3326 case finlist:
3327 case fignore:
3328 case vignore:
3329 break;
3330 default:
3331 fvextern = FALSE;
3332 fvdef = fvnone;
3333 }
3334 continue;
3335 case '\'':
3336 inchar = TRUE;
3337 if (inattribute)
3338 break;
3339 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3340 {
3341 fvextern = FALSE;
3342 fvdef = fvnone;
3343 }
3344 continue;
3345 case '/':
3346 if (*lp == '*')
3347 {
3348 lp++;
3349 incomm = TRUE;
3350 continue;
3351 }
3352 else if (/* cplpl && */ *lp == '/')
3353 {
3354 c = '\0';
3355 break;
3356 }
3357 else
3358 break;
3359 case '%':
3360 if ((c_ext & YACC) && *lp == '%')
3361 {
3362 /* Entering or exiting rules section in yacc file. */
3363 lp++;
3364 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3365 typdef = tnone; structdef = snone;
3366 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3367 bracelev = 0;
3368 yacc_rules = !yacc_rules;
3369 continue;
3370 }
3371 else
3372 break;
3373 case '#':
3374 if (definedef == dnone)
3375 {
3376 char *cp;
3377 bool cpptoken = TRUE;
3378
3379 /* Look back on this line. If all blanks, or nonblanks
3380 followed by an end of comment, this is a preprocessor
3381 token. */
3382 for (cp = newlb.buffer; cp < lp-1; cp++)
3383 if (!iswhite (*cp))
3384 {
3385 if (*cp == '*' && *(cp+1) == '/')
3386 {
3387 cp++;
3388 cpptoken = TRUE;
3389 }
3390 else
3391 cpptoken = FALSE;
3392 }
3393 if (cpptoken)
3394 definedef = dsharpseen;
3395 } /* if (definedef == dnone) */
3396 continue;
3397 case '[':
3398 bracketlev++;
3399 continue;
3400 } /* switch (c) */
3401
3402
3403 /* Consider token only if some involved conditions are satisfied. */
3404 if (typdef != tignore
3405 && definedef != dignorerest
3406 && fvdef != finlist
3407 && templatelev == 0
3408 && (definedef != dnone
3409 || structdef != scolonseen)
3410 && !inattribute)
3411 {
3412 if (midtoken)
3413 {
3414 if (endtoken (c))
3415 {
3416 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3417 /* This handles :: in the middle,
3418 but not at the beginning of an identifier.
3419 Also, space-separated :: is not recognised. */
3420 {
3421 if (c_ext & C_AUTO) /* automatic detection of C++ */
3422 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3423 lp += 2;
3424 toklen += 2;
3425 c = lp[-1];
3426 goto still_in_token;
3427 }
3428 else
3429 {
3430 bool funorvar = FALSE;
3431
3432 if (yacc_rules
3433 || consider_token (newlb.buffer + tokoff, toklen, c,
3434 &c_ext, bracelev, parlev,
3435 &funorvar))
3436 {
3437 if (fvdef == foperator)
3438 {
3439 char *oldlp = lp;
3440 lp = skip_spaces (lp-1);
3441 if (*lp != '\0')
3442 lp += 1;
3443 while (*lp != '\0'
3444 && !iswhite (*lp) && *lp != '(')
3445 lp += 1;
3446 c = *lp++;
3447 toklen += lp - oldlp;
3448 }
3449 token.named = FALSE;
3450 if (!plainc
3451 && nestlev > 0 && definedef == dnone)
3452 /* in struct body */
3453 {
3454 write_classname (&token_name, qualifier);
3455 linebuffer_setlen (&token_name,
3456 token_name.len+qlen+toklen);
3457 strcat (token_name.buffer, qualifier);
3458 strncat (token_name.buffer,
3459 newlb.buffer + tokoff, toklen);
3460 token.named = TRUE;
3461 }
3462 else if (objdef == ocatseen)
3463 /* Objective C category */
3464 {
3465 int len = strlen (objtag) + 2 + toklen;
3466 linebuffer_setlen (&token_name, len);
3467 strcpy (token_name.buffer, objtag);
3468 strcat (token_name.buffer, "(");
3469 strncat (token_name.buffer,
3470 newlb.buffer + tokoff, toklen);
3471 strcat (token_name.buffer, ")");
3472 token.named = TRUE;
3473 }
3474 else if (objdef == omethodtag
3475 || objdef == omethodparm)
3476 /* Objective C method */
3477 {
3478 token.named = TRUE;
3479 }
3480 else if (fvdef == fdefunname)
3481 /* GNU DEFUN and similar macros */
3482 {
3483 bool defun = (newlb.buffer[tokoff] == 'F');
3484 int off = tokoff;
3485 int len = toklen;
3486
3487 /* Rewrite the tag so that emacs lisp DEFUNs
3488 can be found by their elisp name */
3489 if (defun)
3490 {
3491 off += 1;
3492 len -= 1;
3493 }
3494 len = toklen;
3495 linebuffer_setlen (&token_name, len);
3496 strncpy (token_name.buffer,
3497 newlb.buffer + off, len);
3498 token_name.buffer[len] = '\0';
3499 if (defun)
3500 while (--len >= 0)
3501 if (token_name.buffer[len] == '_')
3502 token_name.buffer[len] = '-';
3503 token.named = defun;
3504 }
3505 else
3506 {
3507 linebuffer_setlen (&token_name, toklen);
3508 strncpy (token_name.buffer,
3509 newlb.buffer + tokoff, toklen);
3510 token_name.buffer[toklen] = '\0';
3511 /* Name macros and members. */
3512 token.named = (structdef == stagseen
3513 || typdef == ttypeseen
3514 || typdef == tend
3515 || (funorvar
3516 && definedef == dignorerest)
3517 || (funorvar
3518 && definedef == dnone
3519 && structdef == snone
3520 && bracelev > 0));
3521 }
3522 token.lineno = lineno;
3523 token.offset = tokoff;
3524 token.length = toklen;
3525 token.line = newlb.buffer;
3526 token.linepos = newlinepos;
3527 token.valid = TRUE;
3528
3529 if (definedef == dnone
3530 && (fvdef == fvnameseen
3531 || fvdef == foperator
3532 || structdef == stagseen
3533 || typdef == tend
3534 || typdef == ttypeseen
3535 || objdef != onone))
3536 {
3537 if (current_lb_is_new)
3538 switch_line_buffers ();
3539 }
3540 else if (definedef != dnone
3541 || fvdef == fdefunname
3542 || instruct)
3543 make_C_tag (funorvar);
3544 }
3545 else /* not yacc and consider_token failed */
3546 {
3547 if (inattribute && fvdef == fignore)
3548 {
3549 /* We have just met __attribute__ after a
3550 function parameter list: do not tag the
3551 function again. */
3552 fvdef = fvnone;
3553 }
3554 }
3555 midtoken = FALSE;
3556 }
3557 } /* if (endtoken (c)) */
3558 else if (intoken (c))
3559 still_in_token:
3560 {
3561 toklen++;
3562 continue;
3563 }
3564 } /* if (midtoken) */
3565 else if (begtoken (c))
3566 {
3567 switch (definedef)
3568 {
3569 case dnone:
3570 switch (fvdef)
3571 {
3572 case fstartlist:
3573 /* This prevents tagging fb in
3574 void (__attribute__((noreturn)) *fb) (void);
3575 Fixing this is not easy and not very important. */
3576 fvdef = finlist;
3577 continue;
3578 case flistseen:
3579 if (plainc || declarations)
3580 {
3581 make_C_tag (TRUE); /* a function */
3582 fvdef = fignore;
3583 }
3584 break;
3585 }
3586 if (structdef == stagseen && !cjava)
3587 {
3588 popclass_above (bracelev);
3589 structdef = snone;
3590 }
3591 break;
3592 case dsharpseen:
3593 savetoken = token;
3594 break;
3595 }
3596 if (!yacc_rules || lp == newlb.buffer + 1)
3597 {
3598 tokoff = lp - 1 - newlb.buffer;
3599 toklen = 1;
3600 midtoken = TRUE;
3601 }
3602 continue;
3603 } /* if (begtoken) */
3604 } /* if must look at token */
3605
3606
3607 /* Detect end of line, colon, comma, semicolon and various braces
3608 after having handled a token.*/
3609 switch (c)
3610 {
3611 case ':':
3612 if (inattribute)
3613 break;
3614 if (yacc_rules && token.offset == 0 && token.valid)
3615 {
3616 make_C_tag (FALSE); /* a yacc function */
3617 break;
3618 }
3619 if (definedef != dnone)
3620 break;
3621 switch (objdef)
3622 {
3623 case otagseen:
3624 objdef = oignore;
3625 make_C_tag (TRUE); /* an Objective C class */
3626 break;
3627 case omethodtag:
3628 case omethodparm:
3629 objdef = omethodcolon;
3630 linebuffer_setlen (&token_name, token_name.len + 1);
3631 strcat (token_name.buffer, ":");
3632 break;
3633 }
3634 if (structdef == stagseen)
3635 {
3636 structdef = scolonseen;
3637 break;
3638 }
3639 /* Should be useless, but may be work as a safety net. */
3640 if (cplpl && fvdef == flistseen)
3641 {
3642 make_C_tag (TRUE); /* a function */
3643 fvdef = fignore;
3644 break;
3645 }
3646 break;
3647 case ';':
3648 if (definedef != dnone || inattribute)
3649 break;
3650 switch (typdef)
3651 {
3652 case tend:
3653 case ttypeseen:
3654 make_C_tag (FALSE); /* a typedef */
3655 typdef = tnone;
3656 fvdef = fvnone;
3657 break;
3658 case tnone:
3659 case tinbody:
3660 case tignore:
3661 switch (fvdef)
3662 {
3663 case fignore:
3664 if (typdef == tignore || cplpl)
3665 fvdef = fvnone;
3666 break;
3667 case fvnameseen:
3668 if ((globals && bracelev == 0 && (!fvextern || declarations))
3669 || (members && instruct))
3670 make_C_tag (FALSE); /* a variable */
3671 fvextern = FALSE;
3672 fvdef = fvnone;
3673 token.valid = FALSE;
3674 break;
3675 case flistseen:
3676 if ((declarations
3677 && (cplpl || !instruct)
3678 && (typdef == tnone || (typdef != tignore && instruct)))
3679 || (members
3680 && plainc && instruct))
3681 make_C_tag (TRUE); /* a function */
3682 /* FALLTHRU */
3683 default:
3684 fvextern = FALSE;
3685 fvdef = fvnone;
3686 if (declarations
3687 && cplpl && structdef == stagseen)
3688 make_C_tag (FALSE); /* forward declaration */
3689 else
3690 token.valid = FALSE;
3691 } /* switch (fvdef) */
3692 /* FALLTHRU */
3693 default:
3694 if (!instruct)
3695 typdef = tnone;
3696 }
3697 if (structdef == stagseen)
3698 structdef = snone;
3699 break;
3700 case ',':
3701 if (definedef != dnone || inattribute)
3702 break;
3703 switch (objdef)
3704 {
3705 case omethodtag:
3706 case omethodparm:
3707 make_C_tag (TRUE); /* an Objective C method */
3708 objdef = oinbody;
3709 break;
3710 }
3711 switch (fvdef)
3712 {
3713 case fdefunkey:
3714 case foperator:
3715 case fstartlist:
3716 case finlist:
3717 case fignore:
3718 case vignore:
3719 break;
3720 case fdefunname:
3721 fvdef = fignore;
3722 break;
3723 case fvnameseen:
3724 if (parlev == 0
3725 && ((globals
3726 && bracelev == 0
3727 && templatelev == 0
3728 && (!fvextern || declarations))
3729 || (members && instruct)))
3730 make_C_tag (FALSE); /* a variable */
3731 break;
3732 case flistseen:
3733 if ((declarations && typdef == tnone && !instruct)
3734 || (members && typdef != tignore && instruct))
3735 {
3736 make_C_tag (TRUE); /* a function */
3737 fvdef = fvnameseen;
3738 }
3739 else if (!declarations)
3740 fvdef = fvnone;
3741 token.valid = FALSE;
3742 break;
3743 default:
3744 fvdef = fvnone;
3745 }
3746 if (structdef == stagseen)
3747 structdef = snone;
3748 break;
3749 case ']':
3750 if (definedef != dnone || inattribute)
3751 break;
3752 if (structdef == stagseen)
3753 structdef = snone;
3754 switch (typdef)
3755 {
3756 case ttypeseen:
3757 case tend:
3758 typdef = tignore;
3759 make_C_tag (FALSE); /* a typedef */
3760 break;
3761 case tnone:
3762 case tinbody:
3763 switch (fvdef)
3764 {
3765 case foperator:
3766 case finlist:
3767 case fignore:
3768 case vignore:
3769 break;
3770 case fvnameseen:
3771 if ((members && bracelev == 1)
3772 || (globals && bracelev == 0
3773 && (!fvextern || declarations)))
3774 make_C_tag (FALSE); /* a variable */
3775 /* FALLTHRU */
3776 default:
3777 fvdef = fvnone;
3778 }
3779 break;
3780 }
3781 break;
3782 case '(':
3783 if (inattribute)
3784 {
3785 attrparlev++;
3786 break;
3787 }
3788 if (definedef != dnone)
3789 break;
3790 if (objdef == otagseen && parlev == 0)
3791 objdef = oparenseen;
3792 switch (fvdef)
3793 {
3794 case fvnameseen:
3795 if (typdef == ttypeseen
3796 && *lp != '*'
3797 && !instruct)
3798 {
3799 /* This handles constructs like:
3800 typedef void OperatorFun (int fun); */
3801 make_C_tag (FALSE);
3802 typdef = tignore;
3803 fvdef = fignore;
3804 break;
3805 }
3806 /* FALLTHRU */
3807 case foperator:
3808 fvdef = fstartlist;
3809 break;
3810 case flistseen:
3811 fvdef = finlist;
3812 break;
3813 }
3814 parlev++;
3815 break;
3816 case ')':
3817 if (inattribute)
3818 {
3819 if (--attrparlev == 0)
3820 inattribute = FALSE;
3821 break;
3822 }
3823 if (definedef != dnone)
3824 break;
3825 if (objdef == ocatseen && parlev == 1)
3826 {
3827 make_C_tag (TRUE); /* an Objective C category */
3828 objdef = oignore;
3829 }
3830 if (--parlev == 0)
3831 {
3832 switch (fvdef)
3833 {
3834 case fstartlist:
3835 case finlist:
3836 fvdef = flistseen;
3837 break;
3838 }
3839 if (!instruct
3840 && (typdef == tend
3841 || typdef == ttypeseen))
3842 {
3843 typdef = tignore;
3844 make_C_tag (FALSE); /* a typedef */
3845 }
3846 }
3847 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3848 parlev = 0;
3849 break;
3850 case '{':
3851 if (definedef != dnone)
3852 break;
3853 if (typdef == ttypeseen)
3854 {
3855 /* Whenever typdef is set to tinbody (currently only
3856 here), typdefbracelev should be set to bracelev. */
3857 typdef = tinbody;
3858 typdefbracelev = bracelev;
3859 }
3860 switch (fvdef)
3861 {
3862 case flistseen:
3863 make_C_tag (TRUE); /* a function */
3864 /* FALLTHRU */
3865 case fignore:
3866 fvdef = fvnone;
3867 break;
3868 case fvnone:
3869 switch (objdef)
3870 {
3871 case otagseen:
3872 make_C_tag (TRUE); /* an Objective C class */
3873 objdef = oignore;
3874 break;
3875 case omethodtag:
3876 case omethodparm:
3877 make_C_tag (TRUE); /* an Objective C method */
3878 objdef = oinbody;
3879 break;
3880 default:
3881 /* Neutralize `extern "C" {' grot. */
3882 if (bracelev == 0 && structdef == snone && nestlev == 0
3883 && typdef == tnone)
3884 bracelev = -1;
3885 }
3886 break;
3887 }
3888 switch (structdef)
3889 {
3890 case skeyseen: /* unnamed struct */
3891 pushclass_above (bracelev, NULL, 0);
3892 structdef = snone;
3893 break;
3894 case stagseen: /* named struct or enum */
3895 case scolonseen: /* a class */
3896 pushclass_above (bracelev,token.line+token.offset, token.length);
3897 structdef = snone;
3898 make_C_tag (FALSE); /* a struct or enum */
3899 break;
3900 }
3901 bracelev++;
3902 break;
3903 case '*':
3904 if (definedef != dnone)
3905 break;
3906 if (fvdef == fstartlist)
3907 {
3908 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3909 token.valid = FALSE;
3910 }
3911 break;
3912 case '}':
3913 if (definedef != dnone)
3914 break;
3915 if (!ignoreindent && lp == newlb.buffer + 1)
3916 {
3917 if (bracelev != 0)
3918 token.valid = FALSE;
3919 bracelev = 0; /* reset brace level if first column */
3920 parlev = 0; /* also reset paren level, just in case... */
3921 }
3922 else if (bracelev > 0)
3923 bracelev--;
3924 else
3925 token.valid = FALSE; /* something gone amiss, token unreliable */
3926 popclass_above (bracelev);
3927 structdef = snone;
3928 /* Only if typdef == tinbody is typdefbracelev significant. */
3929 if (typdef == tinbody && bracelev <= typdefbracelev)
3930 {
3931 assert (bracelev == typdefbracelev);
3932 typdef = tend;
3933 }
3934 break;
3935 case '=':
3936 if (definedef != dnone)
3937 break;
3938 switch (fvdef)
3939 {
3940 case foperator:
3941 case finlist:
3942 case fignore:
3943 case vignore:
3944 break;
3945 case fvnameseen:
3946 if ((members && bracelev == 1)
3947 || (globals && bracelev == 0 && (!fvextern || declarations)))
3948 make_C_tag (FALSE); /* a variable */
3949 /* FALLTHRU */
3950 default:
3951 fvdef = vignore;
3952 }
3953 break;
3954 case '<':
3955 if (cplpl
3956 && (structdef == stagseen || fvdef == fvnameseen))
3957 {
3958 templatelev++;
3959 break;
3960 }
3961 goto resetfvdef;
3962 case '>':
3963 if (templatelev > 0)
3964 {
3965 templatelev--;
3966 break;
3967 }
3968 goto resetfvdef;
3969 case '+':
3970 case '-':
3971 if (objdef == oinbody && bracelev == 0)
3972 {
3973 objdef = omethodsign;
3974 break;
3975 }
3976 /* FALLTHRU */
3977 resetfvdef:
3978 case '#': case '~': case '&': case '%': case '/':
3979 case '|': case '^': case '!': case '.': case '?':
3980 if (definedef != dnone)
3981 break;
3982 /* These surely cannot follow a function tag in C. */
3983 switch (fvdef)
3984 {
3985 case foperator:
3986 case finlist:
3987 case fignore:
3988 case vignore:
3989 break;
3990 default:
3991 fvdef = fvnone;
3992 }
3993 break;
3994 case '\0':
3995 if (objdef == otagseen)
3996 {
3997 make_C_tag (TRUE); /* an Objective C class */
3998 objdef = oignore;
3999 }
4000 /* If a macro spans multiple lines don't reset its state. */
4001 if (quotednl)
4002 CNL_SAVE_DEFINEDEF ();
4003 else
4004 CNL ();
4005 break;
4006 } /* switch (c) */
4007
4008 } /* while not eof */
4009
4010 free (lbs[0].lb.buffer);
4011 free (lbs[1].lb.buffer);
4012 }
4013
4014 /*
4015 * Process either a C++ file or a C file depending on the setting
4016 * of a global flag.
4017 */
4018 static void
4019 default_C_entries (inf)
4020 FILE *inf;
4021 {
4022 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4023 }
4024
/* Parse INF as plain C: C_entries is called with no dialect flag set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;              /* no extension bits */

  C_entries (dialect, inf);
}
4032
4033 /* Always do C++. */
4034 static void
4035 Cplusplus_entries (inf)
4036 FILE *inf;
4037 {
4038 C_entries (C_PLPL, inf);
4039 }
4040
4041 /* Always do Java. */
4042 static void
4043 Cjava_entries (inf)
4044 FILE *inf;
4045 {
4046 C_entries (C_JAVA, inf);
4047 }
4048
4049 /* Always do C*. */
4050 static void
4051 Cstar_entries (inf)
4052 FILE *inf;
4053 {
4054 C_entries (C_STAR, inf);
4055 }
4056
4057 /* Always do Yacc. */
4058 static void
4059 Yacc_entries (inf)
4060 FILE *inf;
4061 {
4062 C_entries (YACC, inf);
4063 }
4064
4065 \f
/* Useful macros. */

/* Loop header that iterates over the lines of FILE_POINTER.  Before
   each iteration, and only while end of file has not been reached, the
   next line is read into LINE_BUFFER and CHAR_POINTER is set to the
   start of its text.  Must be followed by the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* stop at end of file */       \
         && (readline (&line_buffer, file_pointer), /* fetch a line */  \
             char_pointer = line_buffer.buffer,                         \
             TRUE))

/* True if CP points at KEYWORD followed by a character that cannot be
   part of a name.  On success CP is advanced past the keyword and any
   spaces after it; on failure CP is left alone.  KEYWORD must be a
   constant string, because its length is taken with sizeof.  */
#define LOOKING_AT(cp, keyword)                                         \
  (strneq ((cp), keyword, sizeof (keyword) - 1) /* keyword matches */   \
   && notinname ((cp)[sizeof (keyword) - 1])    /* and ends there */    \
   && ((cp) = skip_spaces ((cp) + sizeof (keyword) - 1))) /* skip spaces */
4079
4080 /*
4081 * Read a file, but do no processing. This is used to do regexp
4082 * matching on files that have no language defined.
4083 */
4084 static void
4085 just_read_file (inf)
4086 FILE *inf;
4087 {
4088 register char *dummy;
4089
4090 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4091 continue;
4092 }
4093
4094 \f
4095 /* Fortran parsing */
4096
4097 static void F_takeprec __P((void));
4098 static void F_getit __P((FILE *));
4099
4100 static void
4101 F_takeprec ()
4102 {
4103 dbp = skip_spaces (dbp);
4104 if (*dbp != '*')
4105 return;
4106 dbp++;
4107 dbp = skip_spaces (dbp);
4108 if (strneq (dbp, "(*)", 3))
4109 {
4110 dbp += 3;
4111 return;
4112 }
4113 if (!ISDIGIT (*dbp))
4114 {
4115 --dbp; /* force failure */
4116 return;
4117 }
4118 do
4119 dbp++;
4120 while (ISDIGIT (*dbp));
4121 }
4122
4123 static void
4124 F_getit (inf)
4125 FILE *inf;
4126 {
4127 register char *cp;
4128
4129 dbp = skip_spaces (dbp);
4130 if (*dbp == '\0')
4131 {
4132 readline (&lb, inf);
4133 dbp = lb.buffer;
4134 if (dbp[5] != '&')
4135 return;
4136 dbp += 6;
4137 dbp = skip_spaces (dbp);
4138 }
4139 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4140 return;
4141 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4142 continue;
4143 make_tag (dbp, cp-dbp, TRUE,
4144 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4145 }
4146
4147
4148 static void
4149 Fortran_functions (inf)
4150 FILE *inf;
4151 {
4152 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4153 {
4154 if (*dbp == '%')
4155 dbp++; /* Ratfor escape to fortran */
4156 dbp = skip_spaces (dbp);
4157 if (*dbp == '\0')
4158 continue;
4159 switch (lowcase (*dbp))
4160 {
4161 case 'i':
4162 if (nocase_tail ("integer"))
4163 F_takeprec ();
4164 break;
4165 case 'r':
4166 if (nocase_tail ("real"))
4167 F_takeprec ();
4168 break;
4169 case 'l':
4170 if (nocase_tail ("logical"))
4171 F_takeprec ();
4172 break;
4173 case 'c':
4174 if (nocase_tail ("complex") || nocase_tail ("character"))
4175 F_takeprec ();
4176 break;
4177 case 'd':
4178 if (nocase_tail ("double"))
4179 {
4180 dbp = skip_spaces (dbp);
4181 if (*dbp == '\0')
4182 continue;
4183 if (nocase_tail ("precision"))
4184 break;
4185 continue;
4186 }
4187 break;
4188 }
4189 dbp = skip_spaces (dbp);
4190 if (*dbp == '\0')
4191 continue;
4192 switch (lowcase (*dbp))
4193 {
4194 case 'f':
4195 if (nocase_tail ("function"))
4196 F_getit (inf);
4197 continue;
4198 case 's':
4199 if (nocase_tail ("subroutine"))
4200 F_getit (inf);
4201 continue;
4202 case 'e':
4203 if (nocase_tail ("entry"))
4204 F_getit (inf);
4205 continue;
4206 case 'b':
4207 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4208 {
4209 dbp = skip_spaces (dbp);
4210 if (*dbp == '\0') /* assume un-named */
4211 make_tag ("blockdata", 9, TRUE,
4212 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4213 else
4214 F_getit (inf); /* look for name */
4215 }
4216 continue;
4217 }
4218 }
4219 }
4220
4221 \f
4222 /*
4223 * Ada parsing
4224 * Original code by
4225 * Philippe Waroquiers (1998)
4226 */
4227
4228 static void Ada_getit __P((FILE *, char *));
4229
4230 /* Once we are positioned after an "interesting" keyword, let's get
4231 the real tag value necessary. */
4232 static void
4233 Ada_getit (inf, name_qualifier)
4234 FILE *inf;
4235 char *name_qualifier;
4236 {
4237 register char *cp;
4238 char *name;
4239 char c;
4240
4241 while (!feof (inf))
4242 {
4243 dbp = skip_spaces (dbp);
4244 if (*dbp == '\0'
4245 || (dbp[0] == '-' && dbp[1] == '-'))
4246 {
4247 readline (&lb, inf);
4248 dbp = lb.buffer;
4249 }
4250 switch (lowcase(*dbp))
4251 {
4252 case 'b':
4253 if (nocase_tail ("body"))
4254 {
4255 /* Skipping body of procedure body or package body or ....
4256 resetting qualifier to body instead of spec. */
4257 name_qualifier = "/b";
4258 continue;
4259 }
4260 break;
4261 case 't':
4262 /* Skipping type of task type or protected type ... */
4263 if (nocase_tail ("type"))
4264 continue;
4265 break;
4266 }
4267 if (*dbp == '"')
4268 {
4269 dbp += 1;
4270 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4271 continue;
4272 }
4273 else
4274 {
4275 dbp = skip_spaces (dbp);
4276 for (cp = dbp;
4277 (*cp != '\0'
4278 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4279 cp++)
4280 continue;
4281 if (cp == dbp)
4282 return;
4283 }
4284 c = *cp;
4285 *cp = '\0';
4286 name = concat (dbp, name_qualifier, "");
4287 *cp = c;
4288 make_tag (name, strlen (name), TRUE,
4289 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4290 free (name);
4291 if (c == '"')
4292 dbp = cp + 1;
4293 return;
4294 }
4295 }
4296
4297 static void
4298 Ada_funcs (inf)
4299 FILE *inf;
4300 {
4301 bool inquote = FALSE;
4302 bool skip_till_semicolumn = FALSE;
4303
4304 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4305 {
4306 while (*dbp != '\0')
4307 {
4308 /* Skip a string i.e. "abcd". */
4309 if (inquote || (*dbp == '"'))
4310 {
4311 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4312 if (dbp != NULL)
4313 {
4314 inquote = FALSE;
4315 dbp += 1;
4316 continue; /* advance char */
4317 }
4318 else
4319 {
4320 inquote = TRUE;
4321 break; /* advance line */
4322 }
4323 }
4324
4325 /* Skip comments. */
4326 if (dbp[0] == '-' && dbp[1] == '-')
4327 break; /* advance line */
4328
4329 /* Skip character enclosed in single quote i.e. 'a'
4330 and skip single quote starting an attribute i.e. 'Image. */
4331 if (*dbp == '\'')
4332 {
4333 dbp++ ;
4334 if (*dbp != '\0')
4335 dbp++;
4336 continue;
4337 }
4338
4339 if (skip_till_semicolumn)
4340 {
4341 if (*dbp == ';')
4342 skip_till_semicolumn = FALSE;
4343 dbp++;
4344 continue; /* advance char */
4345 }
4346
4347 /* Search for beginning of a token. */
4348 if (!begtoken (*dbp))
4349 {
4350 dbp++;
4351 continue; /* advance char */
4352 }
4353
4354 /* We are at the beginning of a token. */
4355 switch (lowcase(*dbp))
4356 {
4357 case 'f':
4358 if (!packages_only && nocase_tail ("function"))
4359 Ada_getit (inf, "/f");
4360 else
4361 break; /* from switch */
4362 continue; /* advance char */
4363 case 'p':
4364 if (!packages_only && nocase_tail ("procedure"))
4365 Ada_getit (inf, "/p");
4366 else if (nocase_tail ("package"))
4367 Ada_getit (inf, "/s");
4368 else if (nocase_tail ("protected")) /* protected type */
4369 Ada_getit (inf, "/t");
4370 else
4371 break; /* from switch */
4372 continue; /* advance char */
4373
4374 case 'u':
4375 if (typedefs && !packages_only && nocase_tail ("use"))
4376 {
4377 /* when tagging types, avoid tagging use type Pack.Typename;
4378 for this, we will skip everything till a ; */
4379 skip_till_semicolumn = TRUE;
4380 continue; /* advance char */
4381 }
4382
4383 case 't':
4384 if (!packages_only && nocase_tail ("task"))
4385 Ada_getit (inf, "/k");
4386 else if (typedefs && !packages_only && nocase_tail ("type"))
4387 {
4388 Ada_getit (inf, "/t");
4389 while (*dbp != '\0')
4390 dbp += 1;
4391 }
4392 else
4393 break; /* from switch */
4394 continue; /* advance char */
4395 }
4396
4397 /* Look for the end of the token. */
4398 while (!endtoken (*dbp))
4399 dbp++;
4400
4401 } /* advance char */
4402 } /* advance line */
4403 }
4404
4405 \f
4406 /*
4407 * Unix and microcontroller assembly tag handling
4408 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4409 * Idea by Bob Weiner, Motorola Inc. (1994)
4410 */
4411 static void
4412 Asm_labels (inf)
4413 FILE *inf;
4414 {
4415 register char *cp;
4416
4417 LOOP_ON_INPUT_LINES (inf, lb, cp)
4418 {
4419 /* If first char is alphabetic or one of [_.$], test for colon
4420 following identifier. */
4421 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4422 {
4423 /* Read past label. */
4424 cp++;
4425 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4426 cp++;
4427 if (*cp == ':' || iswhite (*cp))
4428 /* Found end of label, so copy it and add it to the table. */
4429 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4430 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4431 }
4432 }
4433 }
4434
4435 \f
4436 /*
4437 * Perl support
4438 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4439 * Perl variable names: /^(my|local).../
4440 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4441 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4442 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4443 */
4444 static void
4445 Perl_functions (inf)
4446 FILE *inf;
4447 {
4448 char *package = savestr ("main"); /* current package name */
4449 register char *cp;
4450
4451 LOOP_ON_INPUT_LINES (inf, lb, cp)
4452 {
4453 skip_spaces(cp);
4454
4455 if (LOOKING_AT (cp, "package"))
4456 {
4457 free (package);
4458 get_tag (cp, &package);
4459 }
4460 else if (LOOKING_AT (cp, "sub"))
4461 {
4462 char *pos;
4463 char *sp = cp;
4464
4465 while (!notinname (*cp))
4466 cp++;
4467 if (cp == sp)
4468 continue; /* nothing found */
4469 if ((pos = etags_strchr (sp, ':')) != NULL
4470 && pos < cp && pos[1] == ':')
4471 /* The name is already qualified. */
4472 make_tag (sp, cp - sp, TRUE,
4473 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4474 else
4475 /* Qualify it. */
4476 {
4477 char savechar, *name;
4478
4479 savechar = *cp;
4480 *cp = '\0';
4481 name = concat (package, "::", sp);
4482 *cp = savechar;
4483 make_tag (name, strlen(name), TRUE,
4484 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4485 free (name);
4486 }
4487 }
4488 else if (globals) /* only if we are tagging global vars */
4489 {
4490 /* Skip a qualifier, if any. */
4491 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4492 /* After "my" or "local", but before any following paren or space. */
4493 char *varstart = cp;
4494
4495 if (qual /* should this be removed? If yes, how? */
4496 && (*cp == '$' || *cp == '@' || *cp == '%'))
4497 {
4498 varstart += 1;
4499 do
4500 cp++;
4501 while (ISALNUM (*cp) || *cp == '_');
4502 }
4503 else if (qual)
4504 {
4505 /* Should be examining a variable list at this point;
4506 could insist on seeing an open parenthesis. */
4507 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4508 cp++;
4509 }
4510 else
4511 continue;
4512
4513 make_tag (varstart, cp - varstart, FALSE,
4514 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4515 }
4516 }
4517 }
4518
4519
4520 /*
4521 * Python support
4522 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4523 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4524 * More ideas by seb bacon <seb@jamkit.com> (2002)
4525 */
4526 static void
4527 Python_functions (inf)
4528 FILE *inf;
4529 {
4530 register char *cp;
4531
4532 LOOP_ON_INPUT_LINES (inf, lb, cp)
4533 {
4534 cp = skip_spaces (cp);
4535 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4536 {
4537 char *name = cp;
4538 while (!notinname (*cp) && *cp != ':')
4539 cp++;
4540 make_tag (name, cp - name, TRUE,
4541 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4542 }
4543 }
4544 }
4545
4546 \f
4547 /*
4548 * PHP support
4549 * Look for:
4550 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4551 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4552 * - /^[ \t]*define\(\"[^\"]+/
4553 * Only with --members:
4554 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4555 * Idea by Diez B. Roggisch (2001)
4556 */
4557 static void
4558 PHP_functions (inf)
4559 FILE *inf;
4560 {
4561 register char *cp, *name;
4562 bool search_identifier = FALSE;
4563
4564 LOOP_ON_INPUT_LINES (inf, lb, cp)
4565 {
4566 cp = skip_spaces (cp);
4567 name = cp;
4568 if (search_identifier
4569 && *cp != '\0')
4570 {
4571 while (!notinname (*cp))
4572 cp++;
4573 make_tag (name, cp - name, TRUE,
4574 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4575 search_identifier = FALSE;
4576 }
4577 else if (LOOKING_AT (cp, "function"))
4578 {
4579 if(*cp == '&')
4580 cp = skip_spaces (cp+1);
4581 if(*cp != '\0')
4582 {
4583 name = cp;
4584 while (!notinname (*cp))
4585 cp++;
4586 make_tag (name, cp - name, TRUE,
4587 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4588 }
4589 else
4590 search_identifier = TRUE;
4591 }
4592 else if (LOOKING_AT (cp, "class"))
4593 {
4594 if (*cp != '\0')
4595 {
4596 name = cp;
4597 while (*cp != '\0' && !iswhite (*cp))
4598 cp++;
4599 make_tag (name, cp - name, FALSE,
4600 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4601 }
4602 else
4603 search_identifier = TRUE;
4604 }
4605 else if (strneq (cp, "define", 6)
4606 && (cp = skip_spaces (cp+6))
4607 && *cp++ == '('
4608 && (*cp == '"' || *cp == '\''))
4609 {
4610 char quote = *cp++;
4611 name = cp;
4612 while (*cp != quote && *cp != '\0')
4613 cp++;
4614 make_tag (name, cp - name, FALSE,
4615 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4616 }
4617 else if (members
4618 && LOOKING_AT (cp, "var")
4619 && *cp == '$')
4620 {
4621 name = cp;
4622 while (!notinname(*cp))
4623 cp++;
4624 make_tag (name, cp - name, FALSE,
4625 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4626 }
4627 }
4628 }
4629
4630 \f
4631 /*
4632 * Cobol tag functions
4633 * We could look for anything that could be a paragraph name.
4634 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4635 * Idea by Corny de Souza (1993)
4636 */
4637 static void
4638 Cobol_paragraphs (inf)
4639 FILE *inf;
4640 {
4641 register char *bp, *ep;
4642
4643 LOOP_ON_INPUT_LINES (inf, lb, bp)
4644 {
4645 if (lb.len < 9)
4646 continue;
4647 bp += 8;
4648
4649 /* If eoln, compiler option or comment ignore whole line. */
4650 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4651 continue;
4652
4653 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4654 continue;
4655 if (*ep++ == '.')
4656 make_tag (bp, ep - bp, TRUE,
4657 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4658 }
4659 }
4660
4661 \f
4662 /*
4663 * Makefile support
4664 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4665 */
4666 static void
4667 Makefile_targets (inf)
4668 FILE *inf;
4669 {
4670 register char *bp;
4671
4672 LOOP_ON_INPUT_LINES (inf, lb, bp)
4673 {
4674 if (*bp == '\t' || *bp == '#')
4675 continue;
4676 while (*bp != '\0' && *bp != '=' && *bp != ':')
4677 bp++;
4678 if (*bp == ':' || (globals && *bp == '='))
4679 make_tag (lb.buffer, bp - lb.buffer, TRUE,
4680 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4681 }
4682 }
4683
4684 \f
4685 /*
4686 * Pascal parsing
4687 * Original code by Mosur K. Mohan (1989)
4688 *
4689 * Locates tags for procedures & functions. Doesn't do any type- or
4690 * var-definitions. It does look for the keyword "extern" or
4691 * "forward" immediately following the procedure statement; if found,
4692 * the tag is skipped.
4693 */
4694 static void
4695 Pascal_functions (inf)
4696 FILE *inf;
4697 {
4698 linebuffer tline; /* mostly copied from C_entries */
4699 long save_lcno;
4700 int save_lineno, namelen, taglen;
4701 char c, *name;
4702
4703 bool /* each of these flags is TRUE iff: */
4704 incomment, /* point is inside a comment */
4705 inquote, /* point is inside '..' string */
4706 get_tagname, /* point is after PROCEDURE/FUNCTION
4707 keyword, so next item = potential tag */
4708 found_tag, /* point is after a potential tag */
4709 inparms, /* point is within parameter-list */
4710 verify_tag; /* point has passed the parm-list, so the
4711 next token will determine whether this
4712 is a FORWARD/EXTERN to be ignored, or
4713 whether it is a real tag */
4714
4715 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4716 name = NULL; /* keep compiler quiet */
4717 dbp = lb.buffer;
4718 *dbp = '\0';
4719 linebuffer_init (&tline);
4720
4721 incomment = inquote = FALSE;
4722 found_tag = FALSE; /* have a proc name; check if extern */
4723 get_tagname = FALSE; /* found "procedure" keyword */
4724 inparms = FALSE; /* found '(' after "proc" */
4725 verify_tag = FALSE; /* check if "extern" is ahead */
4726
4727
4728 while (!feof (inf)) /* long main loop to get next char */
4729 {
4730 c = *dbp++;
4731 if (c == '\0') /* if end of line */
4732 {
4733 readline (&lb, inf);
4734 dbp = lb.buffer;
4735 if (*dbp == '\0')
4736 continue;
4737 if (!((found_tag && verify_tag)
4738 || get_tagname))
4739 c = *dbp++; /* only if don't need *dbp pointing
4740 to the beginning of the name of
4741 the procedure or function */
4742 }
4743 if (incomment)
4744 {
4745 if (c == '}') /* within { } comments */
4746 incomment = FALSE;
4747 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4748 {
4749 dbp++;
4750 incomment = FALSE;
4751 }
4752 continue;
4753 }
4754 else if (inquote)
4755 {
4756 if (c == '\'')
4757 inquote = FALSE;
4758 continue;
4759 }
4760 else
4761 switch (c)
4762 {
4763 case '\'':
4764 inquote = TRUE; /* found first quote */
4765 continue;
4766 case '{': /* found open { comment */
4767 incomment = TRUE;
4768 continue;
4769 case '(':
4770 if (*dbp == '*') /* found open (* comment */
4771 {
4772 incomment = TRUE;
4773 dbp++;
4774 }
4775 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4776 inparms = TRUE;
4777 continue;
4778 case ')': /* end of parms list */
4779 if (inparms)
4780 inparms = FALSE;
4781 continue;
4782 case ';':
4783 if (found_tag && !inparms) /* end of proc or fn stmt */
4784 {
4785 verify_tag = TRUE;
4786 break;
4787 }
4788 continue;
4789 }
4790 if (found_tag && verify_tag && (*dbp != ' '))
4791 {
4792 /* Check if this is an "extern" declaration. */
4793 if (*dbp == '\0')
4794 continue;
4795 if (lowcase (*dbp == 'e'))
4796 {
4797 if (nocase_tail ("extern")) /* superfluous, really! */
4798 {
4799 found_tag = FALSE;
4800 verify_tag = FALSE;
4801 }
4802 }
4803 else if (lowcase (*dbp) == 'f')
4804 {
4805 if (nocase_tail ("forward")) /* check for forward reference */
4806 {
4807 found_tag = FALSE;
4808 verify_tag = FALSE;
4809 }
4810 }
4811 if (found_tag && verify_tag) /* not external proc, so make tag */
4812 {
4813 found_tag = FALSE;
4814 verify_tag = FALSE;
4815 make_tag (name, namelen, TRUE,
4816 tline.buffer, taglen, save_lineno, save_lcno);
4817 continue;
4818 }
4819 }
4820 if (get_tagname) /* grab name of proc or fn */
4821 {
4822 char *cp;
4823
4824 if (*dbp == '\0')
4825 continue;
4826
4827 /* Find block name. */
4828 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4829 continue;
4830
4831 /* Save all values for later tagging. */
4832 linebuffer_setlen (&tline, lb.len);
4833 strcpy (tline.buffer, lb.buffer);
4834 save_lineno = lineno;
4835 save_lcno = linecharno;
4836 name = tline.buffer + (dbp - lb.buffer);
4837 namelen = cp - dbp;
4838 taglen = cp - lb.buffer + 1;
4839
4840 dbp = cp; /* set dbp to e-o-token */
4841 get_tagname = FALSE;
4842 found_tag = TRUE;
4843 continue;
4844
4845 /* And proceed to check for "extern". */
4846 }
4847 else if (!incomment && !inquote && !found_tag)
4848 {
4849 /* Check for proc/fn keywords. */
4850 switch (lowcase (c))
4851 {
4852 case 'p':
4853 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4854 get_tagname = TRUE;
4855 continue;
4856 case 'f':
4857 if (nocase_tail ("unction"))
4858 get_tagname = TRUE;
4859 continue;
4860 }
4861 }
4862 } /* while not eof */
4863
4864 free (tline.buffer);
4865 }
4866
4867 \f
4868 /*
4869 * Lisp tag functions
4870 * look for (def or (DEF, quote or QUOTE
4871 */
4872
4873 static void L_getit __P((void));
4874
4875 static void
4876 L_getit ()
4877 {
4878 if (*dbp == '\'') /* Skip prefix quote */
4879 dbp++;
4880 else if (*dbp == '(')
4881 {
4882 dbp++;
4883 /* Try to skip "(quote " */
4884 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4885 /* Ok, then skip "(" before name in (defstruct (foo)) */
4886 dbp = skip_spaces (dbp);
4887 }
4888 get_tag (dbp, NULL);
4889 }
4890
4891 static void
4892 Lisp_functions (inf)
4893 FILE *inf;
4894 {
4895 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4896 {
4897 if (dbp[0] != '(')
4898 continue;
4899
4900 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4901 {
4902 dbp = skip_non_spaces (dbp);
4903 dbp = skip_spaces (dbp);
4904 L_getit ();
4905 }
4906 else
4907 {
4908 /* Check for (foo::defmumble name-defined ... */
4909 do
4910 dbp++;
4911 while (!notinname (*dbp) && *dbp != ':');
4912 if (*dbp == ':')
4913 {
4914 do
4915 dbp++;
4916 while (*dbp == ':');
4917
4918 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4919 {
4920 dbp = skip_non_spaces (dbp);
4921 dbp = skip_spaces (dbp);
4922 L_getit ();
4923 }
4924 }
4925 }
4926 }
4927 }
4928
4929 \f
4930 /*
4931 * Lua script language parsing
4932 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4933 *
4934 * "function" and "local function" are tags if they start at column 1.
4935 */
4936 static void
4937 Lua_functions (inf)
4938 FILE *inf;
4939 {
4940 register char *bp;
4941
4942 LOOP_ON_INPUT_LINES (inf, lb, bp)
4943 {
4944 if (bp[0] != 'f' && bp[0] != 'l')
4945 continue;
4946
4947 LOOKING_AT (bp, "local"); /* skip possible "local" */
4948
4949 if (LOOKING_AT (bp, "function"))
4950 get_tag (bp, NULL);
4951 }
4952 }
4953
4954 \f
4955 /*
4956 * Postscript tag functions
4957 * Just look for lines where the first character is '/'
4958 * Also look at "defineps" for PSWrap
4959 * Ideas by:
4960 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4961 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4962 */
4963 static void
4964 PS_functions (inf)
4965 FILE *inf;
4966 {
4967 register char *bp, *ep;
4968
4969 LOOP_ON_INPUT_LINES (inf, lb, bp)
4970 {
4971 if (bp[0] == '/')
4972 {
4973 for (ep = bp+1;
4974 *ep != '\0' && *ep != ' ' && *ep != '{';
4975 ep++)
4976 continue;
4977 make_tag (bp, ep - bp, TRUE,
4978 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4979 }
4980 else if (LOOKING_AT (bp, "defineps"))
4981 get_tag (bp, NULL);
4982 }
4983 }
4984
4985 \f
4986 /*
4987 * Scheme tag functions
4988 * look for (def... xyzzy
4989 * (def... (xyzzy
4990 * (def ... ((...(xyzzy ....
4991 * (set! xyzzy
4992 * Original code by Ken Haase (1985?)
4993 */
4994
4995 static void
4996 Scheme_functions (inf)
4997 FILE *inf;
4998 {
4999 register char *bp;
5000
5001 LOOP_ON_INPUT_LINES (inf, lb, bp)
5002 {
5003 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5004 {
5005 bp = skip_non_spaces (bp+4);
5006 /* Skip over open parens and white space */
5007 while (notinname (*bp))
5008 bp++;
5009 get_tag (bp, NULL);
5010 }
5011 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5012 get_tag (bp, NULL);
5013 }
5014 }
5015
5016 \f
5017 /* Find tags in TeX and LaTeX input files. */
5018
5019 /* TEX_toktab is a table of TeX control sequences that define tags.
5020 * Each entry records one such control sequence.
5021 *
5022 * Original code from who knows whom.
5023 * Ideas by:
5024 * Stefan Monnier (2002)
5025 */
5026
5027 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5028
5029 /* Default set of control sequences to put into TEX_toktab.
5030 The value of environment var TEXTAGS is prepended to this. */
5031 static char *TEX_defenv = "\
5032 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5033 :part:appendix:entry:index:def\
5034 :newcommand:renewcommand:newenvironment:renewenvironment";
5035
5036 static void TEX_mode __P((FILE *));
5037 static void TEX_decode_env __P((char *, char *));
5038
5039 static char TEX_esc = '\\';
5040 static char TEX_opgrp = '{';
5041 static char TEX_clgrp = '}';
5042
5043 /*
5044 * TeX/LaTeX scanning loop.
5045 */
5046 static void
5047 TeX_commands (inf)
5048 FILE *inf;
5049 {
5050 char *cp;
5051 linebuffer *key;
5052
5053 /* Select either \ or ! as escape character. */
5054 TEX_mode (inf);
5055
5056 /* Initialize token table once from environment. */
5057 if (TEX_toktab == NULL)
5058 TEX_decode_env ("TEXTAGS", TEX_defenv);
5059
5060 LOOP_ON_INPUT_LINES (inf, lb, cp)
5061 {
5062 /* Look at each TEX keyword in line. */
5063 for (;;)
5064 {
5065 /* Look for a TEX escape. */
5066 while (*cp++ != TEX_esc)
5067 if (cp[-1] == '\0' || cp[-1] == '%')
5068 goto tex_next_line;
5069
5070 for (key = TEX_toktab; key->buffer != NULL; key++)
5071 if (strneq (cp, key->buffer, key->len))
5072 {
5073 register char *p;
5074 int namelen, linelen;
5075 bool opgrp = FALSE;
5076
5077 cp = skip_spaces (cp + key->len);
5078 if (*cp == TEX_opgrp)
5079 {
5080 opgrp = TRUE;
5081 cp++;
5082 }
5083 for (p = cp;
5084 (!iswhite (*p) && *p != '#' &&
5085 *p != TEX_opgrp && *p != TEX_clgrp);
5086 p++)
5087 continue;
5088 namelen = p - cp;
5089 linelen = lb.len;
5090 if (!opgrp || *p == TEX_clgrp)
5091 {
5092 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5093 *p++;
5094 linelen = p - lb.buffer + 1;
5095 }
5096 make_tag (cp, namelen, TRUE,
5097 lb.buffer, linelen, lineno, linecharno);
5098 goto tex_next_line; /* We only tag a line once */
5099 }
5100 }
5101 tex_next_line:
5102 ;
5103 }
5104 }
5105
5106 #define TEX_LESC '\\'
5107 #define TEX_SESC '!'
5108
5109 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5110 chars accordingly. */
5111 static void
5112 TEX_mode (inf)
5113 FILE *inf;
5114 {
5115 int c;
5116
5117 while ((c = getc (inf)) != EOF)
5118 {
5119 /* Skip to next line if we hit the TeX comment char. */
5120 if (c == '%')
5121 while (c != '\n')
5122 c = getc (inf);
5123 else if (c == TEX_LESC || c == TEX_SESC )
5124 break;
5125 }
5126
5127 if (c == TEX_LESC)
5128 {
5129 TEX_esc = TEX_LESC;
5130 TEX_opgrp = '{';
5131 TEX_clgrp = '}';
5132 }
5133 else
5134 {
5135 TEX_esc = TEX_SESC;
5136 TEX_opgrp = '<';
5137 TEX_clgrp = '>';
5138 }
5139 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5140 No attempt is made to correct the situation. */
5141 rewind (inf);
5142 }
5143
5144 /* Read environment and prepend it to the default string.
5145 Build token table. */
5146 static void
5147 TEX_decode_env (evarname, defenv)
5148 char *evarname;
5149 char *defenv;
5150 {
5151 register char *env, *p;
5152 int i, len;
5153
5154 /* Append default string to environment. */
5155 env = getenv (evarname);
5156 if (!env)
5157 env = defenv;
5158 else
5159 {
5160 char *oldenv = env;
5161 env = concat (oldenv, defenv, "");
5162 }
5163
5164 /* Allocate a token table */
5165 for (len = 1, p = env; p;)
5166 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5167 len++;
5168 TEX_toktab = xnew (len, linebuffer);
5169
5170 /* Unpack environment string into token table. Be careful about */
5171 /* zero-length strings (leading ':', "::" and trailing ':') */
5172 for (i = 0; *env != '\0';)
5173 {
5174 p = etags_strchr (env, ':');
5175 if (!p) /* End of environment string. */
5176 p = env + strlen (env);
5177 if (p - env > 0)
5178 { /* Only non-zero strings. */
5179 TEX_toktab[i].buffer = savenstr (env, p - env);
5180 TEX_toktab[i].len = p - env;
5181 i++;
5182 }
5183 if (*p)
5184 env = p + 1;
5185 else
5186 {
5187 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5188 TEX_toktab[i].len = 0;
5189 break;
5190 }
5191 }
5192 }
5193
5194 \f
5195 /* Texinfo support. Dave Love, Mar. 2000. */
5196 static void
5197 Texinfo_nodes (inf)
5198 FILE * inf;
5199 {
5200 char *cp, *start;
5201 LOOP_ON_INPUT_LINES (inf, lb, cp)
5202 if (LOOKING_AT (cp, "@node"))
5203 {
5204 start = cp;
5205 while (*cp != '\0' && *cp != ',')
5206 cp++;
5207 make_tag (start, cp - start, TRUE,
5208 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5209 }
5210 }
5211
5212 \f
/* Similar to LOOKING_AT but does not use notinname, does not skip
   spaces: the text at CP is compared with KW using strncaseeq
   (presumably ignoring case -- TODO confirm) and, on a match, CP is
   advanced just past KW.  CP is expanded more than once.  */
#define LOOKING_AT_NOCASE(cp, kw)  /* kw is a constant string */        \
  (strncaseeq ((cp), kw, sizeof(kw)-1)  /* cp points at kw */           \
   && ((cp) += sizeof(kw)-1))           /* advance cp past kw */
5217
5218 /*
5219 * HTML support.
5220 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5221 * Contents of <a name=xxx> are tags with name xxx.
5222 *
5223 * Francesco Potortì, 2002.
5224 */
5225 static void
5226 HTML_labels (inf)
5227 FILE * inf;
5228 {
5229 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5230 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5231 bool intag = FALSE; /* inside an html tag, looking for ID= */
5232 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5233 char *end;
5234
5235
5236 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5237
5238 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5239 for (;;) /* loop on the same line */
5240 {
5241 if (skiptag) /* skip HTML tag */
5242 {
5243 while (*dbp != '\0' && *dbp != '>')
5244 dbp++;
5245 if (*dbp == '>')
5246 {
5247 dbp += 1;
5248 skiptag = FALSE;
5249 continue; /* look on the same line */
5250 }
5251 break; /* go to next line */
5252 }
5253
5254 else if (intag) /* look for "name=" or "id=" */
5255 {
5256 while (*dbp != '\0' && *dbp != '>'
5257 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5258 dbp++;
5259 if (*dbp == '\0')
5260 break; /* go to next line */
5261 if (*dbp == '>')
5262 {
5263 dbp += 1;
5264 intag = FALSE;
5265 continue; /* look on the same line */
5266 }
5267 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5268 || LOOKING_AT_NOCASE (dbp, "id="))
5269 {
5270 bool quoted = (dbp[0] == '"');
5271
5272 if (quoted)
5273 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5274 continue;
5275 else
5276 for (end = dbp; *end != '\0' && intoken (*end); end++)
5277 continue;
5278 linebuffer_setlen (&token_name, end - dbp);
5279 strncpy (token_name.buffer, dbp, end - dbp);
5280 token_name.buffer[end - dbp] = '\0';
5281
5282 dbp = end;
5283 intag = FALSE; /* we found what we looked for */
5284 skiptag = TRUE; /* skip to the end of the tag */
5285 getnext = TRUE; /* then grab the text */
5286 continue; /* look on the same line */
5287 }
5288 dbp += 1;
5289 }
5290
5291 else if (getnext) /* grab next tokens and tag them */
5292 {
5293 dbp = skip_spaces (dbp);
5294 if (*dbp == '\0')
5295 break; /* go to next line */
5296 if (*dbp == '<')
5297 {
5298 intag = TRUE;
5299 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5300 continue; /* look on the same line */
5301 }
5302
5303 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5304 continue;
5305 make_tag (token_name.buffer, token_name.len, TRUE,
5306 dbp, end - dbp, lineno, linecharno);
5307 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5308 getnext = FALSE;
5309 break; /* go to next line */
5310 }
5311
5312 else /* look for an interesting HTML tag */
5313 {
5314 while (*dbp != '\0' && *dbp != '<')
5315 dbp++;
5316 if (*dbp == '\0')
5317 break; /* go to next line */
5318 intag = TRUE;
5319 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5320 {
5321 inanchor = TRUE;
5322 continue; /* look on the same line */
5323 }
5324 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5325 || LOOKING_AT_NOCASE (dbp, "<h1>")
5326 || LOOKING_AT_NOCASE (dbp, "<h2>")
5327 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5328 {
5329 intag = FALSE;
5330 getnext = TRUE;
5331 continue; /* look on the same line */
5332 }
5333 dbp += 1;
5334 }
5335 }
5336 }
5337
5338 \f
5339 /*
5340 * Prolog support
5341 *
5342 * Assumes that the predicate or rule starts at column 0.
5343 * Only the first clause of a predicate or rule is added.
5344 * Original code by Sunichirou Sugou (1989)
5345 * Rewritten by Anders Lindgren (1996)
5346 */
5347 static int prolog_pr __P((char *, char *));
5348 static void prolog_skip_comment __P((linebuffer *, FILE *));
5349 static int prolog_atom __P((char *, int));
5350
5351 static void
5352 Prolog_functions (inf)
5353 FILE *inf;
5354 {
5355 char *cp, *last;
5356 int len;
5357 int allocated;
5358
5359 allocated = 0;
5360 len = 0;
5361 last = NULL;
5362
5363 LOOP_ON_INPUT_LINES (inf, lb, cp)
5364 {
5365 if (cp[0] == '\0') /* Empty line */
5366 continue;
5367 else if (iswhite (cp[0])) /* Not a predicate */
5368 continue;
5369 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5370 prolog_skip_comment (&lb, inf);
5371 else if ((len = prolog_pr (cp, last)) > 0)
5372 {
5373 /* Predicate or rule. Store the function name so that we
5374 only generate a tag for the first clause. */
5375 if (last == NULL)
5376 last = xnew(len + 1, char);
5377 else if (len + 1 > allocated)
5378 xrnew (last, len + 1, char);
5379 allocated = len + 1;
5380 strncpy (last, cp, len);
5381 last[len] = '\0';
5382 }
5383 }
5384 }
5385
5386
5387 static void
5388 prolog_skip_comment (plb, inf)
5389 linebuffer *plb;
5390 FILE *inf;
5391 {
5392 char *cp;
5393
5394 do
5395 {
5396 for (cp = plb->buffer; *cp != '\0'; cp++)
5397 if (cp[0] == '*' && cp[1] == '/')
5398 return;
5399 readline (plb, inf);
5400 }
5401 while (!feof(inf));
5402 }
5403
5404 /*
5405 * A predicate or rule definition is added if it matches:
5406 * <beginning of line><Prolog Atom><whitespace>(
5407 * or <beginning of line><Prolog Atom><whitespace>:-
5408 *
5409 * It is added to the tags database if it doesn't match the
5410 * name of the previous clause header.
5411 *
5412 * Return the size of the name of the predicate or rule, or 0 if no
5413 * header was found.
5414 */
5415 static int
5416 prolog_pr (s, last)
5417 char *s;
5418 char *last; /* Name of last clause. */
5419 {
5420 int pos;
5421 int len;
5422
5423 pos = prolog_atom (s, 0);
5424 if (pos < 1)
5425 return 0;
5426
5427 len = pos;
5428 pos = skip_spaces (s + pos) - s;
5429
5430 if ((s[pos] == '.'
5431 || (s[pos] == '(' && (pos += 1))
5432 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5433 && (last == NULL /* save only the first clause */
5434 || len != strlen (last)
5435 || !strneq (s, last, len)))
5436 {
5437 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5438 return len;
5439 }
5440 else
5441 return 0;
5442 }
5443
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the text and POS the offset in S at which the atom starts.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: scan for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return (int) (p - start);
        p++;                    /* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5502
5503 \f
5504 /*
5505 * Support for Erlang
5506 *
5507 * Generates tags for functions, defines, and records.
5508 * Assumes that Erlang functions start at column 0.
5509 * Original code by Anders Lindgren (1996)
5510 */
5511 static int erlang_func __P((char *, char *));
5512 static void erlang_attribute __P((char *));
5513 static int erlang_atom __P((char *));
5514
5515 static void
5516 Erlang_functions (inf)
5517 FILE *inf;
5518 {
5519 char *cp, *last;
5520 int len;
5521 int allocated;
5522
5523 allocated = 0;
5524 len = 0;
5525 last = NULL;
5526
5527 LOOP_ON_INPUT_LINES (inf, lb, cp)
5528 {
5529 if (cp[0] == '\0') /* Empty line */
5530 continue;
5531 else if (iswhite (cp[0])) /* Not function nor attribute */
5532 continue;
5533 else if (cp[0] == '%') /* comment */
5534 continue;
5535 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5536 continue;
5537 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5538 {
5539 erlang_attribute (cp);
5540 last = NULL;
5541 }
5542 else if ((len = erlang_func (cp, last)) > 0)
5543 {
5544 /*
5545 * Function. Store the function name so that we only
5546 * generates a tag for the first clause.
5547 */
5548 if (last == NULL)
5549 last = xnew (len + 1, char);
5550 else if (len + 1 > allocated)
5551 xrnew (last, len + 1, char);
5552 allocated = len + 1;
5553 strncpy (last, cp, len);
5554 last[len] = '\0';
5555 }
5556 }
5557 }
5558
5559
5560 /*
5561 * A function definition is added if it matches:
5562 * <beginning of line><Erlang Atom><whitespace>(
5563 *
5564 * It is added to the tags database if it doesn't match the
5565 * name of the previous clause header.
5566 *
5567 * Return the size of the name of the function, or 0 if no function
5568 * was found.
5569 */
5570 static int
5571 erlang_func (s, last)
5572 char *s;
5573 char *last; /* Name of last clause. */
5574 {
5575 int pos;
5576 int len;
5577
5578 pos = erlang_atom (s);
5579 if (pos < 1)
5580 return 0;
5581
5582 len = pos;
5583 pos = skip_spaces (s + pos) - s;
5584
5585 /* Save only the first clause. */
5586 if (s[pos++] == '('
5587 && (last == NULL
5588 || len != (int)strlen (last)
5589 || !strneq (s, last, len)))
5590 {
5591 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5592 return len;
5593 }
5594
5595 return 0;
5596 }
5597
5598
5599 /*
5600 * Handle attributes. Currently, tags are generated for defines
5601 * and records.
5602 *
5603 * They are on the form:
5604 * -define(foo, bar).
5605 * -define(Foo(M, N), M+N).
5606 * -record(graph, {vtab = notable, cyclic = true}).
5607 */
5608 static void
5609 erlang_attribute (s)
5610 char *s;
5611 {
5612 char *cp = s;
5613
5614 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5615 && *cp++ == '(')
5616 {
5617 int len = erlang_atom (skip_spaces (cp));
5618 if (len > 0)
5619 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5620 }
5621 return;
5622 }
5623
5624
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
      return pos;
    }

  if (s[0] != '\'')
    return 0;

  /* Quoted atom: find the closing quote, stepping over the character
     that follows each backslash.  */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          pos++;
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;               /* count the closing quote too */
}
5653
5654 \f
5655 #ifdef ETAGS_REGEXPS
5656
5657 static char *scan_separators __P((char *));
5658 static void add_regex __P((char *, language *));
5659 static char *substitute __P((char *, char *, struct re_registers *));
5660
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the character after it.  One that ends
             the string is dropped and the regexp is unterminated.  */
          ++name;
          if (*name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator stands for itself; anything else
                 that is quoted keeps its backslash.  */
              if (*name != sep)
                *copyto++ = '\\';
              *copyto++ = *name;
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5720
5721 /* Look at the argument of --regex or --no-regex and do the right
5722 thing. Same for each line of a regexp file. */
5723 static void
5724 analyse_regex (regex_arg)
5725 char *regex_arg;
5726 {
5727 if (regex_arg == NULL)
5728 {
5729 free_regexps (); /* --no-regex: remove existing regexps */
5730 return;
5731 }
5732
5733 /* A real --regexp option or a line in a regexp file. */
5734 switch (regex_arg[0])
5735 {
5736 /* Comments in regexp file or null arg to --regex. */
5737 case '\0':
5738 case ' ':
5739 case '\t':
5740 break;
5741
5742 /* Read a regex file. This is recursive and may result in a
5743 loop, which will stop when the file descriptors are exhausted. */
5744 case '@':
5745 {
5746 FILE *regexfp;
5747 linebuffer regexbuf;
5748 char *regexfile = regex_arg + 1;
5749
5750 /* regexfile is a file containing regexps, one per line. */
5751 regexfp = fopen (regexfile, "r");
5752 if (regexfp == NULL)
5753 {
5754 pfatal (regexfile);
5755 return;
5756 }
5757 linebuffer_init (&regexbuf);
5758 while (readline_internal (&regexbuf, regexfp) > 0)
5759 analyse_regex (regexbuf.buffer);
5760 free (regexbuf.buffer);
5761 fclose (regexfp);
5762 }
5763 break;
5764
5765 /* Regexp to be used for a specific language only. */
5766 case '{':
5767 {
5768 language *lang;
5769 char *lang_name = regex_arg + 1;
5770 char *cp;
5771
5772 for (cp = lang_name; *cp != '}'; cp++)
5773 if (*cp == '\0')
5774 {
5775 error ("unterminated language name in regex: %s", regex_arg);
5776 return;
5777 }
5778 *cp++ = '\0';
5779 lang = get_language_from_langname (lang_name);
5780 if (lang == NULL)
5781 return;
5782 add_regex (cp, lang);
5783 }
5784 break;
5785
5786 /* Regexp to be used for any language. */
5787 default:
5788 add_regex (regex_arg, NULL);
5789 break;
5790 }
5791 }
5792
5793 /* Separate the regexp pattern, compile it,
5794 and care for optional name and modifiers. */
5795 static void
5796 add_regex (regexp_pattern, lang)
5797 char *regexp_pattern;
5798 language *lang;
5799 {
5800 static struct re_pattern_buffer zeropattern;
5801 char sep, *pat, *name, *modifiers;
5802 const char *err;
5803 struct re_pattern_buffer *patbuf;
5804 regexp *rp;
5805 bool
5806 force_explicit_name = TRUE, /* do not use implicit tag names */
5807 ignore_case = FALSE, /* case is significant */
5808 multi_line = FALSE, /* matches are done one line at a time */
5809 single_line = FALSE; /* dot does not match newline */
5810
5811
5812 if (strlen(regexp_pattern) < 3)
5813 {
5814 error ("null regexp", (char *)NULL);
5815 return;
5816 }
5817 sep = regexp_pattern[0];
5818 name = scan_separators (regexp_pattern);
5819 if (name == NULL)
5820 {
5821 error ("%s: unterminated regexp", regexp_pattern);
5822 return;
5823 }
5824 if (name[1] == sep)
5825 {
5826 error ("null name for regexp \"%s\"", regexp_pattern);
5827 return;
5828 }
5829 modifiers = scan_separators (name);
5830 if (modifiers == NULL) /* no terminating separator --> no name */
5831 {
5832 modifiers = name;
5833 name = "";
5834 }
5835 else
5836 modifiers += 1; /* skip separator */
5837
5838 /* Parse regex modifiers. */
5839 for (; modifiers[0] != '\0'; modifiers++)
5840 switch (modifiers[0])
5841 {
5842 case 'N':
5843 if (modifiers == name)
5844 error ("forcing explicit tag name but no name, ignoring", NULL);
5845 force_explicit_name = TRUE;
5846 break;
5847 case 'i':
5848 ignore_case = TRUE;
5849 break;
5850 case 's':
5851 single_line = TRUE;
5852 /* FALLTHRU */
5853 case 'm':
5854 multi_line = TRUE;
5855 need_filebuf = TRUE;
5856 break;
5857 default:
5858 {
5859 char wrongmod [2];
5860 wrongmod[0] = modifiers[0];
5861 wrongmod[1] = '\0';
5862 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5863 }
5864 break;
5865 }
5866
5867 patbuf = xnew (1, struct re_pattern_buffer);
5868 *patbuf = zeropattern;
5869 if (ignore_case)
5870 {
5871 static char lc_trans[CHARS];
5872 int i;
5873 for (i = 0; i < CHARS; i++)
5874 lc_trans[i] = lowcase (i);
5875 patbuf->translate = lc_trans; /* translation table to fold case */
5876 }
5877
5878 if (multi_line)
5879 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5880 else
5881 pat = regexp_pattern;
5882
5883 if (single_line)
5884 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5885 else
5886 re_set_syntax (RE_SYNTAX_EMACS);
5887
5888 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5889 if (multi_line)
5890 free (pat);
5891 if (err != NULL)
5892 {
5893 error ("%s while compiling pattern", err);
5894 return;
5895 }
5896
5897 rp = p_head;
5898 p_head = xnew (1, regexp);
5899 p_head->pattern = savestr (regexp_pattern);
5900 p_head->p_next = rp;
5901 p_head->lang = lang;
5902 p_head->pat = patbuf;
5903 p_head->name = savestr (name);
5904 p_head->error_signaled = FALSE;
5905 p_head->force_explicit_name = force_explicit_name;
5906 p_head->ignore_case = ignore_case;
5907 p_head->multi_line = multi_line;
5908 }
5909
5910 /*
5911 * Do the substitutions indicated by the regular expression and
5912 * arguments.
5913 */
5914 static char *
5915 substitute (in, out, regs)
5916 char *in, *out;
5917 struct re_registers *regs;
5918 {
5919 char *result, *t;
5920 int size, dig, diglen;
5921
5922 result = NULL;
5923 size = strlen (out);
5924
5925 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5926 if (out[size - 1] == '\\')
5927 fatal ("pattern error in \"%s\"", out);
5928 for (t = etags_strchr (out, '\\');
5929 t != NULL;
5930 t = etags_strchr (t + 2, '\\'))
5931 if (ISDIGIT (t[1]))
5932 {
5933 dig = t[1] - '0';
5934 diglen = regs->end[dig] - regs->start[dig];
5935 size += diglen - 2;
5936 }
5937 else
5938 size -= 1;
5939
5940 /* Allocate space and do the substitutions. */
5941 assert (size >= 0);
5942 result = xnew (size + 1, char);
5943
5944 for (t = result; *out != '\0'; out++)
5945 if (*out == '\\' && ISDIGIT (*++out))
5946 {
5947 dig = *out - '0';
5948 diglen = regs->end[dig] - regs->start[dig];
5949 strncpy (t, in + regs->start[dig], diglen);
5950 t += diglen;
5951 }
5952 else
5953 *t++ = *out;
5954 *t = '\0';
5955
5956 assert (t <= result + size);
5957 assert (t - result == (int)strlen (result));
5958
5959 return result;
5960 }
5961
5962 /* Deallocate all regexps. */
5963 static void
5964 free_regexps ()
5965 {
5966 regexp *rp;
5967 while (p_head != NULL)
5968 {
5969 rp = p_head->p_next;
5970 free (p_head->pattern);
5971 free (p_head->name);
5972 free (p_head);
5973 p_head = rp;
5974 }
5975 return;
5976 }
5977
/*
 * Reads the whole file as a single string from `filebuf' and looks for
 * multi-line regular expressions, creating tags on matches.
 * readline already dealt with normal regexps.
 *
 * Every regexp in the `p_head' list that has `multi_line' set and is
 * either generic or bound to the language of `curfdp' is searched for
 * repeatedly in the buffer.  The globals `lineno', `charno' and
 * `linecharno' are reset for each multi-line regexp and advanced to
 * the end of each match.
 *
 * Idea by Ben Wing <ben@666.com> (2002).
 */
static void
regex_tag_multiline ()
{
  char *buffer = filebuf.buffer;
  regexp *rp;
  char *name;

  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      int match = 0;            /* result of the last search */

      if (!rp->multi_line)
        continue;               /* skip normal regexps */

      /* Generic initialisations before parsing file from memory. */
      lineno = 1;               /* reset global line number */
      charno = 0;               /* reset global char number */
      linecharno = 0;           /* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
        continue;

      /* Any negative value of `match' (error, no match, or the -3 set
         below) ends the loop.  */
      while (match >= 0 && match < filebuf.len)
        {
          /* Search forward from `charno', that is from the end of the
             previous match (or from the start of the buffer).  */
          match = re_search (rp->pat, buffer, filebuf.len, charno,
                             filebuf.len - match, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* A match that ends where it starts is empty: report it
                 once per regexp and stop using this regexp.  */
              if (match == rp->regs.end[0])
                {
                  if (!rp->error_signaled)
                    {
                      error ("regexp matches the empty string: \"%s\"",
                             rp->pattern);
                      rp->error_signaled = TRUE;
                    }
                  match = -3;   /* exit from while loop */
                  break;
                }

              /* Match occurred.  Construct a tag. */
              /* Advance to the end of the match, counting the lines
                 crossed and remembering where the last one starts.  */
              while (charno < rp->regs.end[0])
                if (buffer[charno++] == '\n')
                  lineno++, linecharno = charno;
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (buffer, rp->name, &rp->regs);
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, buffer + linecharno,
                        charno - linecharno + 1, lineno, linecharno);
              else
                /* NOTE(review): `name' is NULL here when rp->name is
                   empty, and strlen would then be given NULL -- confirm
                   this branch cannot be reached with an empty name.  */
                make_tag (name, strlen (name), TRUE, buffer + linecharno,
                          charno - linecharno + 1, lineno, linecharno);
              break;
            }
        }
    }
}
6060
6061 #endif /* ETAGS_REGEXPS */
6062
6063 \f
6064 static bool
6065 nocase_tail (cp)
6066 char *cp;
6067 {
6068 register int len = 0;
6069
6070 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6071 cp++, len++;
6072 if (*cp == '\0' && !intoken (dbp[len]))
6073 {
6074 dbp += len;
6075 return TRUE;
6076 }
6077 return FALSE;
6078 }
6079
6080 static void
6081 get_tag (bp, namepp)
6082 register char *bp;
6083 char **namepp;
6084 {
6085 register char *cp = bp;
6086
6087 if (*bp != '\0')
6088 {
6089 /* Go till you get to white space or a syntactic break */
6090 for (cp = bp + 1; !notinname (*cp); cp++)
6091 continue;
6092 make_tag (bp, cp - bp, TRUE,
6093 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6094 }
6095
6096 if (namepp != NULL)
6097 *namepp = savenstr (bp, cp - bp);
6098 }
6099
6100 /*
6101 * Read a line of text from `stream' into `lbp', excluding the
6102 * newline or CR-NL, if any. Return the number of characters read from
6103 * `stream', which is the length of the line including the newline.
6104 *
6105 * On DOS or Windows we do not count the CR character, if any before the
6106 * NL, in the returned length; this mirrors the behavior of Emacs on those
6107 * platforms (for text files, it translates CR-NL to NL as it reads in the
6108 * file).
6109 *
6110 * If multi-line regular expressions are requested, each line read is
6111 * appended to `filebuf'.
6112 */
6113 static long
6114 readline_internal (lbp, stream)
6115 linebuffer *lbp;
6116 register FILE *stream;
6117 {
6118 char *buffer = lbp->buffer;
6119 register char *p = lbp->buffer;
6120 register char *pend;
6121 int chars_deleted;
6122
6123 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6124
6125 for (;;)
6126 {
6127 register int c = getc (stream);
6128 if (p == pend)
6129 {
6130 /* We're at the end of linebuffer: expand it. */
6131 lbp->size *= 2;
6132 xrnew (buffer, lbp->size, char);
6133 p += buffer - lbp->buffer;
6134 pend = buffer + lbp->size;
6135 lbp->buffer = buffer;
6136 }
6137 if (c == EOF)
6138 {
6139 *p = '\0';
6140 chars_deleted = 0;
6141 break;
6142 }
6143 if (c == '\n')
6144 {
6145 if (p > buffer && p[-1] == '\r')
6146 {
6147 p -= 1;
6148 #ifdef DOS_NT
6149 /* Assume CRLF->LF translation will be performed by Emacs
6150 when loading this file, so CRs won't appear in the buffer.
6151 It would be cleaner to compensate within Emacs;
6152 however, Emacs does not know how many CRs were deleted
6153 before any given point in the file. */
6154 chars_deleted = 1;
6155 #else
6156 chars_deleted = 2;
6157 #endif
6158 }
6159 else
6160 {
6161 chars_deleted = 1;
6162 }
6163 *p = '\0';
6164 break;
6165 }
6166 *p++ = c;
6167 }
6168 lbp->len = p - buffer;
6169
6170 if (need_filebuf /* we need filebuf for multi-line regexps */
6171 && chars_deleted > 0) /* not at EOF */
6172 {
6173 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6174 {
6175 /* Expand filebuf. */
6176 filebuf.size *= 2;
6177 xrnew (filebuf.buffer, filebuf.size, char);
6178 }
6179 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6180 filebuf.len += lbp->len;
6181 filebuf.buffer[filebuf.len++] = '\n';
6182 filebuf.buffer[filebuf.len] = '\0';
6183 }
6184
6185 return lbp->len + chars_deleted;
6186 }
6187
6188 /*
6189 * Like readline_internal, above, but in addition try to match the
6190 * input line against relevant regular expressions and manage #line
6191 * directives.
6192 */
6193 static void
6194 readline (lbp, stream)
6195 linebuffer *lbp;
6196 FILE *stream;
6197 {
6198 long result;
6199
6200 linecharno = charno; /* update global char number of line start */
6201 result = readline_internal (lbp, stream); /* read line */
6202 lineno += 1; /* increment global line number */
6203 charno += result; /* increment global char number */
6204
6205 /* Honour #line directives. */
6206 if (!no_line_directive)
6207 {
6208 static bool discard_until_line_directive;
6209
6210 /* Check whether this is a #line directive. */
6211 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6212 {
6213 int start, lno;
6214
6215 if (DEBUG) start = 0; /* shut up the compiler */
6216 if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6217 {
6218 char *endp = lbp->buffer + start;
6219
6220 assert (start > 0);
6221 while ((endp = etags_strchr (endp, '"')) != NULL
6222 && endp[-1] == '\\')
6223 endp++;
6224 if (endp != NULL)
6225 /* Ok, this is a real #line directive. Let's deal with it. */
6226 {
6227 char *taggedabsname; /* absolute name of original file */
6228 char *taggedfname; /* name of original file as given */
6229 char *name; /* temp var */
6230
6231 discard_until_line_directive = FALSE; /* found it */
6232 name = lbp->buffer + start;
6233 *endp = '\0';
6234 canonicalize_filename (name); /* for DOS */
6235 taggedabsname = absolute_filename (name, curfdp->infabsdir);
6236 if (filename_is_absolute (name)
6237 || filename_is_absolute (curfdp->infname))
6238 taggedfname = savestr (taggedabsname);
6239 else
6240 taggedfname = relative_filename (taggedabsname,tagfiledir);
6241
6242 if (streq (curfdp->taggedfname, taggedfname))
6243 /* The #line directive is only a line number change. We
6244 deal with this afterwards. */
6245 free (taggedfname);
6246 else
6247 /* The tags following this #line directive should be
6248 attributed to taggedfname. In order to do this, set
6249 curfdp accordingly. */
6250 {
6251 fdesc *fdp; /* file description pointer */
6252
6253 /* Go look for a file description already set up for the
6254 file indicated in the #line directive. If there is
6255 one, use it from now until the next #line
6256 directive. */
6257 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6258 if (streq (fdp->infname, curfdp->infname)
6259 && streq (fdp->taggedfname, taggedfname))
6260 /* If we remove the second test above (after the &&)
6261 then all entries pertaining to the same file are
6262 coalesced in the tags file. If we use it, then
6263 entries pertaining to the same file but generated
6264 from different files (via #line directives) will
6265 go into separate sections in the tags file. These
6266 alternatives look equivalent. The first one
6267 destroys some apparently useless information. */
6268 {
6269 curfdp = fdp;
6270 free (taggedfname);
6271 break;
6272 }
6273 /* Else, if we already tagged the real file, skip all
6274 input lines until the next #line directive. */
6275 if (fdp == NULL) /* not found */
6276 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6277 if (streq (fdp->infabsname, taggedabsname))
6278 {
6279 discard_until_line_directive = TRUE;
6280 free (taggedfname);
6281 break;
6282 }
6283 /* Else create a new file description and use that from
6284 now on, until the next #line directive. */
6285 if (fdp == NULL) /* not found */
6286 {
6287 fdp = fdhead;
6288 fdhead = xnew (1, fdesc);
6289 *fdhead = *curfdp; /* copy curr. file description */
6290 fdhead->next = fdp;
6291 fdhead->infname = savestr (curfdp->infname);
6292 fdhead->infabsname = savestr (curfdp->infabsname);
6293 fdhead->infabsdir = savestr (curfdp->infabsdir);
6294 fdhead->taggedfname = taggedfname;
6295 fdhead->usecharno = FALSE;
6296 fdhead->prop = NULL;
6297 fdhead->written = FALSE;
6298 curfdp = fdhead;
6299 }
6300 }
6301 free (taggedabsname);
6302 lineno = lno - 1;
6303 readline (lbp, stream);
6304 return;
6305 } /* if a real #line directive */
6306 } /* if #line is followed by a a number */
6307 } /* if line begins with "#line " */
6308
6309 /* If we are here, no #line directive was found. */
6310 if (discard_until_line_directive)
6311 {
6312 if (result > 0)
6313 {
6314 /* Do a tail recursion on ourselves, thus discarding the contents
6315 of the line buffer. */
6316 readline (lbp, stream);
6317 return;
6318 }
6319 /* End of file. */
6320 discard_until_line_directive = FALSE;
6321 return;
6322 }
6323 } /* if #line directives should be considered */
6324
6325 #ifdef ETAGS_REGEXPS
6326 {
6327 int match;
6328 regexp *rp;
6329 char *name;
6330
6331 /* Match against relevant regexps. */
6332 if (lbp->len > 0)
6333 for (rp = p_head; rp != NULL; rp = rp->p_next)
6334 {
6335 /* Only use generic regexps or those for the current language.
6336 Also do not use multiline regexps, which is the job of
6337 regex_tag_multiline. */
6338 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6339 || rp->multi_line)
6340 continue;
6341
6342 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6343 switch (match)
6344 {
6345 case -2:
6346 /* Some error. */
6347 if (!rp->error_signaled)
6348 {
6349 error ("regexp stack overflow while matching \"%s\"",
6350 rp->pattern);
6351 rp->error_signaled = TRUE;
6352 }
6353 break;
6354 case -1:
6355 /* No match. */
6356 break;
6357 case 0:
6358 /* Empty string matched. */
6359 if (!rp->error_signaled)
6360 {
6361 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6362 rp->error_signaled = TRUE;
6363 }
6364 break;
6365 default:
6366 /* Match occurred. Construct a tag. */
6367 name = rp->name;
6368 if (name[0] == '\0')
6369 name = NULL;
6370 else /* make a named tag */
6371 name = substitute (lbp->buffer, rp->name, &rp->regs);
6372 if (rp->force_explicit_name)
6373 /* Force explicit tag name, if a name is there. */
6374 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6375 else
6376 make_tag (name, strlen (name), TRUE,
6377 lbp->buffer, match, lineno, linecharno);
6378 break;
6379 }
6380 }
6381 }
6382 #endif /* ETAGS_REGEXPS */
6383 }
6384
6385 \f
/*
 * Duplicate the string CP: return a pointer to newly allocated
 * storage of strlen(cp)+1 bytes holding a copy of it.  The storage
 * is obtained through savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  char *copy;

  copy = savenstr (cp, strlen (cp));
  return copy;
}
6396
6397 /*
6398 * Return a pointer to a space of size LEN+1 allocated with xnew where
6399 * the string CP has been copied for at most the first LEN characters.
6400 */
6401 static char *
6402 savenstr (cp, len)
6403 char *cp;
6404 int len;
6405 {
6406 register char *dp;
6407
6408 dp = xnew (len + 1, char);
6409 strncpy (dp, cp, len);
6410 dp[len] = '\0';
6411 return dp;
6412 }
6413
/*
 * Find the last occurrence of the character C in the string SP.
 * The terminating NUL is part of the string for this purpose, so
 * searching for '\0' yields a pointer to the terminator.
 * Return NULL when C does not occur.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6435
/*
 * Find the first occurrence of the character C in the string SP.
 * The terminating NUL is part of the string for this purpose, so
 * searching for '\0' yields a pointer to the terminator.
 * Return NULL when C does not occur.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6454
/*
 * Compare the strings S1 and S2, ignoring case where both characters
 * being compared are alphabetic.  Return the difference between the
 * first pair of characters that differ (after case folding when both
 * are alphabetic), or zero if the strings are equal.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings while they agree.  */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6475
/*
 * Compare at most N characters of the strings S1 and S2, ignoring
 * case where both characters being compared are alphabetic.  Return
 * zero if the first N characters agree, otherwise the difference
 * between the first pair of characters that differ (after case
 * folding when both are alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  for (;;)
    {
      if (*s1 == '\0')
        break;
      /* N goes negative only when the character budget runs out
         before the end of S1 and before any mismatch.  */
      if (n-- <= 0)
        break;
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
      s1++, s2++;
    }

  if (n < 0)
    return 0;
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6501
/* Return a pointer to the first character at or after CP for which
   `iswhite' is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6511
/* Return a pointer to the first character at or after CP that is
   either white space (per `iswhite') or the terminating NUL.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
6521
/* Report a fatal error: print the message built from the printf
   control string S1 and its argument S2 through `error', then leave
   the program with a failure status.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6530
/* Report a fatal system error: let perror print S1 followed by the
   description of the current `errno', then leave the program with a
   failure status.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6538
6539 static void
6540 suggest_asking_for_help ()
6541 {
6542 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6543 progname, LONG_OPTIONS ? "--help" : "-h");
6544 exit (EXIT_FAILURE);
6545 }
6546
6547 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6548 static void
6549 error (s1, s2)
6550 const char *s1, *s2;
6551 {
6552 fprintf (stderr, "%s: ", progname);
6553 fprintf (stderr, s1, s2);
6554 fprintf (stderr, "\n");
6555 }
6556
6557 /* Return a newly-allocated string whose contents
6558 concatenate those of s1, s2, s3. */
6559 static char *
6560 concat (s1, s2, s3)
6561 char *s1, *s2, *s3;
6562 {
6563 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6564 char *result = xnew (len1 + len2 + len3 + 1, char);
6565
6566 strcpy (result, s1);
6567 strcpy (result + len1, s2);
6568 strcpy (result + len1 + len2, s3);
6569 result[len1 + len2 + len3] = '\0';
6570
6571 return result;
6572 }
6573
6574 \f
6575 /* Does the same work as the system V getcwd, but does not need to
6576 guess the buffer size in advance. */
6577 static char *
6578 etags_getcwd ()
6579 {
6580 #ifdef HAVE_GETCWD
6581 int bufsize = 200;
6582 char *path = xnew (bufsize, char);
6583
6584 while (getcwd (path, bufsize) == NULL)
6585 {
6586 if (errno != ERANGE)
6587 pfatal ("getcwd");
6588 bufsize *= 2;
6589 free (path);
6590 path = xnew (bufsize, char);
6591 }
6592
6593 canonicalize_filename (path);
6594 return path;
6595
6596 #else /* not HAVE_GETCWD */
6597 #if MSDOS
6598
6599 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6600
6601 getwd (path);
6602
6603 for (p = path; *p != '\0'; p++)
6604 if (*p == '\\')
6605 *p = '/';
6606 else
6607 *p = lowcase (*p);
6608
6609 return strdup (path);
6610 #else /* not MSDOS */
6611 linebuffer path;
6612 FILE *pipe;
6613
6614 linebuffer_init (&path);
6615 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6616 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6617 pfatal ("pwd");
6618 pclose (pipe);
6619
6620 return path.buffer;
6621 #endif /* not MSDOS */
6622 #endif /* not HAVE_GETCWD */
6623 }
6624
6625 /* Return a newly allocated string containing the file name of FILE
6626 relative to the absolute directory DIR (which should end with a slash). */
6627 static char *
6628 relative_filename (file, dir)
6629 char *file, *dir;
6630 {
6631 char *fp, *dp, *afn, *res;
6632 int i;
6633
6634 /* Find the common root of file and dir (with a trailing slash). */
6635 afn = absolute_filename (file, cwd);
6636 fp = afn;
6637 dp = dir;
6638 while (*fp++ == *dp++)
6639 continue;
6640 fp--, dp--; /* back to the first differing char */
6641 #ifdef DOS_NT
6642 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6643 return afn;
6644 #endif
6645 do /* look at the equal chars until '/' */
6646 fp--, dp--;
6647 while (*fp != '/');
6648
6649 /* Build a sequence of "../" strings for the resulting relative file name. */
6650 i = 0;
6651 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6652 i += 1;
6653 res = xnew (3*i + strlen (fp + 1) + 1, char);
6654 res[0] = '\0';
6655 while (i-- > 0)
6656 strcat (res, "../");
6657
6658 /* Add the file name relative to the common root of file and dir. */
6659 strcat (res, fp + 1);
6660 free (afn);
6661
6662 return res;
6663 }
6664
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is, otherwise it is appended to DIR.
   Then every "/dirname/.." and "/." component is deleted from the
   result.  If nothing is left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              {
                /* Move the tail down over the deleted text.  Source
                   and destination overlap, which strcpy does not
                   allow; a forward copy towards lower addresses is
                   safe.  */
                char *to = cp;
                char *from = slashp + 3;
                while ((*to++ = *from++) != '\0')
                  continue;
              }
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Delete "/." with the same overlap-safe forward copy.  */
              char *to = slashp;
              char *from = slashp + 2;
              while ((*to++ = *from++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root.  Release the
         empty string, as a fresh one is returned in its place.  */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6725
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end with
   a slash).  FILE is canonicalized in place.  When FILE contains no
   slash, a copy of DIR is returned.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *after, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily cut FILE right after its last slash, so that only
     the directory part gets resolved; then put the character back.  */
  after = last_slash + 1;
  saved = *after;
  *after = '\0';
  res = absolute_filename (file, dir);
  *after = saved;

  return res;
}
6747
/* Tell whether FN is an absolute file name: one that begins with a
   slash or, under DOS_NT, with a drive letter, a colon and a slash.
   The argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6760
/* Put the file name FN in canonical form, in place.  Under DOS_NT
   this makes a lower-case drive letter upper-case and translates
   backslashes into slashes; elsewhere the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':')
    {
      if (ISLOWER (fn[0]))
        fn[0] = upcase (fn[0]);
    }
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6779
6780 \f
6781 /* Initialize a linebuffer for use */
6782 static void
6783 linebuffer_init (lbp)
6784 linebuffer *lbp;
6785 {
6786 lbp->size = (DEBUG) ? 3 : 200;
6787 lbp->buffer = xnew (lbp->size, char);
6788 lbp->buffer[0] = '\0';
6789 lbp->len = 0;
6790 }
6791
6792 /* Set the minimum size of a string contained in a linebuffer. */
6793 static void
6794 linebuffer_setlen (lbp, toksize)
6795 linebuffer *lbp;
6796 int toksize;
6797 {
6798 while (lbp->size <= toksize)
6799 {
6800 lbp->size *= 2;
6801 xrnew (lbp->buffer, lbp->size, char);
6802 }
6803 lbp->len = toksize;
6804 }
6805
6806 /* Like malloc but get fatal error if memory is exhausted. */
6807 static PTR
6808 xmalloc (size)
6809 unsigned int size;
6810 {
6811 PTR result = (PTR) malloc (size);
6812 if (result == NULL)
6813 fatal ("virtual memory exhausted", (char *)NULL);
6814 return result;
6815 }
6816
6817 static PTR
6818 xrealloc (ptr, size)
6819 char *ptr;
6820 unsigned int size;
6821 {
6822 PTR result = (PTR) realloc (ptr, size);
6823 if (result == NULL)
6824 fatal ("virtual memory exhausted", (char *)NULL);
6825 return result;
6826 }
6827
6828 /*
6829 * Local Variables:
6830 * c-indentation-style: gnu
6831 * indent-tabs-mode: t
6832 * tab-width: 8
6833 * fill-column: 79
6834 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6835 * End:
6836 */
6837
6838 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6839 (do not change this comment) */
6840
6841 /* etags.c ends here */