f6b173bf465943119bd7189b7b9baff9ff161f39
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2013 Free Software
32 Foundation, Inc.
33
34 This file is not considered part of GNU Emacs.
35
36 This program is free software: you can redistribute it and/or modify
37 it under the terms of the GNU General Public License as published by
38 the Free Software Foundation, either version 3 of the License, or
39 (at your option) any later version.
40
41 This program is distributed in the hope that it will be useful,
42 but WITHOUT ANY WARRANTY; without even the implied warranty of
43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 GNU General Public License for more details.
45
46 You should have received a copy of the GNU General Public License
47 along with this program. If not, see <http://www.gnu.org/licenses/>. */
48
49
50 /* NB To comply with the above BSD license, copyright information is
51 reproduced in etc/ETAGS.README. That file should be updated when the
52 above notices are.
53
54 To the best of our knowledge, this code was originally based on the
55 ctags.c distributed with BSD4.2, which was copyrighted by the
56 University of California, as described above. */
57
58
59 /*
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 *
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
73 */
74
75 /*
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
79 */
80
/* Revision identification string carried in the object code.  */
char pot_etags_version[] =
  "@(#) pot revision number is 17.38.1.4";

#define FALSE 0
#define TRUE 1

/* Whatever the build passed for DEBUG, turn it into a plain TRUE/FALSE
   value so it can be tested with `#if'.  When debugging is off, NDEBUG
   is defined as well so that assertions are disabled.  */
#if defined DEBUG
# undef DEBUG
# define DEBUG TRUE
#else
# define DEBUG FALSE
# define NDEBUG			/* disable assert */
#endif
93
94 #include <config.h>
95
96 #ifndef _GNU_SOURCE
97 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
98 #endif
99
100 /* WIN32_NATIVE is for XEmacs.
101 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
102 #ifdef WIN32_NATIVE
103 # undef MSDOS
104 # undef WINDOWSNT
105 # define WINDOWSNT
106 #endif /* WIN32_NATIVE */
107
108 #ifdef MSDOS
109 # undef MSDOS
110 # define MSDOS TRUE
111 # include <fcntl.h>
112 # include <sys/param.h>
113 # include <io.h>
114 #else
115 # define MSDOS FALSE
116 #endif /* MSDOS */
117
118 #ifdef WINDOWSNT
119 # include <fcntl.h>
120 # include <direct.h>
121 # include <io.h>
122 # define MAXPATHLEN _MAX_PATH
123 # undef HAVE_NTGUI
124 # undef DOS_NT
125 # define DOS_NT
126 #endif /* WINDOWSNT */
127
128 #include <unistd.h>
129 #include <stdarg.h>
130 #include <stdlib.h>
131 #include <string.h>
132 #include <stdio.h>
133 #include <ctype.h>
134 #include <errno.h>
135 #include <sys/types.h>
136 #include <sys/stat.h>
137 #include <c-strcase.h>
138
139 #include <assert.h>
140 #ifdef NDEBUG
141 # undef assert /* some systems have a buggy assert.h */
142 # define assert(x) ((void) 0)
143 #endif
144
145 #include <getopt.h>
146 #include <regex.h>
147
/* Building with CTAGS defined makes the program "ctags", compatible with
   the usual one.  Building without it makes the program "etags", which
   produces emacs-style tag tables and tags typedefs, #defines and
   struct/union/enum by default.  In both cases CTAGS is left defined as
   a TRUE/FALSE value.  */
#ifndef CTAGS
# define CTAGS FALSE
#else
# undef CTAGS
# define CTAGS TRUE
#endif
157
/*
 * String equality predicates.  Each one yields non-zero when the two
 * strings compare equal:
 *   streq (s, t)          exact comparison (strcmp)
 *   strcaseeq (s, t)      case-insensitive comparison (c_strcasecmp)
 *   strneq (s, t, n)      exact comparison of at most N chars (strncmp)
 *   strncaseeq (s, t, n)  case-insensitive, at most N chars (c_strncasecmp)
 *
 * Both operands must be non-NULL, because the comparison functions
 * dereference both of them; the assertion therefore requires BOTH
 * pointers to be valid.  The arguments are evaluated more than once, so
 * they must not have side effects.
 */
#define streq(s,t)	(assert ((s) != NULL && (t) != NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s) != NULL && (t) != NULL), !c_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s) != NULL && (t) != NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s) != NULL && (t) != NULL), !c_strncasecmp (s, t, n))
162
/* Character classification.  CHAR maps any value onto the range
   0 .. CHARS-1 so it can index the lookup tables and be handed safely
   to the <ctype.h> functions.  */
#define CHARS		256	/* number of distinct values CHAR yields */
#define CHAR(x)		((CHARS - 1) & (unsigned int) (x))

/* Table-driven predicates.  */
#define iswhite(c)	(_wht[CHAR (c)])  /* c is white (see white) */
#define notinname(c)	(_nin[CHAR (c)])  /* c is not in a name (see nonam) */
#define begtoken(c)	(_btk[CHAR (c)])  /* c can start token (see begtk) */
#define intoken(c)	(_itk[CHAR (c)])  /* c can be in token (see midtk) */
#define endtoken(c)	(_etk[CHAR (c)])  /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that accept plain (possibly negative) chars.  */
#define ISALNUM(c)	isalnum (CHAR (c))
#define ISALPHA(c)	isalpha (CHAR (c))
#define ISDIGIT(c)	isdigit (CHAR (c))
#define ISLOWER(c)	islower (CHAR (c))

#define lowcase(c)	tolower (CHAR (c))
177
178
/*
 * xnew, xrnew -- typed allocation and reallocation of arrays
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type; xrnew resizes the
 * array OP points to and stores the new address back into OP.  Regular
 * builds go through xmalloc/xrealloc; DEBUG builds go through the
 * tracing allocator from chkmalloc.h, which also records the call site.
 */
#if !DEBUG
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc (			\
					(char *) (op), (n) * sizeof (Type)))
#else
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__,	\
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#endif
196
/* Truth values are plain ints in this file.  */
#define bool int

/* Type of a language parse function: it is handed the input stream.  */
typedef void Lang_function (FILE *);

/* One supported compression format.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  const char *suffix;
  /* Command that takes one arg and decompresses to stdout.  */
  const char *command;
} compressor;

/* One supported source language.  The member order matters: the table
   of languages is initialized positionally.  */
typedef struct
{
  /* Language name.  */
  const char *name;
  /* Detailed help for the language.  */
  const char *help;
  /* Parse function.  */
  Lang_function *function;
  /* Name suffixes of this language's files.  */
  const char **suffixes;
  /* Names of this language's files.  */
  const char **filenames;
  /* Interpreters for this language.  */
  const char **interpreters;
  /* Source used to generate other sources.  */
  bool metasource;
} language;
217
218 typedef struct fdesc
219 {
220 struct fdesc *next; /* for the linked list */
221 char *infname; /* uncompressed input file name */
222 char *infabsname; /* absolute uncompressed input file name */
223 char *infabsdir; /* absolute dir of input file */
224 char *taggedfname; /* file name to write in tagfile */
225 language *lang; /* language of file */
226 char *prop; /* file properties to write in tagfile */
227 bool usecharno; /* etags tags shall contain char number */
228 bool written; /* entry written in the tags file */
229 } fdesc;
230
231 typedef struct node_st
232 { /* sorting structure */
233 struct node_st *left, *right; /* left and right sons */
234 fdesc *fdp; /* description of file to whom tag belongs */
235 char *name; /* tag name */
236 char *regex; /* search regexp */
237 bool valid; /* write this tag on the tag file */
238 bool is_func; /* function tag: use regexp in CTAGS mode */
239 bool been_warned; /* warning already given for duplicated tag */
240 int lno; /* line number tag is on */
241 long cno; /* character number line starts on */
242 } node;
243
/*
 * A `linebuffer' holds one line of text.  `readline_internal' fills it
 * from a stream and copes with lines of any length.
 *
 *   size    -- the size of BUFFER
 *   len     -- the length of the string in BUFFER after readline reads it
 *   buffer  -- the text itself
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
257
258 /* Used to support mixing of --lang and file names. */
259 typedef struct
260 {
261 enum {
262 at_language, /* a language specification */
263 at_regexp, /* a regular expression */
264 at_filename, /* a file name */
265 at_stdin, /* read from stdin here */
266 at_end /* stop parsing the list */
267 } arg_type; /* argument type */
268 language *lang; /* language associated with the argument */
269 char *what; /* the argument itself */
270 } argument;
271
272 /* Structure defining a regular expression. */
273 typedef struct regexp
274 {
275 struct regexp *p_next; /* pointer to next in list */
276 language *lang; /* if set, use only for this language */
277 char *pattern; /* the regexp pattern */
278 char *name; /* tag name */
279 struct re_pattern_buffer *pat; /* the compiled pattern */
280 struct re_registers regs; /* re registers */
281 bool error_signaled; /* already signaled for this regexp */
282 bool force_explicit_name; /* do not allow implicit tag name */
283 bool ignore_case; /* ignore case when matching */
284 bool multi_line; /* do a multi-line match on the whole file */
285 } regexp;
286
287
/* Prototypes of the per-language parse functions.  They all have the
   Lang_function shape, but declaring them as
	Lang_function Ada_funcs;
   makes many compilers barf, so each one is spelled out in full.
   C_entries is the exception: it takes an extra C_EXT argument.  */
static void Ada_funcs (FILE *inf);
static void Asm_labels (FILE *inf);
static void C_entries (int c_ext, FILE *inf);
static void default_C_entries (FILE *inf);
static void plain_C_entries (FILE *inf);
static void Cjava_entries (FILE *inf);
static void Cobol_paragraphs (FILE *inf);
static void Cplusplus_entries (FILE *inf);
static void Cstar_entries (FILE *inf);
static void Erlang_functions (FILE *inf);
static void Forth_words (FILE *inf);
static void Fortran_functions (FILE *inf);
static void HTML_labels (FILE *inf);
static void Lisp_functions (FILE *inf);
static void Lua_functions (FILE *inf);
static void Makefile_targets (FILE *inf);
static void Pascal_functions (FILE *inf);
static void Perl_functions (FILE *inf);
static void PHP_functions (FILE *inf);
static void PS_functions (FILE *inf);
static void Prolog_functions (FILE *inf);
static void Python_functions (FILE *inf);
static void Scheme_functions (FILE *inf);
static void TeX_commands (FILE *inf);
static void Texinfo_nodes (FILE *inf);
static void Yacc_entries (FILE *inf);
static void just_read_file (FILE *inf);

318
319 static language *get_language_from_langname (const char *);
320 static void readline (linebuffer *, FILE *);
321 static long readline_internal (linebuffer *, FILE *);
322 static bool nocase_tail (const char *);
323 static void get_tag (char *, char **);
324
325 static void analyse_regex (char *);
326 static void free_regexps (void);
327 static void regex_tag_multiline (void);
328 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
329 static _Noreturn void suggest_asking_for_help (void);
330 _Noreturn void fatal (const char *, const char *);
331 static _Noreturn void pfatal (const char *);
332 static void add_node (node *, node **);
333
334 static void init (void);
335 static void process_file_name (char *, language *);
336 static void process_file (FILE *, char *, language *);
337 static void find_entries (FILE *);
338 static void free_tree (node *);
339 static void free_fdesc (fdesc *);
340 static void pfnote (char *, bool, char *, int, int, long);
341 static void invalidate_nodes (fdesc *, node **);
342 static void put_entries (node *);
343
344 static char *concat (const char *, const char *, const char *);
345 static char *skip_spaces (char *);
346 static char *skip_non_spaces (char *);
347 static char *skip_name (char *);
348 static char *savenstr (const char *, int);
349 static char *savestr (const char *);
350 static char *etags_strchr (const char *, int);
351 static char *etags_strrchr (const char *, int);
352 static char *etags_getcwd (void);
353 static char *relative_filename (char *, char *);
354 static char *absolute_filename (char *, char *);
355 static char *absolute_dirname (char *, char *);
356 static bool filename_is_absolute (char *f);
357 static void canonicalize_filename (char *);
358 static void linebuffer_init (linebuffer *);
359 static void linebuffer_setlen (linebuffer *, int);
360 static void *xmalloc (size_t);
361 static void *xrealloc (char *, size_t);
362
363 \f
364 static char searchar = '/'; /* use /.../ searches */
365
366 static char *tagfile; /* output file */
367 static char *progname; /* name this program was invoked with */
368 static char *cwd; /* current working directory */
369 static char *tagfiledir; /* directory of tagfile */
370 static FILE *tagf; /* ioptr for tags file */
371 static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
372
373 static fdesc *fdhead; /* head of file description list */
374 static fdesc *curfdp; /* current file description */
375 static int lineno; /* line number of current line */
376 static long charno; /* current character number */
377 static long linecharno; /* charno of start of current line */
378 static char *dbp; /* pointer to start of current tag */
379
380 static const int invalidcharno = -1;
381
382 static node *nodehead; /* the head of the binary tree of tags */
383 static node *last_node; /* the last node created */
384
385 static linebuffer lb; /* the current line */
386 static linebuffer filebuf; /* a buffer containing the whole file */
387 static linebuffer token_name; /* a buffer containing a tag name */
388
389 /* boolean "functions" (see init) */
390 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
391 static const char
392 /* white chars */
393 *white = " \f\t\n\r\v",
394 /* not in a name */
395 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
396 /* token ending chars */
397 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
398 /* token starting chars */
399 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
400 /* valid in-token chars */
401 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
402
403 static bool append_to_tagfile; /* -a: append to tags */
404 /* The next five default to TRUE in C and derived languages. */
405 static bool typedefs; /* -t: create tags for C and Ada typedefs */
406 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
407 /* 0 struct/enum/union decls, and C++ */
408 /* member functions. */
409 static bool constantypedefs; /* -d: create tags for C #define, enum */
410 /* constants and variables. */
411 /* -D: opposite of -d. Default under ctags. */
412 static bool globals; /* create tags for global variables */
413 static bool members; /* create tags for C member variables */
414 static bool declarations; /* --declarations: tag them and extern in C&Co*/
415 static bool no_line_directive; /* ignore #line directives (undocumented) */
416 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
417 static bool update; /* -u: update tags */
418 static bool vgrind_style; /* -v: create vgrind style index output */
419 static bool no_warnings; /* -w: suppress warnings (undocumented) */
420 static bool cxref_style; /* -x: create cxref style output */
421 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
422 static bool ignoreindent; /* -I: ignore indentation in C */
423 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
424
425 /* STDIN is defined in LynxOS system headers */
426 #ifdef STDIN
427 # undef STDIN
428 #endif
429
430 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
431 static bool parsing_stdin; /* --parse-stdin used */
432
433 static regexp *p_head; /* list of all regexps */
434 static bool need_filebuf; /* some regexes are multi-line */
435
436 static struct option longopts[] =
437 {
438 { "append", no_argument, NULL, 'a' },
439 { "packages-only", no_argument, &packages_only, TRUE },
440 { "c++", no_argument, NULL, 'C' },
441 { "declarations", no_argument, &declarations, TRUE },
442 { "no-line-directive", no_argument, &no_line_directive, TRUE },
443 { "no-duplicates", no_argument, &no_duplicates, TRUE },
444 { "help", no_argument, NULL, 'h' },
445 { "help", no_argument, NULL, 'H' },
446 { "ignore-indentation", no_argument, NULL, 'I' },
447 { "language", required_argument, NULL, 'l' },
448 { "members", no_argument, &members, TRUE },
449 { "no-members", no_argument, &members, FALSE },
450 { "output", required_argument, NULL, 'o' },
451 { "regex", required_argument, NULL, 'r' },
452 { "no-regex", no_argument, NULL, 'R' },
453 { "ignore-case-regex", required_argument, NULL, 'c' },
454 { "parse-stdin", required_argument, NULL, STDIN },
455 { "version", no_argument, NULL, 'V' },
456
457 #if CTAGS /* Ctags options */
458 { "backward-search", no_argument, NULL, 'B' },
459 { "cxref", no_argument, NULL, 'x' },
460 { "defines", no_argument, NULL, 'd' },
461 { "globals", no_argument, &globals, TRUE },
462 { "typedefs", no_argument, NULL, 't' },
463 { "typedefs-and-c++", no_argument, NULL, 'T' },
464 { "update", no_argument, NULL, 'u' },
465 { "vgrind", no_argument, NULL, 'v' },
466 { "no-warn", no_argument, NULL, 'w' },
467
468 #else /* Etags options */
469 { "no-defines", no_argument, NULL, 'D' },
470 { "no-globals", no_argument, &globals, FALSE },
471 { "include", required_argument, NULL, 'i' },
472 #endif
473 { NULL }
474 };
475
476 static compressor compressors[] =
477 {
478 { "z", "gzip -d -c"},
479 { "Z", "gzip -d -c"},
480 { "gz", "gzip -d -c"},
481 { "GZ", "gzip -d -c"},
482 { "bz2", "bzip2 -d -c" },
483 { "xz", "xz -d -c" },
484 { NULL }
485 };
486
487 /*
488 * Language stuff.
489 */
490
/* Ada code */
static const char *Ada_suffixes [] =
{
  "ads",
  "adb",
  "ada",
  NULL
};
static const char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";

/* Assembly code */
static const char *Asm_suffixes [] =
{
  "a",				/* Unix assembler */
  "asm",			/* Microcontroller assembly */
  "def",			/* BSO/Tasking definition includes */
  "inc",			/* Microcontroller include files */
  "ins",			/* Microcontroller include files */
  "s",				/* Unix assembler */
  "sa",				/* Unix assembler */
  "S",				/* cpp-processed Unix assembler */
  "src",			/* BSO/Tasking C compiler output */
  NULL
};
static const char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";

526
527
/* Files named .c and .h may turn out to be C++: that happens when the
   --c++ flag was given, or when the `class' or `template' keywords are
   met inside the file.  That is why default_C_entries is called for
   these.  */
static const char *default_C_suffixes [] =
{
  "c",
  "h",
  NULL
};

/* The help text depends on which program is being built.  */
static const char default_C_help [] =
#if CTAGS /* C help for Ctags */
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'."
#else /* C help for Etags */
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'."
#endif /* C help for Ctags and Etags */
  ;

553
/* C++ code */
static const char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",				/* Objective C++ */
  "pdb",			/* PostScript with C syntax */
  NULL
};
static const char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";
567
/* Java code.  The help text is read-only data, so it is declared const
   like the other languages' help strings.  */
static const char *Cjava_suffixes [] =
  { "java", NULL };
static const char Cjava_help [] =
  "In Java code, all the tag constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
573
574
/* Cobol code.  The help text is read-only data, so it is declared
   const like the other languages' help strings.  */
static const char *Cobol_suffixes [] =
  { "COB", "cob", NULL };
static const char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
580
/* C* code (no help text of its own) */
static const char *Cstar_suffixes [] =
{
  "cs",
  "hs",
  NULL
};

/* Erlang code */
static const char *Erlang_suffixes [] =
{
  "erl",
  "hrl",
  NULL
};
static const char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
589
/* Forth code.  Like every other suffix table in this file, this one is
   private to the file and therefore static.  */
static const char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static const char Forth_help [] =
  "In Forth code, tags are words defined by `:',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";

/* Fortran code */
static const char *Fortran_suffixes [] =
  { "F", "f", "f90", "for", NULL };
static const char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";
600
/* HTML input */
static const char *HTML_suffixes [] =
{
  "htm", "html", "shtml", NULL
};
static const char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";

/* Lisp code */
static const char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
static const char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.\n"
  "The `--declarations' option tags \"(defvar foo)\" constructs too.";

/* Lua scripts */
static const char *Lua_suffixes [] =
{
  "lua", "LUA", NULL
};
static const char Lua_help [] =
  "In Lua scripts, all functions are tags.";

/* Makefiles are recognized by file name rather than by suffix.  */
static const char *Makefile_filenames [] =
{
  "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
static const char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* Objective C code */
static const char *Objc_suffixes [] =
{
  "lm",				/* Objective lex file */
  "m",				/* Objective C file */
  NULL
};
static const char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal code */
static const char *Pascal_suffixes [] =
{
  "p", "pas", NULL
};
static const char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
644
/* Perl code; also selected through the interpreter name.  */
static const char *Perl_suffixes [] =
{
  "pl", "pm", NULL
};
static const char *Perl_interpreters [] =
{
  "perl", "@PERL@", NULL
};
static const char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";

/* PHP code */
static const char *PHP_suffixes [] =
{
  "php", "php3", "php4", NULL
};
static const char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";

/* Plain C (no help text of its own) */
static const char *plain_C_suffixes [] =
{
  "pc",				/* Pro*C file */
  NULL
};

/* PostScript code */
static const char *PS_suffixes [] =
{
  "ps",
  "psw",			/* .psw is for PSWrap */
  NULL
};
static const char PS_help [] =
  "In PostScript code, the tags are the functions.";

/* Prolog code */
static const char *Prolog_suffixes [] =
{
  "prolog", NULL
};
static const char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";

/* Python code */
static const char *Python_suffixes [] =
{
  "py", NULL
};
static const char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";

/* Scheme code.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static const char *Scheme_suffixes [] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
static const char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
690
/* TeX and LaTeX text */
static const char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
static const char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo files */
static const char *Texinfo_suffixes [] =
{
  "texi", "texinfo", "txi", NULL
};
static const char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";

/* Bison or Yacc input */
static const char *Yacc_suffixes [] =
{
  "y",
  "y++",
  "ym",				/* .ym is Objective yacc file */
  "yxx",
  "yy",
  NULL
};
static const char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
717
/* Help texts of the pseudo-languages `auto' and `none', followed by
   the text used for languages that have no detailed help.  */
static const char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";

static const char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

static const char no_lang_help [] =
  "No detailed help available for this language.";

728
729
730 /*
731 * Table of languages.
732 *
733 * It is ok for a given function to be listed under more than one
734 * name. I just didn't.
735 */
736
737 static language lang_names [] =
738 {
739 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
740 { "asm", Asm_help, Asm_labels, Asm_suffixes },
741 { "c", default_C_help, default_C_entries, default_C_suffixes },
742 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
743 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
744 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
745 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
746 { "forth", Forth_help, Forth_words, Forth_suffixes },
747 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
748 { "html", HTML_help, HTML_labels, HTML_suffixes },
749 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
750 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
751 { "lua", Lua_help, Lua_functions, Lua_suffixes },
752 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
753 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
754 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
755 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
756 { "php", PHP_help, PHP_functions, PHP_suffixes },
757 { "postscript",PS_help, PS_functions, PS_suffixes },
758 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
759 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
760 { "python", Python_help, Python_functions, Python_suffixes },
761 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
762 { "tex", TeX_help, TeX_commands, TeX_suffixes },
763 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
764 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
765 { "auto", auto_help }, /* default guessing scheme */
766 { "none", none_help, just_read_file }, /* regexp matching only */
767 { NULL } /* end of list */
768 };
769
770 \f
771 static void
772 print_language_names (void)
773 {
774 language *lang;
775 const char **name, **ext;
776
777 puts ("\nThese are the currently supported languages, along with the\n\
778 default file names and dot suffixes:");
779 for (lang = lang_names; lang->name != NULL; lang++)
780 {
781 printf (" %-*s", 10, lang->name);
782 if (lang->filenames != NULL)
783 for (name = lang->filenames; *name != NULL; name++)
784 printf (" %s", *name);
785 if (lang->suffixes != NULL)
786 for (ext = lang->suffixes; *ext != NULL; ext++)
787 printf (" .%s", *ext);
788 puts ("");
789 }
790 puts ("where `auto' means use default language for files based on file\n\
791 name suffix, and `none' means only do regexp processing on files.\n\
792 If no language is specified and no matching suffix is found,\n\
793 the first line of the file is read for a sharp-bang (#!) sequence\n\
794 followed by the name of an interpreter. If no such sequence is found,\n\
795 Fortran is tried first; if no tags are found, C is tried next.\n\
796 When parsing any C file, a \"class\" or \"template\" keyword\n\
797 switches to C++.");
798 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
799 \n\
800 For detailed help on a given language use, for example,\n\
801 etags --help --lang=ada.");
802 }
803
804 #ifndef EMACS_NAME
805 # define EMACS_NAME "standalone"
806 #endif
807 #ifndef VERSION
808 # define VERSION "17.38.1.4"
809 #endif
810 static _Noreturn void
811 print_version (void)
812 {
813 char emacs_copyright[] = COPYRIGHT;
814
815 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
816 puts (emacs_copyright);
817 puts ("This program is distributed under the terms in ETAGS.README");
818
819 exit (EXIT_SUCCESS);
820 }
821
822 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
823 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
824 #endif
825
826 static _Noreturn void
827 print_help (argument *argbuffer)
828 {
829 bool help_for_lang = FALSE;
830
831 for (; argbuffer->arg_type != at_end; argbuffer++)
832 if (argbuffer->arg_type == at_language)
833 {
834 if (help_for_lang)
835 puts ("");
836 puts (argbuffer->lang->help);
837 help_for_lang = TRUE;
838 }
839
840 if (help_for_lang)
841 exit (EXIT_SUCCESS);
842
843 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
844 \n\
845 These are the options accepted by %s.\n", progname, progname);
846 puts ("You may use unambiguous abbreviations for the long option names.");
847 puts (" A - as file name means read names from stdin (one per line).\n\
848 Absolute names are stored in the output file as they are.\n\
849 Relative ones are stored relative to the output file's directory.\n");
850
851 puts ("-a, --append\n\
852 Append tag entries to existing tags file.");
853
854 puts ("--packages-only\n\
855 For Ada files, only generate tags for packages.");
856
857 if (CTAGS)
858 puts ("-B, --backward-search\n\
859 Write the search commands for the tag entries using '?', the\n\
860 backward-search command instead of '/', the forward-search command.");
861
862 /* This option is mostly obsolete, because etags can now automatically
863 detect C++. Retained for backward compatibility and for debugging and
864 experimentation. In principle, we could want to tag as C++ even
865 before any "class" or "template" keyword.
866 puts ("-C, --c++\n\
867 Treat files whose name suffix defaults to C language as C++ files.");
868 */
869
870 puts ("--declarations\n\
871 In C and derived languages, create tags for function declarations,");
872 if (CTAGS)
873 puts ("\tand create tags for extern variables if --globals is used.");
874 else
875 puts
876 ("\tand create tags for extern variables unless --no-globals is used.");
877
878 if (CTAGS)
879 puts ("-d, --defines\n\
880 Create tag entries for C #define constants and enum constants, too.");
881 else
882 puts ("-D, --no-defines\n\
883 Don't create tag entries for C #define constants and enum constants.\n\
884 This makes the tags file smaller.");
885
886 if (!CTAGS)
887 puts ("-i FILE, --include=FILE\n\
888 Include a note in tag file indicating that, when searching for\n\
889 a tag, one should also consult the tags file FILE after\n\
890 checking the current file.");
891
892 puts ("-l LANG, --language=LANG\n\
893 Force the following files to be considered as written in the\n\
894 named language up to the next --language=LANG option.");
895
896 if (CTAGS)
897 puts ("--globals\n\
898 Create tag entries for global variables in some languages.");
899 else
900 puts ("--no-globals\n\
901 Do not create tag entries for global variables in some\n\
902 languages. This makes the tags file smaller.");
903
904 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
905 puts ("--no-line-directive\n\
906 Ignore #line preprocessor directives in C and derived languages.");
907
908 if (CTAGS)
909 puts ("--members\n\
910 Create tag entries for members of structures in some languages.");
911 else
912 puts ("--no-members\n\
913 Do not create tag entries for members of structures\n\
914 in some languages.");
915
916 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
917 Make a tag for each line matching a regular expression pattern\n\
918 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
919 files only. REGEXFILE is a file containing one REGEXP per line.\n\
920 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
921 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
922 puts (" If TAGNAME/ is present, the tags created are named.\n\
923 For example Tcl named tags can be created with:\n\
924 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
925 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
926 `m' means to allow multi-line matches, `s' implies `m' and\n\
927 causes dot to match any character, including newline.");
928
929 puts ("-R, --no-regex\n\
930 Don't create tags from regexps for the following files.");
931
932 puts ("-I, --ignore-indentation\n\
933 In C and C++ do not assume that a closing brace in the first\n\
934 column is the final brace of a function or structure definition.");
935
936 puts ("-o FILE, --output=FILE\n\
937 Write the tags to FILE.");
938
939 puts ("--parse-stdin=NAME\n\
940 Read from standard input and record tags as belonging to file NAME.");
941
942 if (CTAGS)
943 {
944 puts ("-t, --typedefs\n\
945 Generate tag entries for C and Ada typedefs.");
946 puts ("-T, --typedefs-and-c++\n\
947 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
948 and C++ member functions.");
949 }
950
951 if (CTAGS)
952 puts ("-u, --update\n\
953 Update the tag entries for the given files, leaving tag\n\
954 entries for other files in place. Currently, this is\n\
955 implemented by deleting the existing entries for the given\n\
956 files and then rewriting the new entries at the end of the\n\
957 tags file. It is often faster to simply rebuild the entire\n\
958 tag file than to use this.");
959
960 if (CTAGS)
961 {
962 puts ("-v, --vgrind\n\
963 Print on the standard output an index of items intended for\n\
964 human consumption, similar to the output of vgrind. The index\n\
965 is sorted, and gives the page number of each item.");
966
967 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
968 puts ("-w, --no-duplicates\n\
969 Do not create duplicate tag entries, for compatibility with\n\
970 traditional ctags.");
971
972 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
973 puts ("-w, --no-warn\n\
974 Suppress warning messages about duplicate tag entries.");
975
976 puts ("-x, --cxref\n\
977 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
978 The output uses line numbers instead of page numbers, but\n\
979 beyond that the differences are cosmetic; try both to see\n\
980 which you like.");
981 }
982
983 puts ("-V, --version\n\
984 Print the version of the program.\n\
985 -h, --help\n\
986 Print this help message.\n\
987 Followed by one or more `--language' options prints detailed\n\
988 help about tag generation for the specified languages.");
989
990 print_language_names ();
991
992 puts ("");
993 puts ("Report bugs to bug-gnu-emacs@gnu.org");
994
995 exit (EXIT_SUCCESS);
996 }
997
998 \f
999 int
1000 main (int argc, char **argv)
1001 {
1002 int i;
1003 unsigned int nincluded_files;
1004 char **included_files;
1005 argument *argbuffer;
1006 int current_arg, file_count;
1007 linebuffer filename_lb;
1008 bool help_asked = FALSE;
1009 ptrdiff_t len;
1010 char *optstring;
1011 int opt;
1012
1013
1014 #ifdef DOS_NT
1015 _fmode = O_BINARY; /* all of files are treated as binary files */
1016 #endif /* DOS_NT */
1017
1018 progname = argv[0];
1019 nincluded_files = 0;
1020 included_files = xnew (argc, char *);
1021 current_arg = 0;
1022 file_count = 0;
1023
1024 /* Allocate enough no matter what happens. Overkill, but each one
1025 is small. */
1026 argbuffer = xnew (argc, argument);
1027
1028 /*
1029 * Always find typedefs and structure tags.
1030 * Also default to find macro constants, enum constants, struct
1031 * members and global variables. Do it for both etags and ctags.
1032 */
1033 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1034 globals = members = TRUE;
1035
1036 /* When the optstring begins with a '-' getopt_long does not rearrange the
1037 non-options arguments to be at the end, but leaves them alone. */
1038 optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1039 (CTAGS) ? "BxdtTuvw" : "Di:",
1040 "");
1041
1042 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1043 switch (opt)
1044 {
1045 case 0:
1046 /* If getopt returns 0, then it has already processed a
1047 long-named option. We should do nothing. */
1048 break;
1049
1050 case 1:
1051 /* This means that a file name has been seen. Record it. */
1052 argbuffer[current_arg].arg_type = at_filename;
1053 argbuffer[current_arg].what = optarg;
1054 len = strlen (optarg);
1055 if (whatlen_max < len)
1056 whatlen_max = len;
1057 ++current_arg;
1058 ++file_count;
1059 break;
1060
1061 case STDIN:
1062 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1063 argbuffer[current_arg].arg_type = at_stdin;
1064 argbuffer[current_arg].what = optarg;
1065 len = strlen (optarg);
1066 if (whatlen_max < len)
1067 whatlen_max = len;
1068 ++current_arg;
1069 ++file_count;
1070 if (parsing_stdin)
1071 fatal ("cannot parse standard input more than once", (char *)NULL);
1072 parsing_stdin = TRUE;
1073 break;
1074
1075 /* Common options. */
1076 case 'a': append_to_tagfile = TRUE; break;
1077 case 'C': cplusplus = TRUE; break;
1078 case 'f': /* for compatibility with old makefiles */
1079 case 'o':
1080 if (tagfile)
1081 {
1082 error ("-o option may only be given once.");
1083 suggest_asking_for_help ();
1084 /* NOTREACHED */
1085 }
1086 tagfile = optarg;
1087 break;
1088 case 'I':
1089 case 'S': /* for backward compatibility */
1090 ignoreindent = TRUE;
1091 break;
1092 case 'l':
1093 {
1094 language *lang = get_language_from_langname (optarg);
1095 if (lang != NULL)
1096 {
1097 argbuffer[current_arg].lang = lang;
1098 argbuffer[current_arg].arg_type = at_language;
1099 ++current_arg;
1100 }
1101 }
1102 break;
1103 case 'c':
1104 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1105 optarg = concat (optarg, "i", ""); /* memory leak here */
1106 /* FALLTHRU */
1107 case 'r':
1108 argbuffer[current_arg].arg_type = at_regexp;
1109 argbuffer[current_arg].what = optarg;
1110 len = strlen (optarg);
1111 if (whatlen_max < len)
1112 whatlen_max = len;
1113 ++current_arg;
1114 break;
1115 case 'R':
1116 argbuffer[current_arg].arg_type = at_regexp;
1117 argbuffer[current_arg].what = NULL;
1118 ++current_arg;
1119 break;
1120 case 'V':
1121 print_version ();
1122 break;
1123 case 'h':
1124 case 'H':
1125 help_asked = TRUE;
1126 break;
1127
1128 /* Etags options */
1129 case 'D': constantypedefs = FALSE; break;
1130 case 'i': included_files[nincluded_files++] = optarg; break;
1131
1132 /* Ctags options. */
1133 case 'B': searchar = '?'; break;
1134 case 'd': constantypedefs = TRUE; break;
1135 case 't': typedefs = TRUE; break;
1136 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1137 case 'u': update = TRUE; break;
1138 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1139 case 'x': cxref_style = TRUE; break;
1140 case 'w': no_warnings = TRUE; break;
1141 default:
1142 suggest_asking_for_help ();
1143 /* NOTREACHED */
1144 }
1145
1146 /* No more options. Store the rest of arguments. */
1147 for (; optind < argc; optind++)
1148 {
1149 argbuffer[current_arg].arg_type = at_filename;
1150 argbuffer[current_arg].what = argv[optind];
1151 len = strlen (argv[optind]);
1152 if (whatlen_max < len)
1153 whatlen_max = len;
1154 ++current_arg;
1155 ++file_count;
1156 }
1157
1158 argbuffer[current_arg].arg_type = at_end;
1159
1160 if (help_asked)
1161 print_help (argbuffer);
1162 /* NOTREACHED */
1163
1164 if (nincluded_files == 0 && file_count == 0)
1165 {
1166 error ("no input files specified.");
1167 suggest_asking_for_help ();
1168 /* NOTREACHED */
1169 }
1170
1171 if (tagfile == NULL)
1172 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1173 cwd = etags_getcwd (); /* the current working directory */
1174 if (cwd[strlen (cwd) - 1] != '/')
1175 {
1176 char *oldcwd = cwd;
1177 cwd = concat (oldcwd, "/", "");
1178 free (oldcwd);
1179 }
1180
1181 /* Compute base directory for relative file names. */
1182 if (streq (tagfile, "-")
1183 || strneq (tagfile, "/dev/", 5))
1184 tagfiledir = cwd; /* relative file names are relative to cwd */
1185 else
1186 {
1187 canonicalize_filename (tagfile);
1188 tagfiledir = absolute_dirname (tagfile, cwd);
1189 }
1190
1191 init (); /* set up boolean "functions" */
1192
1193 linebuffer_init (&lb);
1194 linebuffer_init (&filename_lb);
1195 linebuffer_init (&filebuf);
1196 linebuffer_init (&token_name);
1197
1198 if (!CTAGS)
1199 {
1200 if (streq (tagfile, "-"))
1201 {
1202 tagf = stdout;
1203 #ifdef DOS_NT
1204 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1205 doesn't take effect until after `stdout' is already open). */
1206 if (!isatty (fileno (stdout)))
1207 setmode (fileno (stdout), O_BINARY);
1208 #endif /* DOS_NT */
1209 }
1210 else
1211 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1212 if (tagf == NULL)
1213 pfatal (tagfile);
1214 }
1215
1216 /*
1217 * Loop through files finding functions.
1218 */
1219 for (i = 0; i < current_arg; i++)
1220 {
1221 static language *lang; /* non-NULL if language is forced */
1222 char *this_file;
1223
1224 switch (argbuffer[i].arg_type)
1225 {
1226 case at_language:
1227 lang = argbuffer[i].lang;
1228 break;
1229 case at_regexp:
1230 analyse_regex (argbuffer[i].what);
1231 break;
1232 case at_filename:
1233 this_file = argbuffer[i].what;
1234 /* Input file named "-" means read file names from stdin
1235 (one per line) and use them. */
1236 if (streq (this_file, "-"))
1237 {
1238 if (parsing_stdin)
1239 fatal ("cannot parse standard input AND read file names from it",
1240 (char *)NULL);
1241 while (readline_internal (&filename_lb, stdin) > 0)
1242 process_file_name (filename_lb.buffer, lang);
1243 }
1244 else
1245 process_file_name (this_file, lang);
1246 break;
1247 case at_stdin:
1248 this_file = argbuffer[i].what;
1249 process_file (stdin, this_file, lang);
1250 break;
1251 }
1252 }
1253
1254 free_regexps ();
1255 free (lb.buffer);
1256 free (filebuf.buffer);
1257 free (token_name.buffer);
1258
1259 if (!CTAGS || cxref_style)
1260 {
1261 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1262 put_entries (nodehead);
1263 free_tree (nodehead);
1264 nodehead = NULL;
1265 if (!CTAGS)
1266 {
1267 fdesc *fdp;
1268
1269 /* Output file entries that have no tags. */
1270 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1271 if (!fdp->written)
1272 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1273
1274 while (nincluded_files-- > 0)
1275 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1276
1277 if (fclose (tagf) == EOF)
1278 pfatal (tagfile);
1279 }
1280
1281 exit (EXIT_SUCCESS);
1282 }
1283
1284 /* From here on, we are in (CTAGS && !cxref_style) */
1285 if (update)
1286 {
1287 char *cmd =
1288 xmalloc (strlen (tagfile) + whatlen_max +
1289 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1290 for (i = 0; i < current_arg; ++i)
1291 {
1292 switch (argbuffer[i].arg_type)
1293 {
1294 case at_filename:
1295 case at_stdin:
1296 break;
1297 default:
1298 continue; /* the for loop */
1299 }
1300 strcpy (cmd, "mv ");
1301 strcat (cmd, tagfile);
1302 strcat (cmd, " OTAGS;fgrep -v '\t");
1303 strcat (cmd, argbuffer[i].what);
1304 strcat (cmd, "\t' OTAGS >");
1305 strcat (cmd, tagfile);
1306 strcat (cmd, ";rm OTAGS");
1307 if (system (cmd) != EXIT_SUCCESS)
1308 fatal ("failed to execute shell command", (char *)NULL);
1309 }
1310 free (cmd);
1311 append_to_tagfile = TRUE;
1312 }
1313
1314 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1315 if (tagf == NULL)
1316 pfatal (tagfile);
1317 put_entries (nodehead); /* write all the tags (CTAGS) */
1318 free_tree (nodehead);
1319 nodehead = NULL;
1320 if (fclose (tagf) == EOF)
1321 pfatal (tagfile);
1322
1323 if (CTAGS)
1324 if (append_to_tagfile || update)
1325 {
1326 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1327 /* Maybe these should be used:
1328 setenv ("LC_COLLATE", "C", 1);
1329 setenv ("LC_ALL", "C", 1); */
1330 strcpy (cmd, "sort -u -o ");
1331 strcat (cmd, tagfile);
1332 strcat (cmd, " ");
1333 strcat (cmd, tagfile);
1334 exit (system (cmd));
1335 }
1336 return EXIT_SUCCESS;
1337 }
1338
1339
1340 /*
1341 * Return a compressor given the file name. If EXTPTR is non-zero,
1342 * return a pointer into FILE where the compressor-specific
1343 * extension begins. If no compressor is found, NULL is returned
1344 * and EXTPTR is not significant.
1345 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1346 */
1347 static compressor *
1348 get_compressor_from_suffix (char *file, char **extptr)
1349 {
1350 compressor *compr;
1351 char *slash, *suffix;
1352
1353 /* File has been processed by canonicalize_filename,
1354 so we don't need to consider backslashes on DOS_NT. */
1355 slash = etags_strrchr (file, '/');
1356 suffix = etags_strrchr (file, '.');
1357 if (suffix == NULL || suffix < slash)
1358 return NULL;
1359 if (extptr != NULL)
1360 *extptr = suffix;
1361 suffix += 1;
1362 /* Let those poor souls who live with DOS 8+3 file name limits get
1363 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1364 Only the first do loop is run if not MSDOS */
1365 do
1366 {
1367 for (compr = compressors; compr->suffix != NULL; compr++)
1368 if (streq (compr->suffix, suffix))
1369 return compr;
1370 if (!MSDOS)
1371 break; /* do it only once: not really a loop */
1372 if (extptr != NULL)
1373 *extptr = ++suffix;
1374 } while (*suffix != '\0');
1375 return NULL;
1376 }
1377
1378
1379
1380 /*
1381 * Return a language given the name.
1382 */
1383 static language *
1384 get_language_from_langname (const char *name)
1385 {
1386 language *lang;
1387
1388 if (name == NULL)
1389 error ("empty language name");
1390 else
1391 {
1392 for (lang = lang_names; lang->name != NULL; lang++)
1393 if (streq (name, lang->name))
1394 return lang;
1395 error ("unknown language \"%s\"", name);
1396 }
1397
1398 return NULL;
1399 }
1400
1401
1402 /*
1403 * Return a language given the interpreter name.
1404 */
1405 static language *
1406 get_language_from_interpreter (char *interpreter)
1407 {
1408 language *lang;
1409 const char **iname;
1410
1411 if (interpreter == NULL)
1412 return NULL;
1413 for (lang = lang_names; lang->name != NULL; lang++)
1414 if (lang->interpreters != NULL)
1415 for (iname = lang->interpreters; *iname != NULL; iname++)
1416 if (streq (*iname, interpreter))
1417 return lang;
1418
1419 return NULL;
1420 }
1421
1422
1423
1424 /*
1425 * Return a language given the file name.
1426 */
1427 static language *
1428 get_language_from_filename (char *file, int case_sensitive)
1429 {
1430 language *lang;
1431 const char **name, **ext, *suffix;
1432
1433 /* Try whole file name first. */
1434 for (lang = lang_names; lang->name != NULL; lang++)
1435 if (lang->filenames != NULL)
1436 for (name = lang->filenames; *name != NULL; name++)
1437 if ((case_sensitive)
1438 ? streq (*name, file)
1439 : strcaseeq (*name, file))
1440 return lang;
1441
1442 /* If not found, try suffix after last dot. */
1443 suffix = etags_strrchr (file, '.');
1444 if (suffix == NULL)
1445 return NULL;
1446 suffix += 1;
1447 for (lang = lang_names; lang->name != NULL; lang++)
1448 if (lang->suffixes != NULL)
1449 for (ext = lang->suffixes; *ext != NULL; ext++)
1450 if ((case_sensitive)
1451 ? streq (*ext, suffix)
1452 : strcaseeq (*ext, suffix))
1453 return lang;
1454 return NULL;
1455 }
1456
1457 \f
1458 /*
1459 * This routine is called on each file argument.
1460 */
1461 static void
1462 process_file_name (char *file, language *lang)
1463 {
1464 struct stat stat_buf;
1465 FILE *inf;
1466 fdesc *fdp;
1467 compressor *compr;
1468 char *compressed_name, *uncompressed_name;
1469 char *ext, *real_name;
1470 int retval;
1471
1472 canonicalize_filename (file);
1473 if (streq (file, tagfile) && !streq (tagfile, "-"))
1474 {
1475 error ("skipping inclusion of %s in self.", file);
1476 return;
1477 }
1478 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1479 {
1480 compressed_name = NULL;
1481 real_name = uncompressed_name = savestr (file);
1482 }
1483 else
1484 {
1485 real_name = compressed_name = savestr (file);
1486 uncompressed_name = savenstr (file, ext - file);
1487 }
1488
1489 /* If the canonicalized uncompressed name
1490 has already been dealt with, skip it silently. */
1491 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1492 {
1493 assert (fdp->infname != NULL);
1494 if (streq (uncompressed_name, fdp->infname))
1495 goto cleanup;
1496 }
1497
1498 if (stat (real_name, &stat_buf) != 0)
1499 {
1500 /* Reset real_name and try with a different name. */
1501 real_name = NULL;
1502 if (compressed_name != NULL) /* try with the given suffix */
1503 {
1504 if (stat (uncompressed_name, &stat_buf) == 0)
1505 real_name = uncompressed_name;
1506 }
1507 else /* try all possible suffixes */
1508 {
1509 for (compr = compressors; compr->suffix != NULL; compr++)
1510 {
1511 compressed_name = concat (file, ".", compr->suffix);
1512 if (stat (compressed_name, &stat_buf) != 0)
1513 {
1514 if (MSDOS)
1515 {
1516 char *suf = compressed_name + strlen (file);
1517 size_t suflen = strlen (compr->suffix) + 1;
1518 for ( ; suf[1]; suf++, suflen--)
1519 {
1520 memmove (suf, suf + 1, suflen);
1521 if (stat (compressed_name, &stat_buf) == 0)
1522 {
1523 real_name = compressed_name;
1524 break;
1525 }
1526 }
1527 if (real_name != NULL)
1528 break;
1529 } /* MSDOS */
1530 free (compressed_name);
1531 compressed_name = NULL;
1532 }
1533 else
1534 {
1535 real_name = compressed_name;
1536 break;
1537 }
1538 }
1539 }
1540 if (real_name == NULL)
1541 {
1542 perror (file);
1543 goto cleanup;
1544 }
1545 } /* try with a different name */
1546
1547 if (!S_ISREG (stat_buf.st_mode))
1548 {
1549 error ("skipping %s: it is not a regular file.", real_name);
1550 goto cleanup;
1551 }
1552 if (real_name == compressed_name)
1553 {
1554 char *cmd = concat (compr->command, " ", real_name);
1555 inf = (FILE *) popen (cmd, "r");
1556 free (cmd);
1557 }
1558 else
1559 inf = fopen (real_name, "r");
1560 if (inf == NULL)
1561 {
1562 perror (real_name);
1563 goto cleanup;
1564 }
1565
1566 process_file (inf, uncompressed_name, lang);
1567
1568 if (real_name == compressed_name)
1569 retval = pclose (inf);
1570 else
1571 retval = fclose (inf);
1572 if (retval < 0)
1573 pfatal (file);
1574
1575 cleanup:
1576 free (compressed_name);
1577 free (uncompressed_name);
1578 last_node = NULL;
1579 curfdp = NULL;
1580 return;
1581 }
1582
1583 static void
1584 process_file (FILE *fh, char *fn, language *lang)
1585 {
1586 static const fdesc emptyfdesc;
1587 fdesc *fdp;
1588
1589 /* Create a new input file description entry. */
1590 fdp = xnew (1, fdesc);
1591 *fdp = emptyfdesc;
1592 fdp->next = fdhead;
1593 fdp->infname = savestr (fn);
1594 fdp->lang = lang;
1595 fdp->infabsname = absolute_filename (fn, cwd);
1596 fdp->infabsdir = absolute_dirname (fn, cwd);
1597 if (filename_is_absolute (fn))
1598 {
1599 /* An absolute file name. Canonicalize it. */
1600 fdp->taggedfname = absolute_filename (fn, NULL);
1601 }
1602 else
1603 {
1604 /* A file name relative to cwd. Make it relative
1605 to the directory of the tags file. */
1606 fdp->taggedfname = relative_filename (fn, tagfiledir);
1607 }
1608 fdp->usecharno = TRUE; /* use char position when making tags */
1609 fdp->prop = NULL;
1610 fdp->written = FALSE; /* not written on tags file yet */
1611
1612 fdhead = fdp;
1613 curfdp = fdhead; /* the current file description */
1614
1615 find_entries (fh);
1616
1617 /* If not Ctags, and if this is not metasource and if it contained no #line
1618 directives, we can write the tags and free all nodes pointing to
1619 curfdp. */
1620 if (!CTAGS
1621 && curfdp->usecharno /* no #line directives in this file */
1622 && !curfdp->lang->metasource)
1623 {
1624 node *np, *prev;
1625
1626 /* Look for the head of the sublist relative to this file. See add_node
1627 for the structure of the node tree. */
1628 prev = NULL;
1629 for (np = nodehead; np != NULL; prev = np, np = np->left)
1630 if (np->fdp == curfdp)
1631 break;
1632
1633 /* If we generated tags for this file, write and delete them. */
1634 if (np != NULL)
1635 {
1636 /* This is the head of the last sublist, if any. The following
1637 instructions depend on this being true. */
1638 assert (np->left == NULL);
1639
1640 assert (fdhead == curfdp);
1641 assert (last_node->fdp == curfdp);
1642 put_entries (np); /* write tags for file curfdp->taggedfname */
1643 free_tree (np); /* remove the written nodes */
1644 if (prev == NULL)
1645 nodehead = NULL; /* no nodes left */
1646 else
1647 prev->left = NULL; /* delete the pointer to the sublist */
1648 }
1649 }
1650 }
1651
1652 /*
1653 * This routine sets up the boolean pseudo-functions which work
1654 * by setting boolean flags dependent upon the corresponding character.
1655 * Every char which is NOT in that string is not a white char. Therefore,
1656 * all of the array "_wht" is set to FALSE, and then the elements
1657 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1658 * of a char is TRUE if it is the string "white", else FALSE.
1659 */
1660 static void
1661 init (void)
1662 {
1663 register const char *sp;
1664 register int i;
1665
1666 for (i = 0; i < CHARS; i++)
1667 iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1668 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1669 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1670 notinname ('\0') = notinname ('\n');
1671 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1672 begtoken ('\0') = begtoken ('\n');
1673 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1674 intoken ('\0') = intoken ('\n');
1675 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1676 endtoken ('\0') = endtoken ('\n');
1677 }
1678
1679 /*
1680 * This routine opens the specified file and calls the function
1681 * which finds the function and type definitions.
1682 */
1683 static void
1684 find_entries (FILE *inf)
1685 {
1686 char *cp;
1687 language *lang = curfdp->lang;
1688 Lang_function *parser = NULL;
1689
1690 /* If user specified a language, use it. */
1691 if (lang != NULL && lang->function != NULL)
1692 {
1693 parser = lang->function;
1694 }
1695
1696 /* Else try to guess the language given the file name. */
1697 if (parser == NULL)
1698 {
1699 lang = get_language_from_filename (curfdp->infname, TRUE);
1700 if (lang != NULL && lang->function != NULL)
1701 {
1702 curfdp->lang = lang;
1703 parser = lang->function;
1704 }
1705 }
1706
1707 /* Else look for sharp-bang as the first two characters. */
1708 if (parser == NULL
1709 && readline_internal (&lb, inf) > 0
1710 && lb.len >= 2
1711 && lb.buffer[0] == '#'
1712 && lb.buffer[1] == '!')
1713 {
1714 char *lp;
1715
1716 /* Set lp to point at the first char after the last slash in the
1717 line or, if no slashes, at the first nonblank. Then set cp to
1718 the first successive blank and terminate the string. */
1719 lp = etags_strrchr (lb.buffer+2, '/');
1720 if (lp != NULL)
1721 lp += 1;
1722 else
1723 lp = skip_spaces (lb.buffer + 2);
1724 cp = skip_non_spaces (lp);
1725 *cp = '\0';
1726
1727 if (strlen (lp) > 0)
1728 {
1729 lang = get_language_from_interpreter (lp);
1730 if (lang != NULL && lang->function != NULL)
1731 {
1732 curfdp->lang = lang;
1733 parser = lang->function;
1734 }
1735 }
1736 }
1737
1738 /* We rewind here, even if inf may be a pipe. We fail if the
1739 length of the first line is longer than the pipe block size,
1740 which is unlikely. */
1741 rewind (inf);
1742
1743 /* Else try to guess the language given the case insensitive file name. */
1744 if (parser == NULL)
1745 {
1746 lang = get_language_from_filename (curfdp->infname, FALSE);
1747 if (lang != NULL && lang->function != NULL)
1748 {
1749 curfdp->lang = lang;
1750 parser = lang->function;
1751 }
1752 }
1753
1754 /* Else try Fortran or C. */
1755 if (parser == NULL)
1756 {
1757 node *old_last_node = last_node;
1758
1759 curfdp->lang = get_language_from_langname ("fortran");
1760 find_entries (inf);
1761
1762 if (old_last_node == last_node)
1763 /* No Fortran entries found. Try C. */
1764 {
1765 /* We do not tag if rewind fails.
1766 Only the file name will be recorded in the tags file. */
1767 rewind (inf);
1768 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1769 find_entries (inf);
1770 }
1771 return;
1772 }
1773
1774 if (!no_line_directive
1775 && curfdp->lang != NULL && curfdp->lang->metasource)
1776 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1777 file, or anyway we parsed a file that is automatically generated from
1778 this one. If this is the case, the bingo.c file contained #line
1779 directives that generated tags pointing to this file. Let's delete
1780 them all before parsing this file, which is the real source. */
1781 {
1782 fdesc **fdpp = &fdhead;
1783 while (*fdpp != NULL)
1784 if (*fdpp != curfdp
1785 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1786 /* We found one of those! We must delete both the file description
1787 and all tags referring to it. */
1788 {
1789 fdesc *badfdp = *fdpp;
1790
1791 /* Delete the tags referring to badfdp->taggedfname
1792 that were obtained from badfdp->infname. */
1793 invalidate_nodes (badfdp, &nodehead);
1794
1795 *fdpp = badfdp->next; /* remove the bad description from the list */
1796 free_fdesc (badfdp);
1797 }
1798 else
1799 fdpp = &(*fdpp)->next; /* advance the list pointer */
1800 }
1801
1802 assert (parser != NULL);
1803
1804 /* Generic initializations before reading from file. */
1805 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1806
1807 /* Generic initializations before parsing file with readline. */
1808 lineno = 0; /* reset global line number */
1809 charno = 0; /* reset global char number */
1810 linecharno = 0; /* reset global char number of line start */
1811
1812 parser (inf);
1813
1814 regex_tag_multiline ();
1815 }
1816
1817 \f
1818 /*
1819 * Check whether an implicitly named tag should be created,
1820 * then call `pfnote'.
1821 * NAME is a string that is internally copied by this function.
1822 *
1823 * TAGS format specification
1824 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1825 * The following is explained in some more detail in etc/ETAGS.EBNF.
1826 *
1827 * make_tag creates tags with "implicit tag names" (unnamed tags)
1828 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1829 * 1. NAME does not contain any of the characters in NONAM;
1830 * 2. LINESTART contains name as either a rightmost, or rightmost but
1831 * one character, substring;
1832 * 3. the character, if any, immediately before NAME in LINESTART must
1833 * be a character in NONAM;
1834 * 4. the character, if any, immediately after NAME in LINESTART must
1835 * also be a character in NONAM.
1836 *
1837 * The implementation uses the notinname() macro, which recognizes the
1838 * characters stored in the string `nonam'.
1839 * etags.el needs to use the same characters that are in NONAM.
1840 */
1841 static void
1842 make_tag (const char *name, /* tag name, or NULL if unnamed */
1843 int namelen, /* tag length */
1844 int is_func, /* tag is a function */
1845 char *linestart, /* start of the line where tag is */
1846 int linelen, /* length of the line where tag is */
1847 int lno, /* line number */
1848 long int cno) /* character number */
1849 {
1850 bool named = (name != NULL && namelen > 0);
1851 char *nname = NULL;
1852
1853 if (!CTAGS && named) /* maybe set named to false */
1854 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1855 such that etags.el can guess a name from it. */
1856 {
1857 int i;
1858 register const char *cp = name;
1859
1860 for (i = 0; i < namelen; i++)
1861 if (notinname (*cp++))
1862 break;
1863 if (i == namelen) /* rule #1 */
1864 {
1865 cp = linestart + linelen - namelen;
1866 if (notinname (linestart[linelen-1]))
1867 cp -= 1; /* rule #4 */
1868 if (cp >= linestart /* rule #2 */
1869 && (cp == linestart
1870 || notinname (cp[-1])) /* rule #3 */
1871 && strneq (name, cp, namelen)) /* rule #2 */
1872 named = FALSE; /* use implicit tag name */
1873 }
1874 }
1875
1876 if (named)
1877 nname = savenstr (name, namelen);
1878
1879 pfnote (nname, is_func, linestart, linelen, lno, cno);
1880 }
1881
1882 /* Record a tag. */
1883 static void
1884 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1885 /* tag name, or NULL if unnamed */
1886 /* tag is a function */
1887 /* start of the line where tag is */
1888 /* length of the line where tag is */
1889 /* line number */
1890 /* character number */
1891 {
1892 register node *np;
1893
1894 assert (name == NULL || name[0] != '\0');
1895 if (CTAGS && name == NULL)
1896 return;
1897
1898 np = xnew (1, node);
1899
1900 /* If ctags mode, change name "main" to M<thisfilename>. */
1901 if (CTAGS && !cxref_style && streq (name, "main"))
1902 {
1903 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1904 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1905 fp = etags_strrchr (np->name, '.');
1906 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1907 fp[0] = '\0';
1908 }
1909 else
1910 np->name = name;
1911 np->valid = TRUE;
1912 np->been_warned = FALSE;
1913 np->fdp = curfdp;
1914 np->is_func = is_func;
1915 np->lno = lno;
1916 if (np->fdp->usecharno)
1917 /* Our char numbers are 0-base, because of C language tradition?
1918 ctags compatibility? old versions compatibility? I don't know.
1919 Anyway, since emacs's are 1-base we expect etags.el to take care
1920 of the difference. If we wanted to have 1-based numbers, we would
1921 uncomment the +1 below. */
1922 np->cno = cno /* + 1 */ ;
1923 else
1924 np->cno = invalidcharno;
1925 np->left = np->right = NULL;
1926 if (CTAGS && !cxref_style)
1927 {
1928 if (strlen (linestart) < 50)
1929 np->regex = concat (linestart, "$", "");
1930 else
1931 np->regex = savenstr (linestart, 50);
1932 }
1933 else
1934 np->regex = savenstr (linestart, linelen);
1935
1936 add_node (np, &nodehead);
1937 }
1938
1939 /*
1940 * free_tree ()
1941 * recurse on left children, iterate on right children.
1942 */
1943 static void
1944 free_tree (register node *np)
1945 {
1946 while (np)
1947 {
1948 register node *node_right = np->right;
1949 free_tree (np->left);
1950 free (np->name);
1951 free (np->regex);
1952 free (np);
1953 np = node_right;
1954 }
1955 }
1956
1957 /*
1958 * free_fdesc ()
1959 * delete a file description
1960 */
1961 static void
1962 free_fdesc (register fdesc *fdp)
1963 {
1964 free (fdp->infname);
1965 free (fdp->infabsname);
1966 free (fdp->infabsdir);
1967 free (fdp->taggedfname);
1968 free (fdp->prop);
1969 free (fdp);
1970 }
1971
1972 /*
1973 * add_node ()
1974 * Adds a node to the tree of nodes. In etags mode, sort by file
1975 * name. In ctags mode, sort by tag name. Make no attempt at
1976 * balancing.
1977 *
1978 * add_node is the only function allowed to add nodes, so it can
1979 * maintain state.
1980 */
1981 static void
1982 add_node (node *np, node **cur_node_p)
1983 {
1984 register int dif;
1985 register node *cur_node = *cur_node_p;
1986
1987 if (cur_node == NULL)
1988 {
1989 *cur_node_p = np;
1990 last_node = np;
1991 return;
1992 }
1993
1994 if (!CTAGS)
1995 /* Etags Mode */
1996 {
1997 /* For each file name, tags are in a linked sublist on the right
1998 pointer. The first tags of different files are a linked list
1999 on the left pointer. last_node points to the end of the last
2000 used sublist. */
2001 if (last_node != NULL && last_node->fdp == np->fdp)
2002 {
2003 /* Let's use the same sublist as the last added node. */
2004 assert (last_node->right == NULL);
2005 last_node->right = np;
2006 last_node = np;
2007 }
2008 else if (cur_node->fdp == np->fdp)
2009 {
2010 /* Scanning the list we found the head of a sublist which is
2011 good for us. Let's scan this sublist. */
2012 add_node (np, &cur_node->right);
2013 }
2014 else
2015 /* The head of this sublist is not good for us. Let's try the
2016 next one. */
2017 add_node (np, &cur_node->left);
2018 } /* if ETAGS mode */
2019
2020 else
2021 {
2022 /* Ctags Mode */
2023 dif = strcmp (np->name, cur_node->name);
2024
2025 /*
2026 * If this tag name matches an existing one, then
2027 * do not add the node, but maybe print a warning.
2028 */
2029 if (no_duplicates && !dif)
2030 {
2031 if (np->fdp == cur_node->fdp)
2032 {
2033 if (!no_warnings)
2034 {
2035 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2036 np->fdp->infname, lineno, np->name);
2037 fprintf (stderr, "Second entry ignored\n");
2038 }
2039 }
2040 else if (!cur_node->been_warned && !no_warnings)
2041 {
2042 fprintf
2043 (stderr,
2044 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2045 np->fdp->infname, cur_node->fdp->infname, np->name);
2046 cur_node->been_warned = TRUE;
2047 }
2048 return;
2049 }
2050
2051 /* Actually add the node */
2052 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2053 } /* if CTAGS mode */
2054 }
2055
2056 /*
2057 * invalidate_nodes ()
2058 * Scan the node tree and invalidate all nodes pointing to the
2059 * given file description (CTAGS case) or free them (ETAGS case).
2060 */
2061 static void
2062 invalidate_nodes (fdesc *badfdp, node **npp)
2063 {
2064 node *np = *npp;
2065
2066 if (np == NULL)
2067 return;
2068
2069 if (CTAGS)
2070 {
2071 if (np->left != NULL)
2072 invalidate_nodes (badfdp, &np->left);
2073 if (np->fdp == badfdp)
2074 np->valid = FALSE;
2075 if (np->right != NULL)
2076 invalidate_nodes (badfdp, &np->right);
2077 }
2078 else
2079 {
2080 assert (np->fdp != NULL);
2081 if (np->fdp == badfdp)
2082 {
2083 *npp = np->left; /* detach the sublist from the list */
2084 np->left = NULL; /* isolate it */
2085 free_tree (np); /* free it */
2086 invalidate_nodes (badfdp, npp);
2087 }
2088 else
2089 invalidate_nodes (badfdp, &np->left);
2090 }
2091 }
2092
2093 \f
2094 static int total_size_of_entries (node *);
2095 static int number_len (long) ATTRIBUTE_CONST;
2096
/* Return the number of decimal digits needed to write NUM, which is
   expected to be non-negative.  Zero needs one digit.  */
static int
number_len (long int num)
{
  int digits = 0;

  do
    {
      digits++;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
2106
2107 /*
2108 * Return total number of characters that put_entries will output for
2109 * the nodes in the linked list at the right of the specified node.
2110 * This count is irrelevant with etags.el since emacs 19.34 at least,
2111 * but is still supplied for backward compatibility.
2112 */
2113 static int
2114 total_size_of_entries (register node *np)
2115 {
2116 register int total = 0;
2117
2118 for (; np != NULL; np = np->right)
2119 if (np->valid)
2120 {
2121 total += strlen (np->regex) + 1; /* pat\177 */
2122 if (np->name != NULL)
2123 total += strlen (np->name) + 1; /* name\001 */
2124 total += number_len ((long) np->lno) + 1; /* lno, */
2125 if (np->cno != invalidcharno) /* cno */
2126 total += number_len (np->cno);
2127 total += 1; /* newline */
2128 }
2129
2130 return total;
2131 }
2132
2133 static void
2134 put_entries (register node *np)
2135 {
2136 register char *sp;
2137 static fdesc *fdp = NULL;
2138
2139 if (np == NULL)
2140 return;
2141
2142 /* Output subentries that precede this one */
2143 if (CTAGS)
2144 put_entries (np->left);
2145
2146 /* Output this entry */
2147 if (np->valid)
2148 {
2149 if (!CTAGS)
2150 {
2151 /* Etags mode */
2152 if (fdp != np->fdp)
2153 {
2154 fdp = np->fdp;
2155 fprintf (tagf, "\f\n%s,%d\n",
2156 fdp->taggedfname, total_size_of_entries (np));
2157 fdp->written = TRUE;
2158 }
2159 fputs (np->regex, tagf);
2160 fputc ('\177', tagf);
2161 if (np->name != NULL)
2162 {
2163 fputs (np->name, tagf);
2164 fputc ('\001', tagf);
2165 }
2166 fprintf (tagf, "%d,", np->lno);
2167 if (np->cno != invalidcharno)
2168 fprintf (tagf, "%ld", np->cno);
2169 fputs ("\n", tagf);
2170 }
2171 else
2172 {
2173 /* Ctags mode */
2174 if (np->name == NULL)
2175 error ("internal error: NULL name in ctags mode.");
2176
2177 if (cxref_style)
2178 {
2179 if (vgrind_style)
2180 fprintf (stdout, "%s %s %d\n",
2181 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2182 else
2183 fprintf (stdout, "%-16s %3d %-16s %s\n",
2184 np->name, np->lno, np->fdp->taggedfname, np->regex);
2185 }
2186 else
2187 {
2188 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2189
2190 if (np->is_func)
2191 { /* function or #define macro with args */
2192 putc (searchar, tagf);
2193 putc ('^', tagf);
2194
2195 for (sp = np->regex; *sp; sp++)
2196 {
2197 if (*sp == '\\' || *sp == searchar)
2198 putc ('\\', tagf);
2199 putc (*sp, tagf);
2200 }
2201 putc (searchar, tagf);
2202 }
2203 else
2204 { /* anything else; text pattern inadequate */
2205 fprintf (tagf, "%d", np->lno);
2206 }
2207 putc ('\n', tagf);
2208 }
2209 }
2210 } /* if this node contains a valid tag */
2211
2212 /* Output subentries that follow this one */
2213 put_entries (np->right);
2214 if (!CTAGS)
2215 put_entries (np->left);
2216 }
2217
2218 \f
/* Flags describing the C dialect being parsed.  The bits covered by
   C_EXT select the language; note that the C_STAR and C_JAVA values
   include the C_PLPL bit.  C_AUTO and YACC lie outside C_EXT.  */
#define C_EXT	0x00fff		/* mask of the language-selecting bits */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
2227
/*
 * The C symbol tables: classification of the words listed in the
 * keyword table further down.  The examples name the table entries
 * that carry each type.
 */
enum sym_type
{
  st_none,			/* not found in the keyword table */
  st_C_objprot,			/* @interface, @protocol */
  st_C_objimpl,			/* @implementation */
  st_C_objend,			/* @end */
  st_C_gnumacro,		/* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,			/* if, for, while, switch, return, ... */
  st_C_attribute,		/* __attribute__, GTY */
  st_C_javastruct,		/* extends, implements */
  st_C_operator,		/* operator */
  st_C_class,			/* class */
  st_C_template,		/* template */
  st_C_struct,			/* struct, union, namespace, interface, domain */
  st_C_extern,			/* extern */
  st_C_enum,			/* enum */
  st_C_define,			/* define, undef */
  st_C_typedef			/* typedef */
};
2242
2243 static unsigned int hash (const char *, unsigned int);
2244 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2245 static enum sym_type C_symtype (char *, int, int);
2246
2247 /* Feed stuff between (but not including) %[ and %] lines to:
2248 gperf -m 5
2249 %[
2250 %compare-strncmp
2251 %enum
2252 %struct-type
2253 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2254 %%
2255 if, 0, st_C_ignore
2256 for, 0, st_C_ignore
2257 while, 0, st_C_ignore
2258 switch, 0, st_C_ignore
2259 return, 0, st_C_ignore
2260 __attribute__, 0, st_C_attribute
2261 GTY, 0, st_C_attribute
2262 @interface, 0, st_C_objprot
2263 @protocol, 0, st_C_objprot
2264 @implementation,0, st_C_objimpl
2265 @end, 0, st_C_objend
2266 import, (C_JAVA & ~C_PLPL), st_C_ignore
2267 package, (C_JAVA & ~C_PLPL), st_C_ignore
2268 friend, C_PLPL, st_C_ignore
2269 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2270 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2271 interface, (C_JAVA & ~C_PLPL), st_C_struct
2272 class, 0, st_C_class
2273 namespace, C_PLPL, st_C_struct
2274 domain, C_STAR, st_C_struct
2275 union, 0, st_C_struct
2276 struct, 0, st_C_struct
2277 extern, 0, st_C_extern
2278 enum, 0, st_C_enum
2279 typedef, 0, st_C_typedef
2280 define, 0, st_C_define
2281 undef, 0, st_C_define
2282 operator, C_PLPL, st_C_operator
2283 template, 0, st_C_template
2284 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2285 DEFUN, 0, st_C_gnumacro
2286 SYSCALL, 0, st_C_gnumacro
2287 ENTRY, 0, st_C_gnumacro
2288 PSEUDO, 0, st_C_gnumacro
2289 # These are defined inside C functions, so currently they are not met.
2290 # EXFUN used in glibc, DEFVAR_* in emacs.
2291 #EXFUN, 0, st_C_gnumacro
2292 #DEFVAR_, 0, st_C_gnumacro
2293 %]
2294 and replace lines between %< and %> with its output, then:
2295 - remove the #if characterset check
2296 - make in_word_set static and not inline. */
2297 /*%<*/
2298 /* C code produced by gperf version 3.0.1 */
2299 /* Command-line: gperf -m 5 */
2300 /* Computed positions: -k'2-3' */
2301
2302 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2303 /* maximum key range = 33, duplicates = 0 */
2304
/* Hash function of the keyword table (gperf output, see the comment
   before the %< marker above).  The value is LEN plus the asso_values
   entry of the second character of STR and, unless LEN is exactly 2,
   also that of the third character.  STR[1] is always read and STR[2] is
   read whenever LEN is not 2; in_word_set only calls this with LEN in
   the range MIN_WORD_LENGTH..MAX_WORD_LENGTH.  */
2305 static inline unsigned int
2306 hash (register const char *str, register unsigned int len)
2307 {
/* One entry per possible unsigned char value (256 in all).  */
2308 static unsigned char asso_values[] =
2309 {
2310 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2311 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2312 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2313 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2314 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2315 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2316 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2317 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2318 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2319 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2320 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2321 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2322 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2323 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2324 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2325 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2326 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2327 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2328 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2329 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2330 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2331 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2332 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2333 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2334 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2335 35, 35, 35, 35, 35, 35
2336 };
2337 register int hval = len;
2338
/* Every length other than 2 takes the default label and then falls
   through to the case 2 code as well.  */
2339 switch (hval)
2340 {
2341 default:
2342 hval += asso_values[(unsigned char)str[2]];
2343 /*FALLTHROUGH*/
2344 case 2:
2345 hval += asso_values[(unsigned char)str[1]];
2346 break;
2347 }
2348 return hval;
2349 }
2350
/* Look up the LEN characters at STR in the keyword table (gperf output).
   Return a pointer to the matching wordlist entry, or 0 when LEN is
   outside MIN_WORD_LENGTH..MAX_WORD_LENGTH, when the hash value is out
   of range, or when the entry found at the hash slot is a different
   word.  */
2351 static struct C_stab_entry *
2352 in_word_set (register const char *str, register unsigned int len)
2353 {
2354 enum
2355 {
2356 TOTAL_KEYWORDS = 33,
2357 MIN_WORD_LENGTH = 2,
2358 MAX_WORD_LENGTH = 15,
2359 MIN_HASH_VALUE = 2,
2360 MAX_HASH_VALUE = 34
2361 };
2362
/* Indexed by hash value; the first two slots, below MIN_HASH_VALUE,
   hold empty names.  */
2363 static struct C_stab_entry wordlist[] =
2364 {
2365 {""}, {""},
2366 {"if", 0, st_C_ignore},
2367 {"GTY", 0, st_C_attribute},
2368 {"@end", 0, st_C_objend},
2369 {"union", 0, st_C_struct},
2370 {"define", 0, st_C_define},
2371 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2372 {"template", 0, st_C_template},
2373 {"operator", C_PLPL, st_C_operator},
2374 {"@interface", 0, st_C_objprot},
2375 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2376 {"friend", C_PLPL, st_C_ignore},
2377 {"typedef", 0, st_C_typedef},
2378 {"return", 0, st_C_ignore},
2379 {"@implementation",0, st_C_objimpl},
2380 {"@protocol", 0, st_C_objprot},
2381 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2382 {"extern", 0, st_C_extern},
2383 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2384 {"struct", 0, st_C_struct},
2385 {"domain", C_STAR, st_C_struct},
2386 {"switch", 0, st_C_ignore},
2387 {"enum", 0, st_C_enum},
2388 {"for", 0, st_C_ignore},
2389 {"namespace", C_PLPL, st_C_struct},
2390 {"class", 0, st_C_class},
2391 {"while", 0, st_C_ignore},
2392 {"undef", 0, st_C_define},
2393 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2394 {"__attribute__", 0, st_C_attribute},
2395 {"SYSCALL", 0, st_C_gnumacro},
2396 {"ENTRY", 0, st_C_gnumacro},
2397 {"PSEUDO", 0, st_C_gnumacro},
2398 {"DEFUN", 0, st_C_gnumacro}
2399 };
2400
2401 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2402 {
2403 register int key = hash (str, len);
2404
2405 if (key <= MAX_HASH_VALUE && key >= 0)
2406 {
2407 register const char *s = wordlist[key].name;
2408
/* STR need not be NUL-terminated after LEN characters: compare LEN
   characters and require the table word to end right there.  */
2409 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2410 return &wordlist[key];
2411 }
2412 }
2413 return 0;
2414 }
2415 /*%>*/
2416
2417 static enum sym_type
2418 C_symtype (char *str, int len, int c_ext)
2419 {
2420 register struct C_stab_entry *se = in_word_set (str, len);
2421
2422 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2423 return st_none;
2424 return se->type;
2425 }
2426
2427 \f
/*
 * Support for skipping __attribute__ ((list)).
 */
static bool inattribute;	/* looking at an __attribute__ construct */

/*
 * State variable of the simple finite automaton that recognizes
 * C functions and variables.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen; */
2452
/*
 * State variable of the simple finite automaton that recognizes
 * typedefs.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
2466
/*
 * State variable of the simple finite automaton that recognizes
 * struct-like structures (enum, struct and union).
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
2480
/*
 * Name of the current Objective C class; meaningful while objdef is
 * different from onone.
 */
static const char *objtag = "<uninited>";
2485
/*
 * State variable of the little state machine that deals with
 * preprocessor lines.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
2496
/*
 * State variable of the state machine for Objective C protocols and
 * implementations.
 *  Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2516
2517
/*
 * Information about the token read most recently and about the tag that
 * may be built from it.  make_C_tag reads this structure to create the
 * tag.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The members above can be used to pass strings around for generic
    purposes.  The members below specifically refer to creating tags.
    In that case the token described here is the pattern that will be
    used to create a tag.
  */
  bool valid;			/* a tag is created only while this is set;
				   the token should be invalidated whenever
				   a state machine is reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2540
2541 /*
2542 * Variables and functions for dealing with nested structures.
2543 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2544 */
2545 static void pushclass_above (int, char *, int);
2546 static void popclass_above (int);
2547 static void write_classname (linebuffer *, const char *qualifier);
2548
2549 static struct {
2550 char **cname; /* nested class names */
2551 int *bracelev; /* nested class brace level */
2552 int nl; /* class nesting level (elements used) */
2553 int size; /* length of the array */
2554 } cstack; /* stack for nested declaration tags */
2555 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2556 #define nestlev (cstack.nl)
2557 /* After struct keyword or in struct body, not inside a nested function. */
2558 #define instruct (structdef == snone && nestlev > 0 \
2559 && bracelev == cstack.bracelev[nestlev-1] + 1)
2560
2561 static void
2562 pushclass_above (int bracelev, char *str, int len)
2563 {
2564 int nl;
2565
2566 popclass_above (bracelev);
2567 nl = cstack.nl;
2568 if (nl >= cstack.size)
2569 {
2570 int size = cstack.size *= 2;
2571 xrnew (cstack.cname, size, char *);
2572 xrnew (cstack.bracelev, size, int);
2573 }
2574 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2575 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2576 cstack.bracelev[nl] = bracelev;
2577 cstack.nl = nl + 1;
2578 }
2579
2580 static void
2581 popclass_above (int bracelev)
2582 {
2583 int nl;
2584
2585 for (nl = cstack.nl - 1;
2586 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2587 nl--)
2588 {
2589 free (cstack.cname[nl]);
2590 cstack.nl = nl;
2591 }
2592 }
2593
2594 static void
2595 write_classname (linebuffer *cn, const char *qualifier)
2596 {
2597 int i, len;
2598 int qlen = strlen (qualifier);
2599
2600 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2601 {
2602 len = 0;
2603 cn->len = 0;
2604 cn->buffer[0] = '\0';
2605 }
2606 else
2607 {
2608 len = strlen (cstack.cname[0]);
2609 linebuffer_setlen (cn, len);
2610 strcpy (cn->buffer, cstack.cname[0]);
2611 }
2612 for (i = 1; i < cstack.nl; i++)
2613 {
2614 char *s = cstack.cname[i];
2615 if (s == NULL)
2616 continue;
2617 linebuffer_setlen (cn, len + qlen + strlen (s));
2618 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2619 }
2620 }
2621
2622 \f
2623 static bool consider_token (char *, int, int, int *, int, int, bool *);
2624 static void make_C_tag (bool);
2625
2626 /*
2627 * consider_token ()
2628 * checks to see if the current token is at the start of a
2629 * function or variable, or corresponds to a typedef, or
2630 * is a struct/union/enum tag, or #define, or an enum constant.
2631 * It returns TRUE in those cases and FALSE otherwise.
2632 *
2633 * *IS_FUNC_OR_VAR is written only on some of the paths that return
2634 * TRUE: it gets (C == '(') for the name following #define, and TRUE
2635 * for an Objective C class or category name, for `operator', for the
2636 * name following a GNU macro such as DEFUN, and for a function or
2637 * variable name. The other paths leave it untouched.
2638 * C_EXTP points to which language we are looking at; when C_AUTO is
2639 * set in *C_EXTP it may be replaced by C_PLPL here.
2640 *
2641 * Globals
2642 * fvdef IN OUT
2643 * structdef IN OUT
2644 * definedef IN OUT
2645 * typdef IN OUT
2646 * objdef IN OUT
2647 * inattribute OUT
2648 * fvextern OUT
2649 * objtag OUT
2650 * token_name OUT
2641 */
2642
2643 static bool
2644 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2645 /* IN: token pointer */
2646 /* IN: token length */
2647 /* IN: first char after the token */
2648 /* IN, OUT: C extensions mask */
2649 /* IN: brace level */
2650 /* IN: parenthesis level */
2651 /* OUT: function or variable found */
2652 {
2653 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2654 structtype is the type of the preceding struct-like keyword, and
2655 structbracelev is the brace level where it has been seen. */
2656 static enum sym_type structtype;
2657 static int structbracelev;
2658 static enum sym_type toktype;
2659
2660
2661 toktype = C_symtype (str, len, *c_extp);
2662
2663 /*
2664 * Skip __attribute__
2665 */
2666 if (toktype == st_C_attribute)
2667 {
2668 inattribute = TRUE;
2669 return FALSE;
2670 }
2671
2672 /*
2673 * Advance the definedef state machine.
2674 */
2675 switch (definedef)
2676 {
2677 case dnone:
2678 /* We're not on a preprocessor line. */
2679 if (toktype == st_C_gnumacro)
2680 {
2681 fvdef = fdefunkey;
2682 return FALSE;
2683 }
2684 break;
2685 case dsharpseen:
2686 if (toktype == st_C_define)
2687 {
2688 definedef = ddefineseen;
2689 }
2690 else
2691 {
2692 definedef = dignorerest;
2693 }
2694 return FALSE;
2695 case ddefineseen:
2696 /*
2697 * Make a tag for any macro, unless it is a constant
2698 * and constantypedefs is FALSE.
2699 */
2700 definedef = dignorerest;
2701 *is_func_or_var = (c == '(');
2702 if (!*is_func_or_var && !constantypedefs)
2703 return FALSE;
2704 else
2705 return TRUE;
2706 case dignorerest:
2707 return FALSE;
2708 default:
2709 error ("internal error: definedef value.");
2710 }
2711
2712 /*
2713 * Now typedefs
2714 */
2715 switch (typdef)
2716 {
2717 case tnone:
2718 if (toktype == st_C_typedef)
2719 {
/* The state only advances when typedef tags are wanted, but the
   function/variable machine is reset in either case.  */
2720 if (typedefs)
2721 typdef = tkeyseen;
2722 fvextern = FALSE;
2723 fvdef = fvnone;
2724 return FALSE;
2725 }
2726 break;
2727 case tkeyseen:
2728 switch (toktype)
2729 {
2730 case st_none:
2731 case st_C_class:
2732 case st_C_struct:
2733 case st_C_enum:
2734 typdef = ttypeseen;
2735 }
2736 break;
2737 case ttypeseen:
2738 if (structdef == snone && fvdef == fvnone)
2739 {
2740 fvdef = fvnameseen;
2741 return TRUE;
2742 }
2743 break;
2744 case tend:
/* A struct-like keyword here is not accepted; any other token is.  */
2745 switch (toktype)
2746 {
2747 case st_C_class:
2748 case st_C_struct:
2749 case st_C_enum:
2750 return FALSE;
2751 }
2752 return TRUE;
2753 }
2754
/* Keywords that drive the structdef state machine.  */
2755 switch (toktype)
2756 {
2757 case st_C_javastruct:
2758 if (structdef == stagseen)
2759 structdef = scolonseen;
2760 return FALSE;
2761 case st_C_template:
2762 case st_C_class:
2763 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2764 && bracelev == 0
2765 && definedef == dnone && structdef == snone
2766 && typdef == tnone && fvdef == fvnone)
2767 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2768 if (toktype == st_C_template)
2769 break;
2770 /* FALLTHRU */
2771 case st_C_struct:
2772 case st_C_enum:
2773 if (parlev == 0
2774 && fvdef != vignore
2775 && (typdef == tkeyseen
2776 || (typedefs_or_cplusplus && structdef == snone)))
2777 {
2778 structdef = skeyseen;
2779 structtype = toktype;
2780 structbracelev = bracelev;
2781 if (fvdef == fvnameseen)
2782 fvdef = fvnone;
2783 }
2784 return FALSE;
2785 }
2786
/* The token following a struct-like keyword is taken as the tag.  */
2787 if (structdef == skeyseen)
2788 {
2789 structdef = stagseen;
2790 return TRUE;
2791 }
2792
/* NOTE(review): every definedef state other than dnone has already
   returned in the first switch, so for valid states this assignment
   looks redundant -- confirm before relying on it.  */
2793 if (typdef != tnone)
2794 definedef = dnone;
2795
2796 /* Detect Objective C constructs. */
2797 switch (objdef)
2798 {
2799 case onone:
2800 switch (toktype)
2801 {
2802 case st_C_objprot:
2803 objdef = oprotocol;
2804 return FALSE;
2805 case st_C_objimpl:
2806 objdef = oimplementation;
2807 return FALSE;
2808 }
2809 break;
2810 case oimplementation:
2811 /* Save the class tag for functions or variables defined inside. */
2812 objtag = savenstr (str, len);
2813 objdef = oinbody;
2814 return FALSE;
2815 case oprotocol:
2816 /* Save the class tag for categories. */
2817 objtag = savenstr (str, len);
2818 objdef = otagseen;
2819 *is_func_or_var = TRUE;
2820 return TRUE;
2821 case oparenseen:
2822 objdef = ocatseen;
2823 *is_func_or_var = TRUE;
2824 return TRUE;
2825 case oinbody:
2826 break;
2827 case omethodsign:
2828 if (parlev == 0)
2829 {
/* Start the method name: token_name receives a copy of this token.  */
2830 fvdef = fvnone;
2831 objdef = omethodtag;
2832 linebuffer_setlen (&token_name, len);
2833 memcpy (token_name.buffer, str, len);
2834 token_name.buffer[len] = '\0';
2835 return TRUE;
2836 }
2837 return FALSE;
2838 case omethodcolon:
2839 if (parlev == 0)
2840 objdef = omethodparm;
2841 return FALSE;
2842 case omethodparm:
2843 if (parlev == 0)
2844 {
/* Append this token to what token_name already holds.  */
2845 int oldlen = token_name.len;
2846 fvdef = fvnone;
2847 objdef = omethodtag;
2848 linebuffer_setlen (&token_name, oldlen + len);
2849 memcpy (token_name.buffer + oldlen, str, len);
2850 token_name.buffer[oldlen + len] = '\0';
2851 return TRUE;
2852 }
2853 return FALSE;
2854 case oignore:
2855 if (toktype == st_C_objend)
2856 {
2857 /* Memory leakage here: the string pointed by objtag is
2858 never released, because many tests would be needed to
2859 avoid breaking on incorrect input code. The amount of
2860 memory leaked here is the sum of the lengths of the
2861 class tags.
2862 free (objtag); */
2863 objdef = onone;
2864 }
2865 return FALSE;
2866 }
2867
2868 /* A function, variable or enum constant? */
2869 switch (toktype)
2870 {
2871 case st_C_extern:
2872 fvextern = TRUE;
2873 switch (fvdef)
2874 {
2875 case finlist:
2876 case flistseen:
2877 case fignore:
2878 case vignore:
2879 break;
2880 default:
2881 fvdef = fvnone;
2882 }
2883 return FALSE;
2884 case st_C_ignore:
2885 fvextern = FALSE;
2886 fvdef = vignore;
2887 return FALSE;
2888 case st_C_operator:
2889 fvdef = foperator;
2890 *is_func_or_var = TRUE;
2891 return TRUE;
2892 case st_none:
2893 if (constantypedefs
2894 && structdef == snone
2895 && structtype == st_C_enum && bracelev > structbracelev)
2896 return TRUE; /* enum constant */
2897 switch (fvdef)
2898 {
2899 case fdefunkey:
2900 if (bracelev > 0)
2901 break;
2902 fvdef = fdefunname; /* GNU macro */
2903 *is_func_or_var = TRUE;
2904 return TRUE;
2905 case fvnone:
2906 switch (typdef)
2907 {
2908 case ttypeseen:
2909 return FALSE;
2910 case tnone:
2911 if ((strneq (str, "asm", 3) && endtoken (str[3]))
2912 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2913 {
2914 fvdef = vignore;
2915 return FALSE;
2916 }
2917 break;
2918 }
2919 /* FALLTHRU */
2920 case fvnameseen:
/* A token ending in "::operator" switches to the operator state and,
   under C_AUTO, selects C++.  */
2921 if (len >= 10 && strneq (str+len-10, "::operator", 10))
2922 {
2923 if (*c_extp & C_AUTO) /* automatic detection of C++ */
2924 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2925 fvdef = foperator;
2926 *is_func_or_var = TRUE;
2927 return TRUE;
2928 }
2929 if (bracelev > 0 && !instruct)
2930 break;
2931 fvdef = fvnameseen; /* function or variable */
2932 *is_func_or_var = TRUE;
2933 return TRUE;
2934 }
2935 break;
2936 }
2937
2938 return FALSE;
2939 }
2940
2941 \f
2942 /*
2943 * C_entries often keeps pointers to tokens or lines which are older than
2944 * the line currently read. By keeping two line buffers, and switching
2945 * them at end of line, it is possible to use those pointers.
2946 */
2947 static struct
2948 {
2949 long linepos;
2950 linebuffer lb;
2951 } lbs[2];
2952
2953 #define current_lb_is_new (newndx == curndx)
2954 #define switch_line_buffers() (curndx = 1 - curndx)
2955
2956 #define curlb (lbs[curndx].lb)
2957 #define newlb (lbs[newndx].lb)
2958 #define curlinepos (lbs[curndx].linepos)
2959 #define newlinepos (lbs[newndx].linepos)
2960
2961 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2962 #define cplpl (c_ext & C_PLPL)
2963 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2964
2965 #define CNL_SAVE_DEFINEDEF() \
2966 do { \
2967 curlinepos = charno; \
2968 readline (&curlb, inf); \
2969 lp = curlb.buffer; \
2970 quotednl = FALSE; \
2971 newndx = curndx; \
2972 } while (0)
2973
2974 #define CNL() \
2975 do { \
2976 CNL_SAVE_DEFINEDEF(); \
2977 if (savetoken.valid) \
2978 { \
2979 token = savetoken; \
2980 savetoken.valid = FALSE; \
2981 } \
2982 definedef = dnone; \
2983 } while (0)
2984
2985
2986 static void
2987 make_C_tag (int isfun)
2988 {
2989 /* This function is never called when token.valid is FALSE, but
2990 we must protect against invalid input or internal errors. */
2991 if (token.valid)
2992 make_tag (token_name.buffer, token_name.len, isfun, token.line,
2993 token.offset+token.length+1, token.lineno, token.linepos);
2994 else if (DEBUG)
2995 { /* this branch is optimized away if !DEBUG */
2996 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2997 token_name.len + 17, isfun, token.line,
2998 token.offset+token.length+1, token.lineno, token.linepos);
2999 error ("INVALID TOKEN");
3000 }
3001
3002 token.valid = FALSE;
3003 }
3004
3005
3006 /*
3007 * C_entries ()
3008 * This routine finds functions, variables, typedefs,
3009 * #define's, enum constants and struct/union/enum definitions in
3010 * C syntax and adds them to the list.
3011 */
/*
 * Arguments:
 *   c_ext  bit set describing the dialect.  This function tests it
 *          through plainc, cplpl and cjava and for the YACC and C_AUTO
 *          bits; when C_AUTO is set and a `::' is met in the middle of
 *          an identifier, c_ext is changed to C_PLPL.
 *   inf    input stream; lines are fetched from it with readline ()
 *          by the CNL () and CNL_SAVE_DEFINEDEF () macros.
 *
 * Two line buffers, lbs[0].lb and lbs[1].lb, are initialized at entry
 * and freed before returning.  The input is scanned one character per
 * loop iteration: first comments, strings and character constants are
 * skipped, then identifiers are collected and submitted to
 * consider_token (), and finally the punctuation character that ended
 * the token drives the fvdef/typdef/structdef/objdef/definedef state
 * variables and the calls to make_C_tag ().
 */
3012 static void
3013 C_entries (int c_ext, FILE *inf)
3014 /* extension of C */
3015 /* input file */
3016 {
3017 register char c; /* latest char read; '\0' for end of line */
3018 register char *lp; /* pointer one beyond the character `c' */
3019 int curndx, newndx; /* indices for current and new lb */
3020 register int tokoff; /* offset in line of start of current token */
3021 register int toklen; /* length of current token */
3022 const char *qualifier; /* string used to qualify names */
3023 int qlen; /* length of qualifier */
3024 int bracelev; /* current brace level */
3025 int bracketlev; /* current bracket level */
3026 int parlev; /* current parenthesis level */
3027 int attrparlev; /* __attribute__ parenthesis level */
3028 int templatelev; /* current template level */
3029 int typdefbracelev; /* bracelev where a typedef struct body began */
3030 bool incomm, inquote, inchar, quotednl, midtoken;
3031 bool yacc_rules; /* in the rules part of a yacc file */
3032 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3033
3034
3035 linebuffer_init (&lbs[0].lb);
3036 linebuffer_init (&lbs[1].lb);
/* The class stack is allocated only when its size is still 0; it is
   not freed by this function. */
3037 if (cstack.size == 0)
3038 {
3039 cstack.size = (DEBUG) ? 1 : 4;
3040 cstack.nl = 0;
3041 cstack.cname = xnew (cstack.size, char *);
3042 cstack.bracelev = xnew (cstack.size, int);
3043 }
3044
3045 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3046 curndx = newndx = 0;
3047 lp = curlb.buffer;
3048 *lp = 0;
3049
3050 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3051 structdef = snone; definedef = dnone; objdef = onone;
3052 yacc_rules = FALSE;
3053 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3054 token.valid = savetoken.valid = FALSE;
3055 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Separator put between class name and member name in qualified tags:
   "." for Java, "::" otherwise. */
3056 if (cjava)
3057 { qualifier = "."; qlen = 1; }
3058 else
3059 { qualifier = "::"; qlen = 2; }
3060
3061
/* Main loop: one character is consumed per iteration.  The buffer
   starts out empty, so the first character seen is '\0', which makes
   the code below read the first line. */
3062 while (!feof (inf))
3063 {
3064 c = *lp++;
3065 if (c == '\\')
3066 {
3067 /* If we are at the end of the line, the next character is a
3068 '\0'; do not skip it, because it is what tells us
3069 to read the next line. */
3070 if (*lp == '\0')
3071 {
3072 quotednl = TRUE;
3073 continue;
3074 }
/* Otherwise skip the escaped character and handle the pair as a blank. */
3075 lp++;
3076 c = ' ';
3077 }
3078 else if (incomm)
3079 {
3080 switch (c)
3081 {
3082 case '*':
3083 if (*lp == '/')
3084 {
3085 c = *lp++;
3086 incomm = FALSE;
3087 }
3088 break;
3089 case '\0':
3090 /* Newlines inside comments do not end macro definitions in
3091 traditional cpp. */
3092 CNL_SAVE_DEFINEDEF ();
3093 break;
3094 }
3095 continue;
3096 }
3097 else if (inquote)
3098 {
3099 switch (c)
3100 {
3101 case '"':
3102 inquote = FALSE;
3103 break;
3104 case '\0':
3105 /* Newlines inside strings do not end macro definitions
3106 in traditional cpp, even though compilers don't
3107 usually accept them. */
3108 CNL_SAVE_DEFINEDEF ();
3109 break;
3110 }
3111 continue;
3112 }
3113 else if (inchar)
3114 {
3115 switch (c)
3116 {
3117 case '\0':
3118 /* Hmmm, something went wrong. */
3119 CNL ();
3120 /* FALLTHRU */
3121 case '\'':
3122 inchar = FALSE;
3123 break;
3124 }
3125 continue;
3126 }
/* Not inside a comment, a string or a character constant: look for
   the characters that open one, and for `%%', `#' and brackets. */
3127 else switch (c)
3128 {
3129 case '"':
3130 inquote = TRUE;
3131 if (bracketlev > 0)
3132 continue;
3133 if (inattribute)
3134 break;
3135 switch (fvdef)
3136 {
3137 case fdefunkey:
3138 case fstartlist:
3139 case finlist:
3140 case fignore:
3141 case vignore:
3142 break;
3143 default:
3144 fvextern = FALSE;
3145 fvdef = fvnone;
3146 }
3147 continue;
3148 case '\'':
3149 inchar = TRUE;
3150 if (bracketlev > 0)
3151 continue;
3152 if (inattribute)
3153 break;
3154 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3155 {
3156 fvextern = FALSE;
3157 fvdef = fvnone;
3158 }
3159 continue;
3160 case '/':
3161 if (*lp == '*')
3162 {
3163 incomm = TRUE;
3164 lp++;
3165 c = ' ';
3166 if (bracketlev > 0)
3167 continue;
3168 }
/* A `//' comment is handled as an end of line; the test on cplpl is
   disabled, so this applies to every dialect. */
3169 else if (/* cplpl && */ *lp == '/')
3170 {
3171 c = '\0';
3172 }
3173 break;
3174 case '%':
3175 if ((c_ext & YACC) && *lp == '%')
3176 {
3177 /* Entering or exiting rules section in yacc file. */
3178 lp++;
3179 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3180 typdef = tnone; structdef = snone;
3181 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3182 bracelev = 0;
3183 yacc_rules = !yacc_rules;
3184 continue;
3185 }
3186 else
3187 break;
3188 case '#':
3189 if (definedef == dnone)
3190 {
3191 char *cp;
3192 bool cpptoken = TRUE;
3193
3194 /* Look back on this line. If all blanks, or nonblanks
3195 followed by an end of comment, this is a preprocessor
3196 token. */
3197 for (cp = newlb.buffer; cp < lp-1; cp++)
3198 if (!iswhite (*cp))
3199 {
3200 if (*cp == '*' && cp[1] == '/')
3201 {
3202 cp++;
3203 cpptoken = TRUE;
3204 }
3205 else
3206 cpptoken = FALSE;
3207 }
3208 if (cpptoken)
3209 definedef = dsharpseen;
3210 } /* if (definedef == dnone) */
3211 continue;
3212 case '[':
3213 bracketlev++;
3214 continue;
3215 default:
/* Inside brackets nothing is examined: only the closing `]' and the
   end of line are acted upon. */
3216 if (bracketlev > 0)
3217 {
3218 if (c == ']')
3219 --bracketlev;
3220 else if (c == '\0')
3221 CNL_SAVE_DEFINEDEF ();
3222 continue;
3223 }
3224 break;
3225 } /* switch (c) */
3226
3227
3228 /* Consider token only if some involved conditions are satisfied. */
3229 if (typdef != tignore
3230 && definedef != dignorerest
3231 && fvdef != finlist
3232 && templatelev == 0
3233 && (definedef != dnone
3234 || structdef != scolonseen)
3235 && !inattribute)
3236 {
3237 if (midtoken)
3238 {
3239 if (endtoken (c))
3240 {
3241 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3242 /* This handles :: in the middle,
3243 but not at the beginning of an identifier.
3244 Also, space-separated :: is not recognized. */
3245 {
3246 if (c_ext & C_AUTO) /* automatic detection of C++ */
3247 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3248 lp += 2;
3249 toklen += 2;
3250 c = lp[-1];
3251 goto still_in_token;
3252 }
3253 else
3254 {
3255 bool funorvar = FALSE;
3256
3257 if (yacc_rules
3258 || consider_token (newlb.buffer + tokoff, toklen, c,
3259 &c_ext, bracelev, parlev,
3260 &funorvar))
3261 {
/* After `operator', extend the token to the next blank or `('. */
3262 if (fvdef == foperator)
3263 {
3264 char *oldlp = lp;
3265 lp = skip_spaces (lp-1);
3266 if (*lp != '\0')
3267 lp += 1;
3268 while (*lp != '\0'
3269 && !iswhite (*lp) && *lp != '(')
3270 lp += 1;
3271 c = *lp++;
3272 toklen += lp - oldlp;
3273 }
/* Build the tag name in token_name; the form depends on the context. */
3274 token.named = FALSE;
3275 if (!plainc
3276 && nestlev > 0 && definedef == dnone)
3277 /* in struct body */
3278 {
3279 int len;
3280 write_classname (&token_name, qualifier);
3281 len = token_name.len;
3282 linebuffer_setlen (&token_name, len+qlen+toklen);
3283 sprintf (token_name.buffer + len, "%s%.*s",
3284 qualifier, toklen, newlb.buffer + tokoff);
3285 token.named = TRUE;
3286 }
3287 else if (objdef == ocatseen)
3288 /* Objective C category */
3289 {
3290 int len = strlen (objtag) + 2 + toklen;
3291 linebuffer_setlen (&token_name, len);
3292 sprintf (token_name.buffer, "%s(%.*s)",
3293 objtag, toklen, newlb.buffer + tokoff);
3294 token.named = TRUE;
3295 }
3296 else if (objdef == omethodtag
3297 || objdef == omethodparm)
3298 /* Objective C method */
3299 {
3300 token.named = TRUE;
3301 }
3302 else if (fvdef == fdefunname)
3303 /* GNU DEFUN and similar macros */
3304 {
3305 bool defun = (newlb.buffer[tokoff] == 'F');
3306 int off = tokoff;
3307 int len = toklen;
3308
3309 /* Rewrite the tag so that emacs lisp DEFUNs
3310 can be found by their elisp name */
3311 if (defun)
3312 {
3313 off += 1;
3314 len -= 1;
3315 }
3316 linebuffer_setlen (&token_name, len);
3317 memcpy (token_name.buffer,
3318 newlb.buffer + off, len);
3319 token_name.buffer[len] = '\0';
3320 if (defun)
3321 while (--len >= 0)
3322 if (token_name.buffer[len] == '_')
3323 token_name.buffer[len] = '-';
3324 token.named = defun;
3325 }
3326 else
3327 {
3328 linebuffer_setlen (&token_name, toklen);
3329 memcpy (token_name.buffer,
3330 newlb.buffer + tokoff, toklen);
3331 token_name.buffer[toklen] = '\0';
3332 /* Name macros and members. */
3333 token.named = (structdef == stagseen
3334 || typdef == ttypeseen
3335 || typdef == tend
3336 || (funorvar
3337 && definedef == dignorerest)
3338 || (funorvar
3339 && definedef == dnone
3340 && structdef == snone
3341 && bracelev > 0));
3342 }
3343 token.lineno = lineno;
3344 token.offset = tokoff;
3345 token.length = toklen;
3346 token.line = newlb.buffer;
3347 token.linepos = newlinepos;
3348 token.valid = TRUE;
3349
3350 if (definedef == dnone
3351 && (fvdef == fvnameseen
3352 || fvdef == foperator
3353 || structdef == stagseen
3354 || typdef == tend
3355 || typdef == ttypeseen
3356 || objdef != onone))
3357 {
3358 if (current_lb_is_new)
3359 switch_line_buffers ();
3360 }
3361 else if (definedef != dnone
3362 || fvdef == fdefunname
3363 || instruct)
3364 make_C_tag (funorvar);
3365 }
3366 else /* not yacc and consider_token failed */
3367 {
3368 if (inattribute && fvdef == fignore)
3369 {
3370 /* We have just met __attribute__ after a
3371 function parameter list: do not tag the
3372 function again. */
3373 fvdef = fvnone;
3374 }
3375 }
3376 midtoken = FALSE;
3377 }
3378 } /* if (endtoken (c)) */
3379 else if (intoken (c))
3380 still_in_token:
3381 {
3382 toklen++;
3383 continue;
3384 }
3385 } /* if (midtoken) */
3386 else if (begtoken (c))
3387 {
3388 switch (definedef)
3389 {
3390 case dnone:
3391 switch (fvdef)
3392 {
3393 case fstartlist:
3394 /* This prevents tagging fb in
3395 void (__attribute__((noreturn)) *fb) (void);
3396 Fixing this is not easy and not very important. */
3397 fvdef = finlist;
3398 continue;
3399 case flistseen:
3400 if (plainc || declarations)
3401 {
3402 make_C_tag (TRUE); /* a function */
3403 fvdef = fignore;
3404 }
3405 break;
3406 }
3407 if (structdef == stagseen && !cjava)
3408 {
3409 popclass_above (bracelev);
3410 structdef = snone;
3411 }
3412 break;
3413 case dsharpseen:
3414 savetoken = token;
3415 break;
3416 }
/* In yacc rules a token is started only in the first column. */
3417 if (!yacc_rules || lp == newlb.buffer + 1)
3418 {
3419 tokoff = lp - 1 - newlb.buffer;
3420 toklen = 1;
3421 midtoken = TRUE;
3422 }
3423 continue;
3424 } /* if (begtoken) */
3425 } /* if must look at token */
3426
3427
3428 /* Detect end of line, colon, comma, semicolon and various braces
3429 after having handled a token.*/
3430 switch (c)
3431 {
3432 case ':':
3433 if (inattribute)
3434 break;
3435 if (yacc_rules && token.offset == 0 && token.valid)
3436 {
3437 make_C_tag (FALSE); /* a yacc function */
3438 break;
3439 }
3440 if (definedef != dnone)
3441 break;
3442 switch (objdef)
3443 {
3444 case otagseen:
3445 objdef = oignore;
3446 make_C_tag (TRUE); /* an Objective C class */
3447 break;
3448 case omethodtag:
3449 case omethodparm:
3450 objdef = omethodcolon;
3451 linebuffer_setlen (&token_name, token_name.len + 1);
3452 strcat (token_name.buffer, ":");
3453 break;
3454 }
3455 if (structdef == stagseen)
3456 {
3457 structdef = scolonseen;
3458 break;
3459 }
3460 /* Should be useless, but may work as a safety net. */
3461 if (cplpl && fvdef == flistseen)
3462 {
3463 make_C_tag (TRUE); /* a function */
3464 fvdef = fignore;
3465 break;
3466 }
3467 break;
3468 case ';':
3469 if (definedef != dnone || inattribute)
3470 break;
3471 switch (typdef)
3472 {
3473 case tend:
3474 case ttypeseen:
3475 make_C_tag (FALSE); /* a typedef */
3476 typdef = tnone;
3477 fvdef = fvnone;
3478 break;
3479 case tnone:
3480 case tinbody:
3481 case tignore:
3482 switch (fvdef)
3483 {
3484 case fignore:
3485 if (typdef == tignore || cplpl)
3486 fvdef = fvnone;
3487 break;
3488 case fvnameseen:
3489 if ((globals && bracelev == 0 && (!fvextern || declarations))
3490 || (members && instruct))
3491 make_C_tag (FALSE); /* a variable */
3492 fvextern = FALSE;
3493 fvdef = fvnone;
3494 token.valid = FALSE;
3495 break;
3496 case flistseen:
3497 if ((declarations
3498 && (cplpl || !instruct)
3499 && (typdef == tnone || (typdef != tignore && instruct)))
3500 || (members
3501 && plainc && instruct))
3502 make_C_tag (TRUE); /* a function */
3503 /* FALLTHRU */
3504 default:
3505 fvextern = FALSE;
3506 fvdef = fvnone;
3507 if (declarations
3508 && cplpl && structdef == stagseen)
3509 make_C_tag (FALSE); /* forward declaration */
3510 else
3511 token.valid = FALSE;
3512 } /* switch (fvdef) */
3513 /* FALLTHRU */
3514 default:
3515 if (!instruct)
3516 typdef = tnone;
3517 }
3518 if (structdef == stagseen)
3519 structdef = snone;
3520 break;
3521 case ',':
3522 if (definedef != dnone || inattribute)
3523 break;
3524 switch (objdef)
3525 {
3526 case omethodtag:
3527 case omethodparm:
3528 make_C_tag (TRUE); /* an Objective C method */
3529 objdef = oinbody;
3530 break;
3531 }
3532 switch (fvdef)
3533 {
3534 case fdefunkey:
3535 case foperator:
3536 case fstartlist:
3537 case finlist:
3538 case fignore:
3539 case vignore:
3540 break;
3541 case fdefunname:
3542 fvdef = fignore;
3543 break;
3544 case fvnameseen:
3545 if (parlev == 0
3546 && ((globals
3547 && bracelev == 0
3548 && templatelev == 0
3549 && (!fvextern || declarations))
3550 || (members && instruct)))
3551 make_C_tag (FALSE); /* a variable */
3552 break;
3553 case flistseen:
3554 if ((declarations && typdef == tnone && !instruct)
3555 || (members && typdef != tignore && instruct))
3556 {
3557 make_C_tag (TRUE); /* a function */
3558 fvdef = fvnameseen;
3559 }
3560 else if (!declarations)
3561 fvdef = fvnone;
3562 token.valid = FALSE;
3563 break;
3564 default:
3565 fvdef = fvnone;
3566 }
3567 if (structdef == stagseen)
3568 structdef = snone;
3569 break;
3570 case ']':
3571 if (definedef != dnone || inattribute)
3572 break;
3573 if (structdef == stagseen)
3574 structdef = snone;
3575 switch (typdef)
3576 {
3577 case ttypeseen:
3578 case tend:
3579 typdef = tignore;
3580 make_C_tag (FALSE); /* a typedef */
3581 break;
3582 case tnone:
3583 case tinbody:
3584 switch (fvdef)
3585 {
3586 case foperator:
3587 case finlist:
3588 case fignore:
3589 case vignore:
3590 break;
3591 case fvnameseen:
3592 if ((members && bracelev == 1)
3593 || (globals && bracelev == 0
3594 && (!fvextern || declarations)))
3595 make_C_tag (FALSE); /* a variable */
3596 /* FALLTHRU */
3597 default:
3598 fvdef = fvnone;
3599 }
3600 break;
3601 }
3602 break;
3603 case '(':
3604 if (inattribute)
3605 {
3606 attrparlev++;
3607 break;
3608 }
3609 if (definedef != dnone)
3610 break;
3611 if (objdef == otagseen && parlev == 0)
3612 objdef = oparenseen;
3613 switch (fvdef)
3614 {
3615 case fvnameseen:
3616 if (typdef == ttypeseen
3617 && *lp != '*'
3618 && !instruct)
3619 {
3620 /* This handles constructs like:
3621 typedef void OperatorFun (int fun); */
3622 make_C_tag (FALSE);
3623 typdef = tignore;
3624 fvdef = fignore;
3625 break;
3626 }
3627 /* FALLTHRU */
3628 case foperator:
3629 fvdef = fstartlist;
3630 break;
3631 case flistseen:
3632 fvdef = finlist;
3633 break;
3634 }
3635 parlev++;
3636 break;
3637 case ')':
3638 if (inattribute)
3639 {
3640 if (--attrparlev == 0)
3641 inattribute = FALSE;
3642 break;
3643 }
3644 if (definedef != dnone)
3645 break;
3646 if (objdef == ocatseen && parlev == 1)
3647 {
3648 make_C_tag (TRUE); /* an Objective C category */
3649 objdef = oignore;
3650 }
3651 if (--parlev == 0)
3652 {
3653 switch (fvdef)
3654 {
3655 case fstartlist:
3656 case finlist:
3657 fvdef = flistseen;
3658 break;
3659 }
3660 if (!instruct
3661 && (typdef == tend
3662 || typdef == ttypeseen))
3663 {
3664 typdef = tignore;
3665 make_C_tag (FALSE); /* a typedef */
3666 }
3667 }
3668 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3669 parlev = 0;
3670 break;
3671 case '{':
3672 if (definedef != dnone)
3673 break;
3674 if (typdef == ttypeseen)
3675 {
3676 /* Whenever typdef is set to tinbody (currently only
3677 here), typdefbracelev should be set to bracelev. */
3678 typdef = tinbody;
3679 typdefbracelev = bracelev;
3680 }
3681 switch (fvdef)
3682 {
3683 case flistseen:
3684 make_C_tag (TRUE); /* a function */
3685 /* FALLTHRU */
3686 case fignore:
3687 fvdef = fvnone;
3688 break;
3689 case fvnone:
3690 switch (objdef)
3691 {
3692 case otagseen:
3693 make_C_tag (TRUE); /* an Objective C class */
3694 objdef = oignore;
3695 break;
3696 case omethodtag:
3697 case omethodparm:
3698 make_C_tag (TRUE); /* an Objective C method */
3699 objdef = oinbody;
3700 break;
3701 default:
3702 /* Neutralize `extern "C" {' grot. */
3703 if (bracelev == 0 && structdef == snone && nestlev == 0
3704 && typdef == tnone)
3705 bracelev = -1;
3706 }
3707 break;
3708 }
3709 switch (structdef)
3710 {
3711 case skeyseen: /* unnamed struct */
3712 pushclass_above (bracelev, NULL, 0);
3713 structdef = snone;
3714 break;
3715 case stagseen: /* named struct or enum */
3716 case scolonseen: /* a class */
3717 pushclass_above (bracelev,token.line+token.offset, token.length);
3718 structdef = snone;
3719 make_C_tag (FALSE); /* a struct or enum */
3720 break;
3721 }
3722 bracelev += 1;
3723 break;
3724 case '*':
3725 if (definedef != dnone)
3726 break;
3727 if (fvdef == fstartlist)
3728 {
3729 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3730 token.valid = FALSE;
3731 }
3732 break;
3733 case '}':
3734 if (definedef != dnone)
3735 break;
3736 bracelev -= 1;
3737 if (!ignoreindent && lp == newlb.buffer + 1)
3738 {
3739 if (bracelev != 0)
3740 token.valid = FALSE; /* unexpected value, token unreliable */
3741 bracelev = 0; /* reset brace level if first column */
3742 parlev = 0; /* also reset paren level, just in case... */
3743 }
3744 else if (bracelev < 0)
3745 {
3746 token.valid = FALSE; /* something gone amiss, token unreliable */
3747 bracelev = 0;
3748 }
3749 if (bracelev == 0 && fvdef == vignore)
3750 fvdef = fvnone; /* end of function */
3751 popclass_above (bracelev);
3752 structdef = snone;
3753 /* Only if typdef == tinbody is typdefbracelev significant. */
3754 if (typdef == tinbody && bracelev <= typdefbracelev)
3755 {
3756 assert (bracelev == typdefbracelev);
3757 typdef = tend;
3758 }
3759 break;
3760 case '=':
3761 if (definedef != dnone)
3762 break;
3763 switch (fvdef)
3764 {
3765 case foperator:
3766 case finlist:
3767 case fignore:
3768 case vignore:
3769 break;
3770 case fvnameseen:
3771 if ((members && bracelev == 1)
3772 || (globals && bracelev == 0 && (!fvextern || declarations)))
3773 make_C_tag (FALSE); /* a variable */
3774 /* FALLTHRU */
3775 default:
3776 fvdef = vignore;
3777 }
3778 break;
3779 case '<':
3780 if (cplpl
3781 && (structdef == stagseen || fvdef == fvnameseen))
3782 {
3783 templatelev++;
3784 break;
3785 }
3786 goto resetfvdef;
3787 case '>':
3788 if (templatelev > 0)
3789 {
3790 templatelev--;
3791 break;
3792 }
3793 goto resetfvdef;
3794 case '+':
3795 case '-':
3796 if (objdef == oinbody && bracelev == 0)
3797 {
3798 objdef = omethodsign;
3799 break;
3800 }
3801 /* FALLTHRU */
3802 resetfvdef:
3803 case '#': case '~': case '&': case '%': case '/':
3804 case '|': case '^': case '!': case '.': case '?':
3805 if (definedef != dnone)
3806 break;
3807 /* These surely cannot follow a function tag in C. */
3808 switch (fvdef)
3809 {
3810 case foperator:
3811 case finlist:
3812 case fignore:
3813 case vignore:
3814 break;
3815 default:
3816 fvdef = fvnone;
3817 }
3818 break;
3819 case '\0':
3820 if (objdef == otagseen)
3821 {
3822 make_C_tag (TRUE); /* an Objective C class */
3823 objdef = oignore;
3824 }
3825 /* If a macro spans multiple lines don't reset its state. */
3826 if (quotednl)
3827 CNL_SAVE_DEFINEDEF ();
3828 else
3829 CNL ();
3830 break;
3831 } /* switch (c) */
3832
3833 } /* while not eof */
3834
/* Release the two line buffers initialized at entry. */
3835 free (lbs[0].lb.buffer);
3836 free (lbs[1].lb.buffer);
3837 }
3838
3839 /*
3840 * Process either a C++ file or a C file depending on the setting
3841 * of a global flag.
3842 */
3843 static void
3844 default_C_entries (FILE *inf)
3845 {
3846 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3847 }
3848
/* Parse INF as plain C: no dialect bit is passed to C_entries. */
static void
plain_C_entries (FILE *inf)
{
  const int dialect = 0;

  C_entries (dialect, inf);
}
3855
3856 /* Always do C++. */
3857 static void
3858 Cplusplus_entries (FILE *inf)
3859 {
3860 C_entries (C_PLPL, inf);
3861 }
3862
3863 /* Always do Java. */
3864 static void
3865 Cjava_entries (FILE *inf)
3866 {
3867 C_entries (C_JAVA, inf);
3868 }
3869
3870 /* Always do C*. */
3871 static void
3872 Cstar_entries (FILE *inf)
3873 {
3874 C_entries (C_STAR, inf);
3875 }
3876
3877 /* Always do Yacc. */
3878 static void
3879 Yacc_entries (FILE *inf)
3880 {
3881 C_entries (YACC, inf);
3882 }
3883
3884 \f
3885 /* Useful macros. */
/* Header of a `for' loop over the lines of FILE_POINTER: as long as
   feof () is false, each iteration first reads a line into
   LINE_BUFFER with readline () and points CHAR_POINTER at the start
   of that buffer.  The loop body follows the macro call. */
3886 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
3887 for (; /* loop initialization */ \
3888 !feof (file_pointer) /* loop test */ \
3889 && /* instructions at start of loop */ \
3890 (readline (&line_buffer, file_pointer), \
3891 char_pointer = line_buffer.buffer, \
3892 TRUE); \
3893 )
3894
/* True when CP points at the keyword KW and the character after it
   satisfies notinname ().  In that case CP is moved past the keyword
   and past the spaces that follow it; otherwise CP is unchanged.
   CP is evaluated several times and must be an lvalue. */
3895 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
3896 ((assert ("" kw), TRUE) /* syntax error if not a literal string */ \
3897 && strneq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3898 && notinname ((cp)[sizeof (kw)-1]) /* end of kw */ \
3899 && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3900
3901 /* Similar to LOOKING_AT but ignores case, does not use notinname, and
   moves CP just past the keyword without skipping the spaces after it */
3902 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3903 ((assert ("" kw), TRUE) /* syntax error if not a literal string */ \
3904 && strncaseeq ((cp), kw, sizeof (kw)-1) /* cp points at kw */ \
3905 && ((cp) += sizeof (kw)-1)) /* skip the keyword */
3906
3907 /*
3908 * Read a file, but do no processing. This is used to do regexp
3909 * matching on files that have no language defined.
3910 */
3911 static void
3912 just_read_file (FILE *inf)
3913 {
3914 while (!feof (inf))
3915 readline (&lb, inf);
3916 }
3917
3918 \f
3919 /* Fortran parsing */
3920
3921 static void F_takeprec (void);
3922 static void F_getit (FILE *);
3923
3924 static void
3925 F_takeprec (void)
3926 {
3927 dbp = skip_spaces (dbp);
3928 if (*dbp != '*')
3929 return;
3930 dbp++;
3931 dbp = skip_spaces (dbp);
3932 if (strneq (dbp, "(*)", 3))
3933 {
3934 dbp += 3;
3935 return;
3936 }
3937 if (!ISDIGIT (*dbp))
3938 {
3939 --dbp; /* force failure */
3940 return;
3941 }
3942 do
3943 dbp++;
3944 while (ISDIGIT (*dbp));
3945 }
3946
3947 static void
3948 F_getit (FILE *inf)
3949 {
3950 register char *cp;
3951
3952 dbp = skip_spaces (dbp);
3953 if (*dbp == '\0')
3954 {
3955 readline (&lb, inf);
3956 dbp = lb.buffer;
3957 if (dbp[5] != '&')
3958 return;
3959 dbp += 6;
3960 dbp = skip_spaces (dbp);
3961 }
3962 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3963 return;
3964 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3965 continue;
3966 make_tag (dbp, cp-dbp, TRUE,
3967 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3968 }
3969
3970
3971 static void
3972 Fortran_functions (FILE *inf)
3973 {
3974 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3975 {
3976 if (*dbp == '%')
3977 dbp++; /* Ratfor escape to fortran */
3978 dbp = skip_spaces (dbp);
3979 if (*dbp == '\0')
3980 continue;
3981
3982 if (LOOKING_AT_NOCASE (dbp, "recursive"))
3983 dbp = skip_spaces (dbp);
3984
3985 if (LOOKING_AT_NOCASE (dbp, "pure"))
3986 dbp = skip_spaces (dbp);
3987
3988 if (LOOKING_AT_NOCASE (dbp, "elemental"))
3989 dbp = skip_spaces (dbp);
3990
3991 switch (lowcase (*dbp))
3992 {
3993 case 'i':
3994 if (nocase_tail ("integer"))
3995 F_takeprec ();
3996 break;
3997 case 'r':
3998 if (nocase_tail ("real"))
3999 F_takeprec ();
4000 break;
4001 case 'l':
4002 if (nocase_tail ("logical"))
4003 F_takeprec ();
4004 break;
4005 case 'c':
4006 if (nocase_tail ("complex") || nocase_tail ("character"))
4007 F_takeprec ();
4008 break;
4009 case 'd':
4010 if (nocase_tail ("double"))
4011 {
4012 dbp = skip_spaces (dbp);
4013 if (*dbp == '\0')
4014 continue;
4015 if (nocase_tail ("precision"))
4016 break;
4017 continue;
4018 }
4019 break;
4020 }
4021 dbp = skip_spaces (dbp);
4022 if (*dbp == '\0')
4023 continue;
4024 switch (lowcase (*dbp))
4025 {
4026 case 'f':
4027 if (nocase_tail ("function"))
4028 F_getit (inf);
4029 continue;
4030 case 's':
4031 if (nocase_tail ("subroutine"))
4032 F_getit (inf);
4033 continue;
4034 case 'e':
4035 if (nocase_tail ("entry"))
4036 F_getit (inf);
4037 continue;
4038 case 'b':
4039 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4040 {
4041 dbp = skip_spaces (dbp);
4042 if (*dbp == '\0') /* assume un-named */
4043 make_tag ("blockdata", 9, TRUE,
4044 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4045 else
4046 F_getit (inf); /* look for name */
4047 }
4048 continue;
4049 }
4050 }
4051 }
4052
4053 \f
4054 /*
4055 * Ada parsing
4056 * Original code by
4057 * Philippe Waroquiers (1998)
4058 */
4059
4060 /* Once we are positioned after an "interesting" keyword, let's get
4061 the real tag value necessary. */
4062 static void
4063 Ada_getit (FILE *inf, const char *name_qualifier)
4064 {
4065 register char *cp;
4066 char *name;
4067 char c;
4068
4069 while (!feof (inf))
4070 {
4071 dbp = skip_spaces (dbp);
4072 if (*dbp == '\0'
4073 || (dbp[0] == '-' && dbp[1] == '-'))
4074 {
4075 readline (&lb, inf);
4076 dbp = lb.buffer;
4077 }
4078 switch (lowcase (*dbp))
4079 {
4080 case 'b':
4081 if (nocase_tail ("body"))
4082 {
4083 /* Skipping body of procedure body or package body or ....
4084 resetting qualifier to body instead of spec. */
4085 name_qualifier = "/b";
4086 continue;
4087 }
4088 break;
4089 case 't':
4090 /* Skipping type of task type or protected type ... */
4091 if (nocase_tail ("type"))
4092 continue;
4093 break;
4094 }
4095 if (*dbp == '"')
4096 {
4097 dbp += 1;
4098 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4099 continue;
4100 }
4101 else
4102 {
4103 dbp = skip_spaces (dbp);
4104 for (cp = dbp;
4105 (*cp != '\0'
4106 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4107 cp++)
4108 continue;
4109 if (cp == dbp)
4110 return;
4111 }
4112 c = *cp;
4113 *cp = '\0';
4114 name = concat (dbp, name_qualifier, "");
4115 *cp = c;
4116 make_tag (name, strlen (name), TRUE,
4117 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4118 free (name);
4119 if (c == '"')
4120 dbp = cp + 1;
4121 return;
4122 }
4123 }
4124
4125 static void
4126 Ada_funcs (FILE *inf)
4127 {
4128 bool inquote = FALSE;
4129 bool skip_till_semicolumn = FALSE;
4130
4131 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4132 {
4133 while (*dbp != '\0')
4134 {
4135 /* Skip a string i.e. "abcd". */
4136 if (inquote || (*dbp == '"'))
4137 {
4138 dbp = etags_strchr (dbp + !inquote, '"');
4139 if (dbp != NULL)
4140 {
4141 inquote = FALSE;
4142 dbp += 1;
4143 continue; /* advance char */
4144 }
4145 else
4146 {
4147 inquote = TRUE;
4148 break; /* advance line */
4149 }
4150 }
4151
4152 /* Skip comments. */
4153 if (dbp[0] == '-' && dbp[1] == '-')
4154 break; /* advance line */
4155
4156 /* Skip character enclosed in single quote i.e. 'a'
4157 and skip single quote starting an attribute i.e. 'Image. */
4158 if (*dbp == '\'')
4159 {
4160 dbp++ ;
4161 if (*dbp != '\0')
4162 dbp++;
4163 continue;
4164 }
4165
4166 if (skip_till_semicolumn)
4167 {
4168 if (*dbp == ';')
4169 skip_till_semicolumn = FALSE;
4170 dbp++;
4171 continue; /* advance char */
4172 }
4173
4174 /* Search for beginning of a token. */
4175 if (!begtoken (*dbp))
4176 {
4177 dbp++;
4178 continue; /* advance char */
4179 }
4180
4181 /* We are at the beginning of a token. */
4182 switch (lowcase (*dbp))
4183 {
4184 case 'f':
4185 if (!packages_only && nocase_tail ("function"))
4186 Ada_getit (inf, "/f");
4187 else
4188 break; /* from switch */
4189 continue; /* advance char */
4190 case 'p':
4191 if (!packages_only && nocase_tail ("procedure"))
4192 Ada_getit (inf, "/p");
4193 else if (nocase_tail ("package"))
4194 Ada_getit (inf, "/s");
4195 else if (nocase_tail ("protected")) /* protected type */
4196 Ada_getit (inf, "/t");
4197 else
4198 break; /* from switch */
4199 continue; /* advance char */
4200
4201 case 'u':
4202 if (typedefs && !packages_only && nocase_tail ("use"))
4203 {
4204 /* when tagging types, avoid tagging use type Pack.Typename;
4205 for this, we will skip everything till a ; */
4206 skip_till_semicolumn = TRUE;
4207 continue; /* advance char */
4208 }
4209
4210 case 't':
4211 if (!packages_only && nocase_tail ("task"))
4212 Ada_getit (inf, "/k");
4213 else if (typedefs && !packages_only && nocase_tail ("type"))
4214 {
4215 Ada_getit (inf, "/t");
4216 while (*dbp != '\0')
4217 dbp += 1;
4218 }
4219 else
4220 break; /* from switch */
4221 continue; /* advance char */
4222 }
4223
4224 /* Look for the end of the token. */
4225 while (!endtoken (*dbp))
4226 dbp++;
4227
4228 } /* advance char */
4229 } /* advance line */
4230 }
4231
4232 \f
4233 /*
4234 * Unix and microcontroller assembly tag handling
4235 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4236 * Idea by Bob Weiner, Motorola Inc. (1994)
4237 */
4238 static void
4239 Asm_labels (FILE *inf)
4240 {
4241 register char *cp;
4242
4243 LOOP_ON_INPUT_LINES (inf, lb, cp)
4244 {
4245 /* If first char is alphabetic or one of [_.$], test for colon
4246 following identifier. */
4247 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4248 {
4249 /* Read past label. */
4250 cp++;
4251 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4252 cp++;
4253 if (*cp == ':' || iswhite (*cp))
4254 /* Found end of label, so copy it and add it to the table. */
4255 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4256 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4257 }
4258 }
4259 }
4260
4261 \f
4262 /*
4263 * Perl support
4264 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4265 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4266 * Perl variable names: /^(my|local).../
4267 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4268 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4269 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4270 */
4271 static void
4272 Perl_functions (FILE *inf)
4273 {
4274 char *package = savestr ("main"); /* current package name */
4275 register char *cp;
4276
4277 LOOP_ON_INPUT_LINES (inf, lb, cp)
4278 {
4279 cp = skip_spaces (cp);
4280
4281 if (LOOKING_AT (cp, "package"))
4282 {
4283 free (package);
4284 get_tag (cp, &package);
4285 }
4286 else if (LOOKING_AT (cp, "sub"))
4287 {
4288 char *pos, *sp;
4289
4290 subr:
4291 sp = cp;
4292 while (!notinname (*cp))
4293 cp++;
4294 if (cp == sp)
4295 continue; /* nothing found */
4296 if ((pos = etags_strchr (sp, ':')) != NULL
4297 && pos < cp && pos[1] == ':')
4298 /* The name is already qualified. */
4299 make_tag (sp, cp - sp, TRUE,
4300 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4301 else
4302 /* Qualify it. */
4303 {
4304 char savechar, *name;
4305
4306 savechar = *cp;
4307 *cp = '\0';
4308 name = concat (package, "::", sp);
4309 *cp = savechar;
4310 make_tag (name, strlen (name), TRUE,
4311 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4312 free (name);
4313 }
4314 }
4315 else if (LOOKING_AT (cp, "use constant")
4316 || LOOKING_AT (cp, "use constant::defer"))
4317 {
4318 /* For hash style multi-constant like
4319 use constant { FOO => 123,
4320 BAR => 456 };
4321 only the first FOO is picked up. Parsing across the value
4322 expressions would be difficult in general, due to possible nested
4323 hashes, here-documents, etc. */
4324 if (*cp == '{')
4325 cp = skip_spaces (cp+1);
4326 goto subr;
4327 }
4328 else if (globals) /* only if we are tagging global vars */
4329 {
4330 /* Skip a qualifier, if any. */
4331 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4332 /* After "my" or "local", but before any following paren or space. */
4333 char *varstart = cp;
4334
4335 if (qual /* should this be removed? If yes, how? */
4336 && (*cp == '$' || *cp == '@' || *cp == '%'))
4337 {
4338 varstart += 1;
4339 do
4340 cp++;
4341 while (ISALNUM (*cp) || *cp == '_');
4342 }
4343 else if (qual)
4344 {
4345 /* Should be examining a variable list at this point;
4346 could insist on seeing an open parenthesis. */
4347 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4348 cp++;
4349 }
4350 else
4351 continue;
4352
4353 make_tag (varstart, cp - varstart, FALSE,
4354 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4355 }
4356 }
4357 free (package);
4358 }
4359
4360
4361 /*
4362 * Python support
4363 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4364 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4365 * More ideas by seb bacon <seb@jamkit.com> (2002)
4366 */
4367 static void
4368 Python_functions (FILE *inf)
4369 {
4370 register char *cp;
4371
4372 LOOP_ON_INPUT_LINES (inf, lb, cp)
4373 {
4374 cp = skip_spaces (cp);
4375 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4376 {
4377 char *name = cp;
4378 while (!notinname (*cp) && *cp != ':')
4379 cp++;
4380 make_tag (name, cp - name, TRUE,
4381 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4382 }
4383 }
4384 }
4385
4386 \f
4387 /*
4388 * PHP support
4389 * Look for:
4390 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4391 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4392 * - /^[ \t]*define\(\"[^\"]+/
4393 * Only with --members:
4394 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4395 * Idea by Diez B. Roggisch (2001)
4396 */
4397 static void
4398 PHP_functions (FILE *inf)
4399 {
4400 register char *cp, *name;
4401 bool search_identifier = FALSE;
4402
4403 LOOP_ON_INPUT_LINES (inf, lb, cp)
4404 {
4405 cp = skip_spaces (cp);
4406 name = cp;
4407 if (search_identifier
4408 && *cp != '\0')
4409 {
4410 while (!notinname (*cp))
4411 cp++;
4412 make_tag (name, cp - name, TRUE,
4413 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4414 search_identifier = FALSE;
4415 }
4416 else if (LOOKING_AT (cp, "function"))
4417 {
4418 if (*cp == '&')
4419 cp = skip_spaces (cp+1);
4420 if (*cp != '\0')
4421 {
4422 name = cp;
4423 while (!notinname (*cp))
4424 cp++;
4425 make_tag (name, cp - name, TRUE,
4426 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4427 }
4428 else
4429 search_identifier = TRUE;
4430 }
4431 else if (LOOKING_AT (cp, "class"))
4432 {
4433 if (*cp != '\0')
4434 {
4435 name = cp;
4436 while (*cp != '\0' && !iswhite (*cp))
4437 cp++;
4438 make_tag (name, cp - name, FALSE,
4439 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4440 }
4441 else
4442 search_identifier = TRUE;
4443 }
4444 else if (strneq (cp, "define", 6)
4445 && (cp = skip_spaces (cp+6))
4446 && *cp++ == '('
4447 && (*cp == '"' || *cp == '\''))
4448 {
4449 char quote = *cp++;
4450 name = cp;
4451 while (*cp != quote && *cp != '\0')
4452 cp++;
4453 make_tag (name, cp - name, FALSE,
4454 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4455 }
4456 else if (members
4457 && LOOKING_AT (cp, "var")
4458 && *cp == '$')
4459 {
4460 name = cp;
4461 while (!notinname (*cp))
4462 cp++;
4463 make_tag (name, cp - name, FALSE,
4464 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4465 }
4466 }
4467 }
4468
4469 \f
4470 /*
4471 * Cobol tag functions
4472 * We could look for anything that could be a paragraph name.
4473 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4474 * Idea by Corny de Souza (1993)
4475 */
4476 static void
4477 Cobol_paragraphs (FILE *inf)
4478 {
4479 register char *bp, *ep;
4480
4481 LOOP_ON_INPUT_LINES (inf, lb, bp)
4482 {
4483 if (lb.len < 9)
4484 continue;
4485 bp += 8;
4486
4487 /* If eoln, compiler option or comment ignore whole line. */
4488 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4489 continue;
4490
4491 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4492 continue;
4493 if (*ep++ == '.')
4494 make_tag (bp, ep - bp, TRUE,
4495 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4496 }
4497 }
4498
4499 \f
4500 /*
4501 * Makefile support
4502 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4503 */
4504 static void
4505 Makefile_targets (FILE *inf)
4506 {
4507 register char *bp;
4508
4509 LOOP_ON_INPUT_LINES (inf, lb, bp)
4510 {
4511 if (*bp == '\t' || *bp == '#')
4512 continue;
4513 while (*bp != '\0' && *bp != '=' && *bp != ':')
4514 bp++;
4515 if (*bp == ':' || (globals && *bp == '='))
4516 {
4517 /* We should detect if there is more than one tag, but we do not.
4518 We just skip initial and final spaces. */
4519 char * namestart = skip_spaces (lb.buffer);
4520 while (--bp > namestart)
4521 if (!notinname (*bp))
4522 break;
4523 make_tag (namestart, bp - namestart + 1, TRUE,
4524 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4525 }
4526 }
4527 }
4528
4529 \f
4530 /*
4531 * Pascal parsing
4532 * Original code by Mosur K. Mohan (1989)
4533 *
4534 * Locates tags for procedures & functions. Doesn't do any type- or
4535 * var-definitions. It does look for the keyword "extern" or
4536 * "forward" immediately following the procedure statement; if found,
4537 * the tag is skipped.
4538 */
/* Scan INF one character at a time through the file-level pointer dbp,
   which walks lb.buffer, and tag procedure/function names.  The name
   is remembered when it is seen (together with a copy of its line in
   TLINE) and the tag is only made once the token after the heading's
   terminating ';' has been checked against "extern"/"forward".  */
4539 static void
4540 Pascal_functions (FILE *inf)
4541 {
4542 linebuffer tline; /* mostly copied from C_entries */
4543 long save_lcno;
4544 int save_lineno, namelen, taglen;
4545 char c, *name;
4546
4547 bool /* each of these flags is TRUE if: */
4548 incomment, /* point is inside a comment */
4549 inquote, /* point is inside '..' string */
4550 get_tagname, /* point is after PROCEDURE/FUNCTION
4551 keyword, so next item = potential tag */
4552 found_tag, /* point is after a potential tag */
4553 inparms, /* point is within parameter-list */
4554 verify_tag; /* point has passed the parm-list, so the
4555 next token will determine whether this
4556 is a FORWARD/EXTERN to be ignored, or
4557 whether it is a real tag */
4558
4559 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4560 name = NULL; /* keep compiler quiet */
/* Start with an empty line so that the first pass through the loop
   below reads the first line of the file.  */
4561 dbp = lb.buffer;
4562 *dbp = '\0';
4563 linebuffer_init (&tline);
4564
4565 incomment = inquote = FALSE;
4566 found_tag = FALSE; /* have a proc name; check if extern */
4567 get_tagname = FALSE; /* found "procedure" keyword */
4568 inparms = FALSE; /* found '(' after "proc" */
4569 verify_tag = FALSE; /* check if "extern" is ahead */
4570
4571
4572 while (!feof (inf)) /* long main loop to get next char */
4573 {
4574 c = *dbp++;
4575 if (c == '\0') /* if end of line */
4576 {
4577 readline (&lb, inf);
4578 dbp = lb.buffer;
4579 if (*dbp == '\0')
4580 continue;
4581 if (!((found_tag && verify_tag)
4582 || get_tagname))
4583 c = *dbp++; /* only if don't need *dbp pointing
4584 to the beginning of the name of
4585 the procedure or function */
4586 }
/* Comment and string contents are skipped; only their terminators
   are looked for.  */
4587 if (incomment)
4588 {
4589 if (c == '}') /* within { } comments */
4590 incomment = FALSE;
4591 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4592 {
4593 dbp++;
4594 incomment = FALSE;
4595 }
4596 continue;
4597 }
4598 else if (inquote)
4599 {
4600 if (c == '\'')
4601 inquote = FALSE;
4602 continue;
4603 }
4604 else
4605 switch (c)
4606 {
4607 case '\'':
4608 inquote = TRUE; /* found first quote */
4609 continue;
4610 case '{': /* found open { comment */
4611 incomment = TRUE;
4612 continue;
4613 case '(':
4614 if (*dbp == '*') /* found open (* comment */
4615 {
4616 incomment = TRUE;
4617 dbp++;
4618 }
4619 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4620 inparms = TRUE;
4621 continue;
4622 case ')': /* end of parms list */
4623 if (inparms)
4624 inparms = FALSE;
4625 continue;
4626 case ';':
4627 if (found_tag && !inparms) /* end of proc or fn stmt */
4628 {
4629 verify_tag = TRUE;
4630 break;
4631 }
4632 continue;
4633 }
/* A heading has been closed by ';': wait for the first character that
   is not a space, then decide whether the saved name becomes a tag.
   NOTE(review): nocase_tail is defined elsewhere; it appears to match
   its argument case-insensitively at dbp -- confirm.  */
4634 if (found_tag && verify_tag && (*dbp != ' '))
4635 {
4636 /* Check if this is an "extern" declaration. */
4637 if (*dbp == '\0')
4638 continue;
4639 if (lowcase (*dbp) == 'e')
4640 {
4641 if (nocase_tail ("extern")) /* superfluous, really! */
4642 {
4643 found_tag = FALSE;
4644 verify_tag = FALSE;
4645 }
4646 }
4647 else if (lowcase (*dbp) == 'f')
4648 {
4649 if (nocase_tail ("forward")) /* check for forward reference */
4650 {
4651 found_tag = FALSE;
4652 verify_tag = FALSE;
4653 }
4654 }
4655 if (found_tag && verify_tag) /* not external proc, so make tag */
4656 {
4657 found_tag = FALSE;
4658 verify_tag = FALSE;
4659 make_tag (name, namelen, TRUE,
4660 tline.buffer, taglen, save_lineno, save_lcno);
4661 continue;
4662 }
4663 }
4664 if (get_tagname) /* grab name of proc or fn */
4665 {
4666 char *cp;
4667
4668 if (*dbp == '\0')
4669 continue;
4670
4671 /* Find block name. */
4672 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4673 continue;
4674
/* The tag is made later, after lb has been overwritten by further
   lines, so the current line and position are copied aside; NAME
   points into the copy, not into lb.  */
4675 /* Save all values for later tagging. */
4676 linebuffer_setlen (&tline, lb.len);
4677 strcpy (tline.buffer, lb.buffer);
4678 save_lineno = lineno;
4679 save_lcno = linecharno;
4680 name = tline.buffer + (dbp - lb.buffer);
4681 namelen = cp - dbp;
4682 taglen = cp - lb.buffer + 1;
4683
4684 dbp = cp; /* set dbp to e-o-token */
4685 get_tagname = FALSE;
4686 found_tag = TRUE;
4687 continue;
4688
4689 /* And proceed to check for "extern". */
4690 }
4691 else if (!incomment && !inquote && !found_tag)
4692 {
4693 /* Check for proc/fn keywords. */
4694 switch (lowcase (c))
4695 {
4696 case 'p':
4697 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4698 get_tagname = TRUE;
4699 continue;
4700 case 'f':
4701 if (nocase_tail ("unction"))
4702 get_tagname = TRUE;
4703 continue;
4704 }
4705 }
4706 } /* while not eof */
4707
4708 free (tline.buffer);
4709 }
4710
4711 \f
4712 /*
4713 * Lisp tag functions
4714 * look for (def or (DEF, quote or QUOTE
4715 */
4716
4717 static void L_getit (void);
4718
4719 static void
4720 L_getit (void)
4721 {
4722 if (*dbp == '\'') /* Skip prefix quote */
4723 dbp++;
4724 else if (*dbp == '(')
4725 {
4726 dbp++;
4727 /* Try to skip "(quote " */
4728 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4729 /* Ok, then skip "(" before name in (defstruct (foo)) */
4730 dbp = skip_spaces (dbp);
4731 }
4732 get_tag (dbp, NULL);
4733 }
4734
4735 static void
4736 Lisp_functions (FILE *inf)
4737 {
4738 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4739 {
4740 if (dbp[0] != '(')
4741 continue;
4742
4743 /* "(defvar foo)" is a declaration rather than a definition. */
4744 if (! declarations)
4745 {
4746 char *p = dbp + 1;
4747 if (LOOKING_AT (p, "defvar"))
4748 {
4749 p = skip_name (p); /* past var name */
4750 p = skip_spaces (p);
4751 if (*p == ')')
4752 continue;
4753 }
4754 }
4755
4756 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4757 {
4758 dbp = skip_non_spaces (dbp);
4759 dbp = skip_spaces (dbp);
4760 L_getit ();
4761 }
4762 else
4763 {
4764 /* Check for (foo::defmumble name-defined ... */
4765 do
4766 dbp++;
4767 while (!notinname (*dbp) && *dbp != ':');
4768 if (*dbp == ':')
4769 {
4770 do
4771 dbp++;
4772 while (*dbp == ':');
4773
4774 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4775 {
4776 dbp = skip_non_spaces (dbp);
4777 dbp = skip_spaces (dbp);
4778 L_getit ();
4779 }
4780 }
4781 }
4782 }
4783 }
4784
4785 \f
4786 /*
4787 * Lua script language parsing
4788 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4789 *
4790 * "function" and "local function" are tags if they start at column 1.
4791 */
4792 static void
4793 Lua_functions (FILE *inf)
4794 {
4795 register char *bp;
4796
4797 LOOP_ON_INPUT_LINES (inf, lb, bp)
4798 {
4799 if (bp[0] != 'f' && bp[0] != 'l')
4800 continue;
4801
4802 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4803
4804 if (LOOKING_AT (bp, "function"))
4805 get_tag (bp, NULL);
4806 }
4807 }
4808
4809 \f
4810 /*
4811 * PostScript tags
4812 * Just look for lines where the first character is '/'
4813 * Also look at "defineps" for PSWrap
4814 * Ideas by:
4815 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4816 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4817 */
4818 static void
4819 PS_functions (FILE *inf)
4820 {
4821 register char *bp, *ep;
4822
4823 LOOP_ON_INPUT_LINES (inf, lb, bp)
4824 {
4825 if (bp[0] == '/')
4826 {
4827 for (ep = bp+1;
4828 *ep != '\0' && *ep != ' ' && *ep != '{';
4829 ep++)
4830 continue;
4831 make_tag (bp, ep - bp, TRUE,
4832 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4833 }
4834 else if (LOOKING_AT (bp, "defineps"))
4835 get_tag (bp, NULL);
4836 }
4837 }
4838
4839 \f
4840 /*
4841 * Forth tags
4842 * Ignore anything after \ followed by space or in ( )
4843 * Look for words defined by :
4844 * Look for constant, code, create, defer, value, and variable
4845 * OBP extensions: Look for buffer:, field,
4846 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4847 */
4848 static void
4849 Forth_words (FILE *inf)
4850 {
4851 register char *bp;
4852
4853 LOOP_ON_INPUT_LINES (inf, lb, bp)
4854 while ((bp = skip_spaces (bp))[0] != '\0')
4855 if (bp[0] == '\\' && iswhite (bp[1]))
4856 break; /* read next line */
4857 else if (bp[0] == '(' && iswhite (bp[1]))
4858 do /* skip to ) or eol */
4859 bp++;
4860 while (*bp != ')' && *bp != '\0');
4861 else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4862 || LOOKING_AT_NOCASE (bp, "constant")
4863 || LOOKING_AT_NOCASE (bp, "code")
4864 || LOOKING_AT_NOCASE (bp, "create")
4865 || LOOKING_AT_NOCASE (bp, "defer")
4866 || LOOKING_AT_NOCASE (bp, "value")
4867 || LOOKING_AT_NOCASE (bp, "variable")
4868 || LOOKING_AT_NOCASE (bp, "buffer:")
4869 || LOOKING_AT_NOCASE (bp, "field"))
4870 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4871 else
4872 bp = skip_non_spaces (bp);
4873 }
4874
4875 \f
4876 /*
4877 * Scheme tag functions
4878 * look for (def... xyzzy
4879 * (def... (xyzzy
4880 * (def ... ((...(xyzzy ....
4881 * (set! xyzzy
4882 * Original code by Ken Haase (1985?)
4883 */
4884 static void
4885 Scheme_functions (FILE *inf)
4886 {
4887 register char *bp;
4888
4889 LOOP_ON_INPUT_LINES (inf, lb, bp)
4890 {
4891 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4892 {
4893 bp = skip_non_spaces (bp+4);
4894 /* Skip over open parens and white space. Don't continue past
4895 '\0'. */
4896 while (*bp && notinname (*bp))
4897 bp++;
4898 get_tag (bp, NULL);
4899 }
4900 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4901 get_tag (bp, NULL);
4902 }
4903 }
4904
4905 \f
4906 /* Find tags in TeX and LaTeX input files. */
4907
4908 /* TEX_toktab is a table of TeX control sequences that define tags.
4909 * Each entry records one such control sequence.
4910 *
4911 * Original code from who knows whom.
4912 * Ideas by:
4913 * Stefan Monnier (2002)
4914 */
4915
4916 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4917
4918 /* Default set of control sequences to put into TEX_toktab.
4919 The value of environment var TEXTAGS is prepended to this. */
4920 static const char *TEX_defenv = "\
4921 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4922 :part:appendix:entry:index:def\
4923 :newcommand:renewcommand:newenvironment:renewenvironment";
4924
4925 static void TEX_mode (FILE *);
4926 static void TEX_decode_env (const char *, const char *);
4927
4928 static char TEX_esc = '\\';
4929 static char TEX_opgrp = '{';
4930 static char TEX_clgrp = '}';
4931
4932 /*
4933 * TeX/LaTeX scanning loop.
4934 */
4935 static void
4936 TeX_commands (FILE *inf)
4937 {
4938 char *cp;
4939 linebuffer *key;
4940
4941 /* Select either \ or ! as escape character. */
4942 TEX_mode (inf);
4943
4944 /* Initialize token table once from environment. */
4945 if (TEX_toktab == NULL)
4946 TEX_decode_env ("TEXTAGS", TEX_defenv);
4947
4948 LOOP_ON_INPUT_LINES (inf, lb, cp)
4949 {
4950 /* Look at each TEX keyword in line. */
4951 for (;;)
4952 {
4953 /* Look for a TEX escape. */
4954 while (*cp++ != TEX_esc)
4955 if (cp[-1] == '\0' || cp[-1] == '%')
4956 goto tex_next_line;
4957
4958 for (key = TEX_toktab; key->buffer != NULL; key++)
4959 if (strneq (cp, key->buffer, key->len))
4960 {
4961 register char *p;
4962 int namelen, linelen;
4963 bool opgrp = FALSE;
4964
4965 cp = skip_spaces (cp + key->len);
4966 if (*cp == TEX_opgrp)
4967 {
4968 opgrp = TRUE;
4969 cp++;
4970 }
4971 for (p = cp;
4972 (!iswhite (*p) && *p != '#' &&
4973 *p != TEX_opgrp && *p != TEX_clgrp);
4974 p++)
4975 continue;
4976 namelen = p - cp;
4977 linelen = lb.len;
4978 if (!opgrp || *p == TEX_clgrp)
4979 {
4980 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4981 p++;
4982 linelen = p - lb.buffer + 1;
4983 }
4984 make_tag (cp, namelen, TRUE,
4985 lb.buffer, linelen, lineno, linecharno);
4986 goto tex_next_line; /* We only tag a line once */
4987 }
4988 }
4989 tex_next_line:
4990 ;
4991 }
4992 }
4993
4994 #define TEX_LESC '\\'
4995 #define TEX_SESC '!'
4996
4997 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4998 chars accordingly. */
4999 static void
5000 TEX_mode (FILE *inf)
5001 {
5002 int c;
5003
5004 while ((c = getc (inf)) != EOF)
5005 {
5006 /* Skip to next line if we hit the TeX comment char. */
5007 if (c == '%')
5008 while (c != '\n' && c != EOF)
5009 c = getc (inf);
5010 else if (c == TEX_LESC || c == TEX_SESC )
5011 break;
5012 }
5013
5014 if (c == TEX_LESC)
5015 {
5016 TEX_esc = TEX_LESC;
5017 TEX_opgrp = '{';
5018 TEX_clgrp = '}';
5019 }
5020 else
5021 {
5022 TEX_esc = TEX_SESC;
5023 TEX_opgrp = '<';
5024 TEX_clgrp = '>';
5025 }
5026 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5027 No attempt is made to correct the situation. */
5028 rewind (inf);
5029 }
5030
5031 /* Read environment and prepend it to the default string.
5032 Build token table. */
5033 static void
5034 TEX_decode_env (const char *evarname, const char *defenv)
5035 {
5036 register const char *env, *p;
5037 int i, len;
5038
5039 /* Append default string to environment. */
5040 env = getenv (evarname);
5041 if (!env)
5042 env = defenv;
5043 else
5044 env = concat (env, defenv, "");
5045
5046 /* Allocate a token table */
5047 for (len = 1, p = env; p;)
5048 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5049 len++;
5050 TEX_toktab = xnew (len, linebuffer);
5051
5052 /* Unpack environment string into token table. Be careful about */
5053 /* zero-length strings (leading ':', "::" and trailing ':') */
5054 for (i = 0; *env != '\0';)
5055 {
5056 p = etags_strchr (env, ':');
5057 if (!p) /* End of environment string. */
5058 p = env + strlen (env);
5059 if (p - env > 0)
5060 { /* Only non-zero strings. */
5061 TEX_toktab[i].buffer = savenstr (env, p - env);
5062 TEX_toktab[i].len = p - env;
5063 i++;
5064 }
5065 if (*p)
5066 env = p + 1;
5067 else
5068 {
5069 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5070 TEX_toktab[i].len = 0;
5071 break;
5072 }
5073 }
5074 }
5075
5076 \f
5077 /* Texinfo support. Dave Love, Mar. 2000. */
5078 static void
5079 Texinfo_nodes (FILE *inf)
5080 {
5081 char *cp, *start;
5082 LOOP_ON_INPUT_LINES (inf, lb, cp)
5083 if (LOOKING_AT (cp, "@node"))
5084 {
5085 start = cp;
5086 while (*cp != '\0' && *cp != ',')
5087 cp++;
5088 make_tag (start, cp - start, TRUE,
5089 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5090 }
5091 }
5092
5093 \f
5094 /*
5095 * HTML support.
5096 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5097 * Contents of <a name=xxx> are tags with name xxx.
5098 *
5099 * Francesco Potortì, 2002.
5100 */
5101 static void
5102 HTML_labels (FILE *inf)
5103 {
5104 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5105 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5106 bool intag = FALSE; /* inside an html tag, looking for ID= */
5107 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5108 char *end;
5109
5110
5111 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5112
5113 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5114 for (;;) /* loop on the same line */
5115 {
5116 if (skiptag) /* skip HTML tag */
5117 {
5118 while (*dbp != '\0' && *dbp != '>')
5119 dbp++;
5120 if (*dbp == '>')
5121 {
5122 dbp += 1;
5123 skiptag = FALSE;
5124 continue; /* look on the same line */
5125 }
5126 break; /* go to next line */
5127 }
5128
5129 else if (intag) /* look for "name=" or "id=" */
5130 {
5131 while (*dbp != '\0' && *dbp != '>'
5132 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5133 dbp++;
5134 if (*dbp == '\0')
5135 break; /* go to next line */
5136 if (*dbp == '>')
5137 {
5138 dbp += 1;
5139 intag = FALSE;
5140 continue; /* look on the same line */
5141 }
5142 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5143 || LOOKING_AT_NOCASE (dbp, "id="))
5144 {
5145 bool quoted = (dbp[0] == '"');
5146
5147 if (quoted)
5148 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5149 continue;
5150 else
5151 for (end = dbp; *end != '\0' && intoken (*end); end++)
5152 continue;
5153 linebuffer_setlen (&token_name, end - dbp);
5154 memcpy (token_name.buffer, dbp, end - dbp);
5155 token_name.buffer[end - dbp] = '\0';
5156
5157 dbp = end;
5158 intag = FALSE; /* we found what we looked for */
5159 skiptag = TRUE; /* skip to the end of the tag */
5160 getnext = TRUE; /* then grab the text */
5161 continue; /* look on the same line */
5162 }
5163 dbp += 1;
5164 }
5165
5166 else if (getnext) /* grab next tokens and tag them */
5167 {
5168 dbp = skip_spaces (dbp);
5169 if (*dbp == '\0')
5170 break; /* go to next line */
5171 if (*dbp == '<')
5172 {
5173 intag = TRUE;
5174 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5175 continue; /* look on the same line */
5176 }
5177
5178 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5179 continue;
5180 make_tag (token_name.buffer, token_name.len, TRUE,
5181 dbp, end - dbp, lineno, linecharno);
5182 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5183 getnext = FALSE;
5184 break; /* go to next line */
5185 }
5186
5187 else /* look for an interesting HTML tag */
5188 {
5189 while (*dbp != '\0' && *dbp != '<')
5190 dbp++;
5191 if (*dbp == '\0')
5192 break; /* go to next line */
5193 intag = TRUE;
5194 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5195 {
5196 inanchor = TRUE;
5197 continue; /* look on the same line */
5198 }
5199 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5200 || LOOKING_AT_NOCASE (dbp, "<h1>")
5201 || LOOKING_AT_NOCASE (dbp, "<h2>")
5202 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5203 {
5204 intag = FALSE;
5205 getnext = TRUE;
5206 continue; /* look on the same line */
5207 }
5208 dbp += 1;
5209 }
5210 }
5211 }
5212
5213 \f
5214 /*
5215 * Prolog support
5216 *
5217 * Assumes that the predicate or rule starts at column 0.
5218 * Only the first clause of a predicate or rule is added.
5219 * Original code by Sunichirou Sugou (1989)
5220 * Rewritten by Anders Lindgren (1996)
5221 */
5222 static size_t prolog_pr (char *, char *);
5223 static void prolog_skip_comment (linebuffer *, FILE *);
5224 static size_t prolog_atom (char *, size_t);
5225
5226 static void
5227 Prolog_functions (FILE *inf)
5228 {
5229 char *cp, *last;
5230 size_t len;
5231 size_t allocated;
5232
5233 allocated = 0;
5234 len = 0;
5235 last = NULL;
5236
5237 LOOP_ON_INPUT_LINES (inf, lb, cp)
5238 {
5239 if (cp[0] == '\0') /* Empty line */
5240 continue;
5241 else if (iswhite (cp[0])) /* Not a predicate */
5242 continue;
5243 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5244 prolog_skip_comment (&lb, inf);
5245 else if ((len = prolog_pr (cp, last)) > 0)
5246 {
5247 /* Predicate or rule. Store the function name so that we
5248 only generate a tag for the first clause. */
5249 if (last == NULL)
5250 last = xnew (len + 1, char);
5251 else if (len + 1 > allocated)
5252 xrnew (last, len + 1, char);
5253 allocated = len + 1;
5254 memcpy (last, cp, len);
5255 last[len] = '\0';
5256 }
5257 }
5258 free (last);
5259 }
5260
5261
5262 static void
5263 prolog_skip_comment (linebuffer *plb, FILE *inf)
5264 {
5265 char *cp;
5266
5267 do
5268 {
5269 for (cp = plb->buffer; *cp != '\0'; cp++)
5270 if (cp[0] == '*' && cp[1] == '/')
5271 return;
5272 readline (plb, inf);
5273 }
5274 while (!feof (inf));
5275 }
5276
5277 /*
5278 * A predicate or rule definition is added if it matches:
5279 * <beginning of line><Prolog Atom><whitespace>(
5280 * or <beginning of line><Prolog Atom><whitespace>:-
5281 *
5282 * It is added to the tags database if it doesn't match the
5283 * name of the previous clause header.
5284 *
5285 * Return the size of the name of the predicate or rule, or 0 if no
5286 * header was found.
5287 */
5288 static size_t
5289 prolog_pr (char *s, char *last)
5290
5291 /* Name of last clause. */
5292 {
5293 size_t pos;
5294 size_t len;
5295
5296 pos = prolog_atom (s, 0);
5297 if (! pos)
5298 return 0;
5299
5300 len = pos;
5301 pos = skip_spaces (s + pos) - s;
5302
5303 if ((s[pos] == '.'
5304 || (s[pos] == '(' && (pos += 1))
5305 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5306 && (last == NULL /* save only the first clause */
5307 || len != strlen (last)
5308 || !strneq (s, last, len)))
5309 {
5310 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5311 return len;
5312 }
5313 else
5314 return 0;
5315 }
5316
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  p++;                          /* opening quote */
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      case '\'':
        if (p[1] != '\'')
          return (p + 1) - start;       /* closing quote */
        p += 2;                         /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5373
5374 \f
5375 /*
5376 * Support for Erlang
5377 *
5378 * Generates tags for functions, defines, and records.
5379 * Assumes that Erlang functions start at column 0.
5380 * Original code by Anders Lindgren (1996)
5381 */
5382 static int erlang_func (char *, char *);
5383 static void erlang_attribute (char *);
5384 static int erlang_atom (char *);
5385
5386 static void
5387 Erlang_functions (FILE *inf)
5388 {
5389 char *cp, *last;
5390 int len;
5391 int allocated;
5392
5393 allocated = 0;
5394 len = 0;
5395 last = NULL;
5396
5397 LOOP_ON_INPUT_LINES (inf, lb, cp)
5398 {
5399 if (cp[0] == '\0') /* Empty line */
5400 continue;
5401 else if (iswhite (cp[0])) /* Not function nor attribute */
5402 continue;
5403 else if (cp[0] == '%') /* comment */
5404 continue;
5405 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5406 continue;
5407 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5408 {
5409 erlang_attribute (cp);
5410 if (last != NULL)
5411 {
5412 free (last);
5413 last = NULL;
5414 }
5415 }
5416 else if ((len = erlang_func (cp, last)) > 0)
5417 {
5418 /*
5419 * Function. Store the function name so that we only
5420 * generates a tag for the first clause.
5421 */
5422 if (last == NULL)
5423 last = xnew (len + 1, char);
5424 else if (len + 1 > allocated)
5425 xrnew (last, len + 1, char);
5426 allocated = len + 1;
5427 memcpy (last, cp, len);
5428 last[len] = '\0';
5429 }
5430 }
5431 free (last);
5432 }
5433
5434
5435 /*
5436 * A function definition is added if it matches:
5437 * <beginning of line><Erlang Atom><whitespace>(
5438 *
5439 * It is added to the tags database if it doesn't match the
5440 * name of the previous clause header.
5441 *
5442 * Return the size of the name of the function, or 0 if no function
5443 * was found.
5444 */
5445 static int
5446 erlang_func (char *s, char *last)
5447
5448 /* Name of last clause. */
5449 {
5450 int pos;
5451 int len;
5452
5453 pos = erlang_atom (s);
5454 if (pos < 1)
5455 return 0;
5456
5457 len = pos;
5458 pos = skip_spaces (s + pos) - s;
5459
5460 /* Save only the first clause. */
5461 if (s[pos++] == '('
5462 && (last == NULL
5463 || len != (int)strlen (last)
5464 || !strneq (s, last, len)))
5465 {
5466 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5467 return len;
5468 }
5469
5470 return 0;
5471 }
5472
5473
5474 /*
5475 * Handle attributes. Currently, tags are generated for defines
5476 * and records.
5477 *
5478 * They are on the form:
5479 * -define(foo, bar).
5480 * -define(Foo(M, N), M+N).
5481 * -record(graph, {vtab = notable, cyclic = true}).
5482 */
5483 static void
5484 erlang_attribute (char *s)
5485 {
5486 char *cp = s;
5487
5488 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5489 && *cp++ == '(')
5490 {
5491 int len = erlang_atom (skip_spaces (cp));
5492 if (len > 0)
5493 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5494 }
5495 return;
5496 }
5497
5498
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  const char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - s);
    }

  if (*p != '\'')
    return 0;

  for (p++; *p != '\''; p++)
    {
      if (*p == '\0')           /* multiline quoted atoms are ignored */
        return 0;
      if (*p == '\\')
        {
          /* A backslash makes the next character part of the atom,
             even if it is a quote.  */
          p++;
          if (*p == '\0')
            return 0;
        }
    }
  return (int) (p + 1 - s);     /* count the closing quote */
}
5526
5527 \f
5528 static char *scan_separators (char *);
5529 static void add_regex (char *, language *);
5530 static char *substitute (char *, char *, struct re_registers *);
5531
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the next
 * occurrence of it that is not quoted by a backslash.  A quoted
 * separator becomes a plain separator, \a \b \d \e \f \n \r \t \v
 * become the corresponding control characters, and any other quoted
 * character is kept together with its backslash.
 * Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *out = name;             /* where the next result char goes */
  char *in;                     /* next char to examine */
  char *result;

  for (in = name + 1; *in != '\0'; in++)
    {
      if (*in == '\\')
        {
          /* Look at the quoted character; a backslash that ends the
             string quotes nothing.  */
          in++;
          if (*in == '\0')
            break;
          switch (*in)
            {
            case 'a': *out++ = '\007'; break;   /* BEL (bell)            */
            case 'b': *out++ = '\b';   break;   /* BS (back space)       */
            case 'd': *out++ = 0177;   break;   /* DEL (delete)          */
            case 'e': *out++ = 033;    break;   /* ESC (escape)          */
            case 'f': *out++ = '\f';   break;   /* FF (form feed)        */
            case 'n': *out++ = '\n';   break;   /* NL (new line)         */
            case 'r': *out++ = '\r';   break;   /* CR (carriage return)  */
            case 't': *out++ = '\t';   break;   /* TAB (horizontal tab)  */
            case 'v': *out++ = '\v';   break;   /* VT (vertical tab)     */
            default:
              /* Something else is quoted, so preserve the quote,
                 unless it is the separator itself.  */
              if (*in != sep)
                *out++ = '\\';
              *out++ = *in;
              break;
            }
          continue;
        }

      if (*in == sep)
        break;
      *out++ = *in;
    }

  /* Signal unterminated regexp with NULL.  */
  result = (*in == sep) ? in : NULL;

  /* Terminate copied string.  */
  *out = '\0';
  return result;
}
5590
5591 /* Look at the argument of --regex or --no-regex and do the right
5592 thing. Same for each line of a regexp file. */
5593 static void
5594 analyse_regex (char *regex_arg)
5595 {
5596 if (regex_arg == NULL)
5597 {
5598 free_regexps (); /* --no-regex: remove existing regexps */
5599 return;
5600 }
5601
5602 /* A real --regexp option or a line in a regexp file. */
5603 switch (regex_arg[0])
5604 {
5605 /* Comments in regexp file or null arg to --regex. */
5606 case '\0':
5607 case ' ':
5608 case '\t':
5609 break;
5610
5611 /* Read a regex file. This is recursive and may result in a
5612 loop, which will stop when the file descriptors are exhausted. */
5613 case '@':
5614 {
5615 FILE *regexfp;
5616 linebuffer regexbuf;
5617 char *regexfile = regex_arg + 1;
5618
5619 /* regexfile is a file containing regexps, one per line. */
5620 regexfp = fopen (regexfile, "r");
5621 if (regexfp == NULL)
5622 pfatal (regexfile);
5623 linebuffer_init (&regexbuf);
5624 while (readline_internal (&regexbuf, regexfp) > 0)
5625 analyse_regex (regexbuf.buffer);
5626 free (regexbuf.buffer);
5627 fclose (regexfp);
5628 }
5629 break;
5630
5631 /* Regexp to be used for a specific language only. */
5632 case '{':
5633 {
5634 language *lang;
5635 char *lang_name = regex_arg + 1;
5636 char *cp;
5637
5638 for (cp = lang_name; *cp != '}'; cp++)
5639 if (*cp == '\0')
5640 {
5641 error ("unterminated language name in regex: %s", regex_arg);
5642 return;
5643 }
5644 *cp++ = '\0';
5645 lang = get_language_from_langname (lang_name);
5646 if (lang == NULL)
5647 return;
5648 add_regex (cp, lang);
5649 }
5650 break;
5651
5652 /* Regexp to be used for any language. */
5653 default:
5654 add_regex (regex_arg, NULL);
5655 break;
5656 }
5657 }
5658
5659 /* Separate the regexp pattern, compile it,
5660 and care for optional name and modifiers. */
5661 static void
5662 add_regex (char *regexp_pattern, language *lang)
5663 {
5664 static struct re_pattern_buffer zeropattern;
5665 char sep, *pat, *name, *modifiers;
5666 char empty = '\0';
5667 const char *err;
5668 struct re_pattern_buffer *patbuf;
5669 regexp *rp;
5670 bool
5671 force_explicit_name = TRUE, /* do not use implicit tag names */
5672 ignore_case = FALSE, /* case is significant */
5673 multi_line = FALSE, /* matches are done one line at a time */
5674 single_line = FALSE; /* dot does not match newline */
5675
5676
5677 if (strlen (regexp_pattern) < 3)
5678 {
5679 error ("null regexp");
5680 return;
5681 }
5682 sep = regexp_pattern[0];
5683 name = scan_separators (regexp_pattern);
5684 if (name == NULL)
5685 {
5686 error ("%s: unterminated regexp", regexp_pattern);
5687 return;
5688 }
5689 if (name[1] == sep)
5690 {
5691 error ("null name for regexp \"%s\"", regexp_pattern);
5692 return;
5693 }
5694 modifiers = scan_separators (name);
5695 if (modifiers == NULL) /* no terminating separator --> no name */
5696 {
5697 modifiers = name;
5698 name = &empty;
5699 }
5700 else
5701 modifiers += 1; /* skip separator */
5702
5703 /* Parse regex modifiers. */
5704 for (; modifiers[0] != '\0'; modifiers++)
5705 switch (modifiers[0])
5706 {
5707 case 'N':
5708 if (modifiers == name)
5709 error ("forcing explicit tag name but no name, ignoring");
5710 force_explicit_name = TRUE;
5711 break;
5712 case 'i':
5713 ignore_case = TRUE;
5714 break;
5715 case 's':
5716 single_line = TRUE;
5717 /* FALLTHRU */
5718 case 'm':
5719 multi_line = TRUE;
5720 need_filebuf = TRUE;
5721 break;
5722 default:
5723 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5724 break;
5725 }
5726
5727 patbuf = xnew (1, struct re_pattern_buffer);
5728 *patbuf = zeropattern;
5729 if (ignore_case)
5730 {
5731 static char lc_trans[CHARS];
5732 int i;
5733 for (i = 0; i < CHARS; i++)
5734 lc_trans[i] = lowcase (i);
5735 patbuf->translate = lc_trans; /* translation table to fold case */
5736 }
5737
5738 if (multi_line)
5739 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5740 else
5741 pat = regexp_pattern;
5742
5743 if (single_line)
5744 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5745 else
5746 re_set_syntax (RE_SYNTAX_EMACS);
5747
5748 err = re_compile_pattern (pat, strlen (pat), patbuf);
5749 if (multi_line)
5750 free (pat);
5751 if (err != NULL)
5752 {
5753 error ("%s while compiling pattern", err);
5754 return;
5755 }
5756
5757 rp = p_head;
5758 p_head = xnew (1, regexp);
5759 p_head->pattern = savestr (regexp_pattern);
5760 p_head->p_next = rp;
5761 p_head->lang = lang;
5762 p_head->pat = patbuf;
5763 p_head->name = savestr (name);
5764 p_head->error_signaled = FALSE;
5765 p_head->force_explicit_name = force_explicit_name;
5766 p_head->ignore_case = ignore_case;
5767 p_head->multi_line = multi_line;
5768 }
5769
5770 /*
5771 * Do the substitutions indicated by the regular expression and
5772 * arguments.
5773 */
5774 static char *
5775 substitute (char *in, char *out, struct re_registers *regs)
5776 {
5777 char *result, *t;
5778 int size, dig, diglen;
5779
5780 result = NULL;
5781 size = strlen (out);
5782
5783 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5784 if (out[size - 1] == '\\')
5785 fatal ("pattern error in \"%s\"", out);
5786 for (t = etags_strchr (out, '\\');
5787 t != NULL;
5788 t = etags_strchr (t + 2, '\\'))
5789 if (ISDIGIT (t[1]))
5790 {
5791 dig = t[1] - '0';
5792 diglen = regs->end[dig] - regs->start[dig];
5793 size += diglen - 2;
5794 }
5795 else
5796 size -= 1;
5797
5798 /* Allocate space and do the substitutions. */
5799 assert (size >= 0);
5800 result = xnew (size + 1, char);
5801
5802 for (t = result; *out != '\0'; out++)
5803 if (*out == '\\' && ISDIGIT (*++out))
5804 {
5805 dig = *out - '0';
5806 diglen = regs->end[dig] - regs->start[dig];
5807 memcpy (t, in + regs->start[dig], diglen);
5808 t += diglen;
5809 }
5810 else
5811 *t++ = *out;
5812 *t = '\0';
5813
5814 assert (t <= result + size);
5815 assert (t - result == (int)strlen (result));
5816
5817 return result;
5818 }
5819
5820 /* Deallocate all regexps. */
5821 static void
5822 free_regexps (void)
5823 {
5824 regexp *rp;
5825 while (p_head != NULL)
5826 {
5827 rp = p_head->p_next;
5828 free (p_head->pattern);
5829 free (p_head->name);
5830 free (p_head);
5831 p_head = rp;
5832 }
5833 return;
5834 }
5835
5836 /*
5837 * Reads the whole file as a single string from `filebuf' and looks for
5838 * multi-line regular expressions, creating tags on matches.
5839 * readline already dealt with normal regexps.
5840 *
5841 * Idea by Ben Wing <ben@666.com> (2002).
5842 */
5843 static void
5844 regex_tag_multiline (void)
5845 {
5846 char *buffer = filebuf.buffer;
5847 regexp *rp;
5848 char *name;
5849
5850 for (rp = p_head; rp != NULL; rp = rp->p_next)
5851 {
5852 int match = 0;
5853
5854 if (!rp->multi_line)
5855 continue; /* skip normal regexps */
5856
5857 /* Generic initializations before parsing file from memory. */
5858 lineno = 1; /* reset global line number */
5859 charno = 0; /* reset global char number */
5860 linecharno = 0; /* reset global char number of line start */
5861
5862 /* Only use generic regexps or those for the current language. */
5863 if (rp->lang != NULL && rp->lang != curfdp->lang)
5864 continue;
5865
5866 while (match >= 0 && match < filebuf.len)
5867 {
5868 match = re_search (rp->pat, buffer, filebuf.len, charno,
5869 filebuf.len - match, &rp->regs);
5870 switch (match)
5871 {
5872 case -2:
5873 /* Some error. */
5874 if (!rp->error_signaled)
5875 {
5876 error ("regexp stack overflow while matching \"%s\"",
5877 rp->pattern);
5878 rp->error_signaled = TRUE;
5879 }
5880 break;
5881 case -1:
5882 /* No match. */
5883 break;
5884 default:
5885 if (match == rp->regs.end[0])
5886 {
5887 if (!rp->error_signaled)
5888 {
5889 error ("regexp matches the empty string: \"%s\"",
5890 rp->pattern);
5891 rp->error_signaled = TRUE;
5892 }
5893 match = -3; /* exit from while loop */
5894 break;
5895 }
5896
5897 /* Match occurred. Construct a tag. */
5898 while (charno < rp->regs.end[0])
5899 if (buffer[charno++] == '\n')
5900 lineno++, linecharno = charno;
5901 name = rp->name;
5902 if (name[0] == '\0')
5903 name = NULL;
5904 else /* make a named tag */
5905 name = substitute (buffer, rp->name, &rp->regs);
5906 if (rp->force_explicit_name)
5907 /* Force explicit tag name, if a name is there. */
5908 pfnote (name, TRUE, buffer + linecharno,
5909 charno - linecharno + 1, lineno, linecharno);
5910 else
5911 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5912 charno - linecharno + 1, lineno, linecharno);
5913 break;
5914 }
5915 }
5916 }
5917 }
5918
5919 \f
5920 static bool
5921 nocase_tail (const char *cp)
5922 {
5923 register int len = 0;
5924
5925 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5926 cp++, len++;
5927 if (*cp == '\0' && !intoken (dbp[len]))
5928 {
5929 dbp += len;
5930 return TRUE;
5931 }
5932 return FALSE;
5933 }
5934
5935 static void
5936 get_tag (register char *bp, char **namepp)
5937 {
5938 register char *cp = bp;
5939
5940 if (*bp != '\0')
5941 {
5942 /* Go till you get to white space or a syntactic break */
5943 for (cp = bp + 1; !notinname (*cp); cp++)
5944 continue;
5945 make_tag (bp, cp - bp, TRUE,
5946 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5947 }
5948
5949 if (namepp != NULL)
5950 *namepp = savenstr (bp, cp - bp);
5951 }
5952
5953 /*
5954 * Read a line of text from `stream' into `lbp', excluding the
5955 * newline or CR-NL, if any. Return the number of characters read from
5956 * `stream', which is the length of the line including the newline.
5957 *
5958 * On DOS or Windows we do not count the CR character, if any before the
5959 * NL, in the returned length; this mirrors the behavior of Emacs on those
5960 * platforms (for text files, it translates CR-NL to NL as it reads in the
5961 * file).
5962 *
5963 * If multi-line regular expressions are requested, each line read is
5964 * appended to `filebuf'.
5965 */
5966 static long
5967 readline_internal (linebuffer *lbp, register FILE *stream)
5968 {
5969 char *buffer = lbp->buffer;
5970 register char *p = lbp->buffer;
5971 register char *pend;
5972 int chars_deleted;
5973
5974 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5975
5976 for (;;)
5977 {
5978 register int c = getc (stream);
5979 if (p == pend)
5980 {
5981 /* We're at the end of linebuffer: expand it. */
5982 lbp->size *= 2;
5983 xrnew (buffer, lbp->size, char);
5984 p += buffer - lbp->buffer;
5985 pend = buffer + lbp->size;
5986 lbp->buffer = buffer;
5987 }
5988 if (c == EOF)
5989 {
5990 *p = '\0';
5991 chars_deleted = 0;
5992 break;
5993 }
5994 if (c == '\n')
5995 {
5996 if (p > buffer && p[-1] == '\r')
5997 {
5998 p -= 1;
5999 #ifdef DOS_NT
6000 /* Assume CRLF->LF translation will be performed by Emacs
6001 when loading this file, so CRs won't appear in the buffer.
6002 It would be cleaner to compensate within Emacs;
6003 however, Emacs does not know how many CRs were deleted
6004 before any given point in the file. */
6005 chars_deleted = 1;
6006 #else
6007 chars_deleted = 2;
6008 #endif
6009 }
6010 else
6011 {
6012 chars_deleted = 1;
6013 }
6014 *p = '\0';
6015 break;
6016 }
6017 *p++ = c;
6018 }
6019 lbp->len = p - buffer;
6020
6021 if (need_filebuf /* we need filebuf for multi-line regexps */
6022 && chars_deleted > 0) /* not at EOF */
6023 {
6024 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6025 {
6026 /* Expand filebuf. */
6027 filebuf.size *= 2;
6028 xrnew (filebuf.buffer, filebuf.size, char);
6029 }
6030 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6031 filebuf.len += lbp->len;
6032 filebuf.buffer[filebuf.len++] = '\n';
6033 filebuf.buffer[filebuf.len] = '\0';
6034 }
6035
6036 return lbp->len + chars_deleted;
6037 }
6038
6039 /*
6040 * Like readline_internal, above, but in addition try to match the
6041 * input line against relevant regular expressions and manage #line
6042 * directives.
6043 */
6044 static void
6045 readline (linebuffer *lbp, FILE *stream)
6046 {
6047 long result;
6048
6049 linecharno = charno; /* update global char number of line start */
6050 result = readline_internal (lbp, stream); /* read line */
6051 lineno += 1; /* increment global line number */
6052 charno += result; /* increment global char number */
6053
6054 /* Honor #line directives. */
6055 if (!no_line_directive)
6056 {
6057 static bool discard_until_line_directive;
6058
6059 /* Check whether this is a #line directive. */
6060 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6061 {
6062 unsigned int lno;
6063 int start = 0;
6064
6065 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6066 && start > 0) /* double quote character found */
6067 {
6068 char *endp = lbp->buffer + start;
6069
6070 while ((endp = etags_strchr (endp, '"')) != NULL
6071 && endp[-1] == '\\')
6072 endp++;
6073 if (endp != NULL)
6074 /* Ok, this is a real #line directive. Let's deal with it. */
6075 {
6076 char *taggedabsname; /* absolute name of original file */
6077 char *taggedfname; /* name of original file as given */
6078 char *name; /* temp var */
6079
6080 discard_until_line_directive = FALSE; /* found it */
6081 name = lbp->buffer + start;
6082 *endp = '\0';
6083 canonicalize_filename (name);
6084 taggedabsname = absolute_filename (name, tagfiledir);
6085 if (filename_is_absolute (name)
6086 || filename_is_absolute (curfdp->infname))
6087 taggedfname = savestr (taggedabsname);
6088 else
6089 taggedfname = relative_filename (taggedabsname,tagfiledir);
6090
6091 if (streq (curfdp->taggedfname, taggedfname))
6092 /* The #line directive is only a line number change. We
6093 deal with this afterwards. */
6094 free (taggedfname);
6095 else
6096 /* The tags following this #line directive should be
6097 attributed to taggedfname. In order to do this, set
6098 curfdp accordingly. */
6099 {
6100 fdesc *fdp; /* file description pointer */
6101
6102 /* Go look for a file description already set up for the
6103 file indicated in the #line directive. If there is
6104 one, use it from now until the next #line
6105 directive. */
6106 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6107 if (streq (fdp->infname, curfdp->infname)
6108 && streq (fdp->taggedfname, taggedfname))
6109 /* If we remove the second test above (after the &&)
6110 then all entries pertaining to the same file are
6111 coalesced in the tags file. If we use it, then
6112 entries pertaining to the same file but generated
6113 from different files (via #line directives) will
6114 go into separate sections in the tags file. These
6115 alternatives look equivalent. The first one
6116 destroys some apparently useless information. */
6117 {
6118 curfdp = fdp;
6119 free (taggedfname);
6120 break;
6121 }
6122 /* Else, if we already tagged the real file, skip all
6123 input lines until the next #line directive. */
6124 if (fdp == NULL) /* not found */
6125 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6126 if (streq (fdp->infabsname, taggedabsname))
6127 {
6128 discard_until_line_directive = TRUE;
6129 free (taggedfname);
6130 break;
6131 }
6132 /* Else create a new file description and use that from
6133 now on, until the next #line directive. */
6134 if (fdp == NULL) /* not found */
6135 {
6136 fdp = fdhead;
6137 fdhead = xnew (1, fdesc);
6138 *fdhead = *curfdp; /* copy curr. file description */
6139 fdhead->next = fdp;
6140 fdhead->infname = savestr (curfdp->infname);
6141 fdhead->infabsname = savestr (curfdp->infabsname);
6142 fdhead->infabsdir = savestr (curfdp->infabsdir);
6143 fdhead->taggedfname = taggedfname;
6144 fdhead->usecharno = FALSE;
6145 fdhead->prop = NULL;
6146 fdhead->written = FALSE;
6147 curfdp = fdhead;
6148 }
6149 }
6150 free (taggedabsname);
6151 lineno = lno - 1;
6152 readline (lbp, stream);
6153 return;
6154 } /* if a real #line directive */
6155 } /* if #line is followed by a number */
6156 } /* if line begins with "#line " */
6157
6158 /* If we are here, no #line directive was found. */
6159 if (discard_until_line_directive)
6160 {
6161 if (result > 0)
6162 {
6163 /* Do a tail recursion on ourselves, thus discarding the contents
6164 of the line buffer. */
6165 readline (lbp, stream);
6166 return;
6167 }
6168 /* End of file. */
6169 discard_until_line_directive = FALSE;
6170 return;
6171 }
6172 } /* if #line directives should be considered */
6173
6174 {
6175 int match;
6176 regexp *rp;
6177 char *name;
6178
6179 /* Match against relevant regexps. */
6180 if (lbp->len > 0)
6181 for (rp = p_head; rp != NULL; rp = rp->p_next)
6182 {
6183 /* Only use generic regexps or those for the current language.
6184 Also do not use multiline regexps, which is the job of
6185 regex_tag_multiline. */
6186 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6187 || rp->multi_line)
6188 continue;
6189
6190 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6191 switch (match)
6192 {
6193 case -2:
6194 /* Some error. */
6195 if (!rp->error_signaled)
6196 {
6197 error ("regexp stack overflow while matching \"%s\"",
6198 rp->pattern);
6199 rp->error_signaled = TRUE;
6200 }
6201 break;
6202 case -1:
6203 /* No match. */
6204 break;
6205 case 0:
6206 /* Empty string matched. */
6207 if (!rp->error_signaled)
6208 {
6209 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6210 rp->error_signaled = TRUE;
6211 }
6212 break;
6213 default:
6214 /* Match occurred. Construct a tag. */
6215 name = rp->name;
6216 if (name[0] == '\0')
6217 name = NULL;
6218 else /* make a named tag */
6219 name = substitute (lbp->buffer, rp->name, &rp->regs);
6220 if (rp->force_explicit_name)
6221 /* Force explicit tag name, if a name is there. */
6222 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6223 else
6224 make_tag (name, strlen (name), TRUE,
6225 lbp->buffer, match, lineno, linecharno);
6226 break;
6227 }
6228 }
6229 }
6230 }
6231
6232 \f
/*
 * Return a newly allocated copy of the NUL-terminated string CP.
 * The copy occupies strlen(cp)+1 bytes obtained through savenstr.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6242
6243 /*
6244 * Return a pointer to a space of size LEN+1 allocated with xnew where
6245 * the string CP has been copied for at most the first LEN characters.
6246 */
6247 static char *
6248 savenstr (const char *cp, int len)
6249 {
6250 register char *dp;
6251
6252 dp = xnew (len + 1, char);
6253 memcpy (dp, cp, len);
6254 dp[len] = '\0';
6255 return dp;
6256 }
6257
/*
 * Return a pointer to the last place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating NUL counts
 * as part of the string, so searching for '\0' finds the end.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6277
/*
 * Return a pointer to the first place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating NUL counts
 * as part of the string, so searching for '\0' finds the end.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6294
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  End of string is not space, so the scan stops
   there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6303
/* Return a pointer to the first character at or after CP that either
   satisfies iswhite or is the terminating NUL.  */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6312
/* Skip any chars in the "name" class: return a pointer to the first
   character at or after CP for which notinname is true.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6322
/* Print an error message through `error', using S1 as the format and
   S2 as its single argument, then exit with a failure status.  Does
   not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6330
/* Report the error described by errno, prefixed with S1, through
   perror, then exit with a failure status.  Does not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6337
6338 static void
6339 suggest_asking_for_help (void)
6340 {
6341 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6342 progname);
6343 exit (EXIT_FAILURE);
6344 }
6345
6346 /* Output a diagnostic with printf-style FORMAT and args. */
6347 static void
6348 error (const char *format, ...)
6349 {
6350 va_list ap;
6351 va_start (ap, format);
6352 fprintf (stderr, "%s: ", progname);
6353 vfprintf (stderr, format, ap);
6354 fprintf (stderr, "\n");
6355 va_end (ap);
6356 }
6357
6358 /* Return a newly-allocated string whose contents
6359 concatenate those of s1, s2, s3. */
6360 static char *
6361 concat (const char *s1, const char *s2, const char *s3)
6362 {
6363 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6364 char *result = xnew (len1 + len2 + len3 + 1, char);
6365
6366 strcpy (result, s1);
6367 strcpy (result + len1, s2);
6368 strcpy (result + len1 + len2, s3);
6369 result[len1 + len2 + len3] = '\0';
6370
6371 return result;
6372 }
6373
6374 \f
6375 /* Does the same work as the system V getcwd, but does not need to
6376 guess the buffer size in advance. */
6377 static char *
6378 etags_getcwd (void)
6379 {
6380 int bufsize = 200;
6381 char *path = xnew (bufsize, char);
6382
6383 while (getcwd (path, bufsize) == NULL)
6384 {
6385 if (errno != ERANGE)
6386 pfatal ("getcwd");
6387 bufsize *= 2;
6388 free (path);
6389 path = xnew (bufsize, char);
6390 }
6391
6392 canonicalize_filename (path);
6393 return path;
6394 }
6395
6396 /* Return a newly allocated string containing the file name of FILE
6397 relative to the absolute directory DIR (which should end with a slash). */
6398 static char *
6399 relative_filename (char *file, char *dir)
6400 {
6401 char *fp, *dp, *afn, *res;
6402 int i;
6403
6404 /* Find the common root of file and dir (with a trailing slash). */
6405 afn = absolute_filename (file, cwd);
6406 fp = afn;
6407 dp = dir;
6408 while (*fp++ == *dp++)
6409 continue;
6410 fp--, dp--; /* back to the first differing char */
6411 #ifdef DOS_NT
6412 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6413 return afn;
6414 #endif
6415 do /* look at the equal chars until '/' */
6416 fp--, dp--;
6417 while (*fp != '/');
6418
6419 /* Build a sequence of "../" strings for the resulting relative file name. */
6420 i = 0;
6421 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6422 i += 1;
6423 res = xnew (3*i + strlen (fp + 1) + 1, char);
6424 res[0] = '\0';
6425 while (i-- > 0)
6426 strcat (res, "../");
6427
6428 /* Add the file name relative to the common root of file and dir. */
6429 strcat (res, fp + 1);
6430 free (afn);
6431
6432 return res;
6433 }
6434
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is, otherwise DIR is prepended.  The
   "/dirname/.." and "/." components are then removed in place.  If
   nothing is left, "/" is returned.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *path, *slash, *prev;

  if (filename_is_absolute (file))
    path = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    path = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  After a deletion
     the same position is examined again, since what moved into place
     may itself start with one of them.  */
  slash = etags_strchr (path, '/');
  while (slash != NULL && slash[0] != '\0')
    {
      int dot_follows = (slash[1] == '.');

      if (dot_follows
          && slash[2] == '.'
          && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/..": back up to the start of the preceding component.  */
          prev = slash;
          do
            prev--;
          while (prev >= path && !filename_is_absolute (prev));
          if (prev < path)
            prev = slash;       /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (prev[0] != '/')
            prev = slash;
#endif
          /* The count includes the terminating NUL.  */
          memmove (prev, slash + 3, strlen (slash + 2));
          slash = prev;
        }
      else if (dot_follows && (slash[2] == '/' || slash[2] == '\0'))
        {
          /* "/.": drop it; the count includes the terminating NUL.  */
          memmove (slash, slash + 2, strlen (slash + 1));
        }
      else
        slash = etags_strchr (slash + 1, '/');
    }

  if (path[0] == '\0')          /* just a safety net: should never happen */
    {
      free (path);
      return savestr ("/");
    }

  return path;
}
6497
6498 /* Return a newly allocated string containing the absolute
6499 file name of dir where FILE resides given DIR (which should
6500 end with a slash). */
6501 static char *
6502 absolute_dirname (char *file, char *dir)
6503 {
6504 char *slashp, *res;
6505 char save;
6506
6507 slashp = etags_strrchr (file, '/');
6508 if (slashp == NULL)
6509 return savestr (dir);
6510 save = slashp[1];
6511 slashp[1] = '\0';
6512 res = absolute_filename (file, dir);
6513 slashp[1] = save;
6514
6515 return res;
6516 }
6517
/* Whether the argument string is an absolute file name, that is,
   whether it starts with a slash or, on DOS_NT, with a drive letter,
   a colon and a slash.  The argument string must have been
   canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
#ifdef DOS_NT
  if (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return fn[0] == '/';
}
6529
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  The separator is the slash, or the backslash on
   DOS_NT; every run of separators becomes one slash.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c) isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads ahead while FN writes behind it: the result is never
     longer than the input.  */
  src = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *fn++ = *src++;
      else
        {
          /* Collapse multiple separators into a single slash.  */
          *fn++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *fn = '\0';
}
6559
6560 \f
6561 /* Initialize a linebuffer for use. */
6562 static void
6563 linebuffer_init (linebuffer *lbp)
6564 {
6565 lbp->size = (DEBUG) ? 3 : 200;
6566 lbp->buffer = xnew (lbp->size, char);
6567 lbp->buffer[0] = '\0';
6568 lbp->len = 0;
6569 }
6570
6571 /* Set the minimum size of a string contained in a linebuffer. */
6572 static void
6573 linebuffer_setlen (linebuffer *lbp, int toksize)
6574 {
6575 while (lbp->size <= toksize)
6576 {
6577 lbp->size *= 2;
6578 xrnew (lbp->buffer, lbp->size, char);
6579 }
6580 lbp->len = toksize;
6581 }
6582
/* Like malloc, but a null result from malloc is reported through
   `fatal' ("virtual memory exhausted"), which exits.  */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (mem == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
6592
/* Like realloc, but a null result from realloc is reported through
   `fatal' ("virtual memory exhausted"), which exits.  */
static void *
xrealloc (char *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (mem == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
6601
6602 /*
6603 * Local Variables:
6604 * indent-tabs-mode: t
6605 * tab-width: 8
6606 * fill-column: 79
6607 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6608 * c-file-style: "gnu"
6609 * End:
6610 */
6611
6612 /* etags.c ends here */