Support filtering by keywords in package listings.
[bpt/emacs.git] / lib-src / etags.c
... / ...
CommitLineData
1/* Tags file maker to go with GNU Emacs -*- coding: utf-8 -*-
2
3Copyright (C) 1984 The Regents of the University of California
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are
7met:
81. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
102. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
143. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2013 Free Software
32Foundation, Inc.
33
34This file is not considered part of GNU Emacs.
35
36This program is free software: you can redistribute it and/or modify
37it under the terms of the GNU General Public License as published by
38the Free Software Foundation, either version 3 of the License, or
39(at your option) any later version.
40
41This program is distributed in the hope that it will be useful,
42but WITHOUT ANY WARRANTY; without even the implied warranty of
43MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44GNU General Public License for more details.
45
46You should have received a copy of the GNU General Public License
47along with this program. If not, see <http://www.gnu.org/licenses/>. */
48
49
50/* NB To comply with the above BSD license, copyright information is
51reproduced in etc/ETAGS.README. That file should be updated when the
52above notices are.
53
54To the best of our knowledge, this code was originally based on the
55ctags.c distributed with BSD4.2, which was copyrighted by the
56University of California, as described above. */
57
58
59/*
60 * Authors:
61 * 1983 Ctags originally by Ken Arnold.
62 * 1984 Fortran added by Jim Kleckner.
63 * 1984 Ed Pelegri-Llopart added C typedefs.
64 * 1985 Emacs TAGS format by Richard Stallman.
65 * 1989 Sam Kendall added C++.
66 * 1992 Joseph B. Wells improved C and C++ parsing.
67 * 1993 Francesco Potortì reorganized C and C++.
68 * 1994 Line-by-line regexp tags by Tom Tromey.
69 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
70 * 2002 #line directives by Francesco Potortì.
71 *
72 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
73 */
74
75/*
76 * If you want to add support for a new language, start by looking at the LUA
77 * language, which is the simplest. Alternatively, consider distributing etags
78 * together with a configuration file containing regexp definitions for etags.
79 */
80
81char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
82
83#define TRUE 1
84#define FALSE 0
85
86#ifdef DEBUG
87# undef DEBUG
88# define DEBUG TRUE
89#else
90# define DEBUG FALSE
91# define NDEBUG /* disable assert */
92#endif
93
94#include <config.h>
95
96#ifndef _GNU_SOURCE
97# define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
98#endif
99
100/* WIN32_NATIVE is for XEmacs.
101 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
102#ifdef WIN32_NATIVE
103# undef MSDOS
104# undef WINDOWSNT
105# define WINDOWSNT
106#endif /* WIN32_NATIVE */
107
108#ifdef MSDOS
109# undef MSDOS
110# define MSDOS TRUE
111# include <fcntl.h>
112# include <sys/param.h>
113# include <io.h>
114#else
115# define MSDOS FALSE
116#endif /* MSDOS */
117
118#ifdef WINDOWSNT
119# include <fcntl.h>
120# include <direct.h>
121# include <io.h>
122# define MAXPATHLEN _MAX_PATH
123# undef HAVE_NTGUI
124# undef DOS_NT
125# define DOS_NT
126#endif /* WINDOWSNT */
127
128#include <unistd.h>
129#include <stdarg.h>
130#include <stdlib.h>
131#include <string.h>
132#include <stdio.h>
133#include <ctype.h>
134#include <errno.h>
135#include <sys/types.h>
136#include <sys/stat.h>
137#include <c-strcase.h>
138
139#include <assert.h>
140#ifdef NDEBUG
141# undef assert /* some systems have a buggy assert.h */
142# define assert(x) ((void) 0)
143#endif
144
145#include <getopt.h>
146#include <regex.h>
147
148/* Define CTAGS to make the program "ctags" compatible with the usual one.
149 Leave it undefined to make the program "etags", which makes emacs-style
150 tag tables and tags typedefs, #defines and struct/union/enum by default. */
151#ifdef CTAGS
152# undef CTAGS
153# define CTAGS TRUE
154#else
155# define CTAGS FALSE
156#endif
157
/* String equality predicates.  Each one evaluates to nonzero when the
   two strings compare equal.  streq and strneq compare with strcmp and
   strncmp; strcaseeq and strncaseeq compare with c_strcasecmp and
   c_strncasecmp.  The "n" variants look at no more than N characters.

   Neither argument may be NULL.  All four macros assert that BOTH
   pointers are non-NULL before comparing; checking only that one of
   them is non-NULL would let a NULL reach the comparison function.
   The check disappears when assert is disabled.  Note that S and T
   are evaluated more than once.  */
#define streq(s,t)      (assert ((s)!=NULL && (t)!=NULL), !strcmp ((s), (t)))
#define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp ((s), (t)))
#define strneq(s,t,n)   (assert ((s)!=NULL && (t)!=NULL), !strncmp ((s), (t), (n)))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp ((s), (t), (n)))
162
/* Character helpers.  Every macro below funnels its argument through
   CHAR, which masks it into the range 0 .. CHARS-1, so that a char
   holding a negative value can still be used as a table index or
   handed to the <ctype.h> functions.  */
#define CHARS 256               /* size of the per-character tables */
#define CHAR(x) ((unsigned int)(x) & (CHARS - 1))

/* Thin wrappers around <ctype.h>.  */
#define ISALNUM(c) isalnum (CHAR (c))
#define ISALPHA(c) isalpha (CHAR (c))
#define ISDIGIT(c) isdigit (CHAR (c))
#define ISLOWER(c) islower (CHAR (c))
#define lowcase(c) tolower (CHAR (c))

/* Lookups in the boolean tables _wht, _nin, _btk, _itk and _etk.  */
#define iswhite(c) (_wht[CHAR (c)])     /* c is white space (see white) */
#define notinname(c) (_nin[CHAR (c)])   /* c cannot be in a name (see nonam) */
#define begtoken(c) (_btk[CHAR (c)])    /* c may start a token (see begtk) */
#define intoken(c) (_itk[CHAR (c)])     /* c may be inside a token (see midtk) */
#define endtoken(c) (_etk[CHAR (c)])    /* c terminates a token (see endtk) */
177
178
179/*
180 * xnew, xrnew -- allocate, reallocate storage
181 *
182 * SYNOPSIS: Type *xnew (int n, Type);
183 * void xrnew (OldPointer, int n, Type);
184 */
185#if DEBUG
186# include "chkmalloc.h"
187# define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
188 (n) * sizeof (Type)))
189# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
190 (char *) (op), (n) * sizeof (Type)))
191#else
192# define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
193# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
194 (char *) (op), (n) * sizeof (Type)))
195#endif
196
197#define bool int
198
199typedef void Lang_function (FILE *);
200
/* Pairs a compressed-file name suffix with the command used to
   decompress such files.  */
typedef struct
{
  /* File name suffix handled by this compressor.  */
  const char *suffix;
  /* Command that takes one argument and decompresses to stdout.  */
  const char *command;
} compressor;
206
207typedef struct
208{
209 const char *name; /* language name */
210 const char *help; /* detailed help for the language */
211 Lang_function *function; /* parse function */
212 const char **suffixes; /* name suffixes of this language's files */
213 const char **filenames; /* names of this language's files */
214 const char **interpreters; /* interpreters for this language */
215 bool metasource; /* source used to generate other sources */
216} language;
217
218typedef struct fdesc
219{
220 struct fdesc *next; /* for the linked list */
221 char *infname; /* uncompressed input file name */
222 char *infabsname; /* absolute uncompressed input file name */
223 char *infabsdir; /* absolute dir of input file */
224 char *taggedfname; /* file name to write in tagfile */
225 language *lang; /* language of file */
226 char *prop; /* file properties to write in tagfile */
227 bool usecharno; /* etags tags shall contain char number */
228 bool written; /* entry written in the tags file */
229} fdesc;
230
231typedef struct node_st
232{ /* sorting structure */
233 struct node_st *left, *right; /* left and right sons */
234 fdesc *fdp; /* description of file to whom tag belongs */
235 char *name; /* tag name */
236 char *regex; /* search regexp */
237 bool valid; /* write this tag on the tag file */
238 bool is_func; /* function tag: use regexp in CTAGS mode */
239 bool been_warned; /* warning already given for duplicated tag */
240 int lno; /* line number tag is on */
241 long cno; /* character number line starts on */
242} node;
243
/*
 * A `linebuffer' holds one line of text.  `readline_internal' reads a
 * line from a stream into a linebuffer, whatever the length of the
 * line.
 *
 *   size    is the size of BUFFER;
 *   len     is the length of the string in BUFFER after readline
 *           reads it;
 *   buffer  is the storage for the text.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
257
258/* Used to support mixing of --lang and file names. */
259typedef struct
260{
261 enum {
262 at_language, /* a language specification */
263 at_regexp, /* a regular expression */
264 at_filename, /* a file name */
265 at_stdin, /* read from stdin here */
266 at_end /* stop parsing the list */
267 } arg_type; /* argument type */
268 language *lang; /* language associated with the argument */
269 char *what; /* the argument itself */
270} argument;
271
272/* Structure defining a regular expression. */
273typedef struct regexp
274{
275 struct regexp *p_next; /* pointer to next in list */
276 language *lang; /* if set, use only for this language */
277 char *pattern; /* the regexp pattern */
278 char *name; /* tag name */
279 struct re_pattern_buffer *pat; /* the compiled pattern */
280 struct re_registers regs; /* re registers */
281 bool error_signaled; /* already signaled for this regexp */
282 bool force_explicit_name; /* do not allow implicit tag name */
283 bool ignore_case; /* ignore case when matching */
284 bool multi_line; /* do a multi-line match on the whole file */
285} regexp;
286
287
288/* Many compilers barf on this:
289 Lang_function Ada_funcs;
290 so let's write it this way */
291static void Ada_funcs (FILE *);
292static void Asm_labels (FILE *);
293static void C_entries (int c_ext, FILE *);
294static void default_C_entries (FILE *);
295static void plain_C_entries (FILE *);
296static void Cjava_entries (FILE *);
297static void Cobol_paragraphs (FILE *);
298static void Cplusplus_entries (FILE *);
299static void Cstar_entries (FILE *);
300static void Erlang_functions (FILE *);
301static void Forth_words (FILE *);
302static void Fortran_functions (FILE *);
303static void HTML_labels (FILE *);
304static void Lisp_functions (FILE *);
305static void Lua_functions (FILE *);
306static void Makefile_targets (FILE *);
307static void Pascal_functions (FILE *);
308static void Perl_functions (FILE *);
309static void PHP_functions (FILE *);
310static void PS_functions (FILE *);
311static void Prolog_functions (FILE *);
312static void Python_functions (FILE *);
313static void Scheme_functions (FILE *);
314static void TeX_commands (FILE *);
315static void Texinfo_nodes (FILE *);
316static void Yacc_entries (FILE *);
317static void just_read_file (FILE *);
318
319static language *get_language_from_langname (const char *);
320static void readline (linebuffer *, FILE *);
321static long readline_internal (linebuffer *, FILE *);
322static bool nocase_tail (const char *);
323static void get_tag (char *, char **);
324
325static void analyse_regex (char *);
326static void free_regexps (void);
327static void regex_tag_multiline (void);
328static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
329static _Noreturn void suggest_asking_for_help (void);
330_Noreturn void fatal (const char *, const char *);
331static _Noreturn void pfatal (const char *);
332static void add_node (node *, node **);
333
334static void init (void);
335static void process_file_name (char *, language *);
336static void process_file (FILE *, char *, language *);
337static void find_entries (FILE *);
338static void free_tree (node *);
339static void free_fdesc (fdesc *);
340static void pfnote (char *, bool, char *, int, int, long);
341static void invalidate_nodes (fdesc *, node **);
342static void put_entries (node *);
343
344static char *concat (const char *, const char *, const char *);
345static char *skip_spaces (char *);
346static char *skip_non_spaces (char *);
347static char *skip_name (char *);
348static char *savenstr (const char *, int);
349static char *savestr (const char *);
350static char *etags_strchr (const char *, int);
351static char *etags_strrchr (const char *, int);
352static char *etags_getcwd (void);
353static char *relative_filename (char *, char *);
354static char *absolute_filename (char *, char *);
355static char *absolute_dirname (char *, char *);
356static bool filename_is_absolute (char *f);
357static void canonicalize_filename (char *);
358static void linebuffer_init (linebuffer *);
359static void linebuffer_setlen (linebuffer *, int);
360static void *xmalloc (size_t);
361static void *xrealloc (char *, size_t);
362
363\f
364static char searchar = '/'; /* use /.../ searches */
365
366static char *tagfile; /* output file */
367static char *progname; /* name this program was invoked with */
368static char *cwd; /* current working directory */
369static char *tagfiledir; /* directory of tagfile */
370static FILE *tagf; /* ioptr for tags file */
371static ptrdiff_t whatlen_max; /* maximum length of any 'what' member */
372
373static fdesc *fdhead; /* head of file description list */
374static fdesc *curfdp; /* current file description */
375static int lineno; /* line number of current line */
376static long charno; /* current character number */
377static long linecharno; /* charno of start of current line */
378static char *dbp; /* pointer to start of current tag */
379
380static const int invalidcharno = -1;
381
382static node *nodehead; /* the head of the binary tree of tags */
383static node *last_node; /* the last node created */
384
385static linebuffer lb; /* the current line */
386static linebuffer filebuf; /* a buffer containing the whole file */
387static linebuffer token_name; /* a buffer containing a tag name */
388
389/* boolean "functions" (see init) */
390static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
391static const char
392 /* white chars */
393 *white = " \f\t\n\r\v",
394 /* not in a name */
395 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
396 /* token ending chars */
397 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
398 /* token starting chars */
399 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
400 /* valid in-token chars */
401 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
402
403static bool append_to_tagfile; /* -a: append to tags */
404/* The next five default to TRUE in C and derived languages. */
405static bool typedefs; /* -t: create tags for C and Ada typedefs */
406static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
407 /* 0 struct/enum/union decls, and C++ */
408 /* member functions. */
409static bool constantypedefs; /* -d: create tags for C #define, enum */
410 /* constants and variables. */
411 /* -D: opposite of -d. Default under ctags. */
412static bool globals; /* create tags for global variables */
413static bool members; /* create tags for C member variables */
414static bool declarations; /* --declarations: tag them and extern in C&Co*/
415static bool no_line_directive; /* ignore #line directives (undocumented) */
416static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
417static bool update; /* -u: update tags */
418static bool vgrind_style; /* -v: create vgrind style index output */
419static bool no_warnings; /* -w: suppress warnings (undocumented) */
420static bool cxref_style; /* -x: create cxref style output */
421static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
422static bool ignoreindent; /* -I: ignore indentation in C */
423static bool packages_only; /* --packages-only: in Ada, only tag packages*/
424
425/* STDIN is defined in LynxOS system headers */
426#ifdef STDIN
427# undef STDIN
428#endif
429
430#define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
431static bool parsing_stdin; /* --parse-stdin used */
432
433static regexp *p_head; /* list of all regexps */
434static bool need_filebuf; /* some regexes are multi-line */
435
436static struct option longopts[] =
437{
438 { "append", no_argument, NULL, 'a' },
439 { "packages-only", no_argument, &packages_only, TRUE },
440 { "c++", no_argument, NULL, 'C' },
441 { "declarations", no_argument, &declarations, TRUE },
442 { "no-line-directive", no_argument, &no_line_directive, TRUE },
443 { "no-duplicates", no_argument, &no_duplicates, TRUE },
444 { "help", no_argument, NULL, 'h' },
445 { "help", no_argument, NULL, 'H' },
446 { "ignore-indentation", no_argument, NULL, 'I' },
447 { "language", required_argument, NULL, 'l' },
448 { "members", no_argument, &members, TRUE },
449 { "no-members", no_argument, &members, FALSE },
450 { "output", required_argument, NULL, 'o' },
451 { "regex", required_argument, NULL, 'r' },
452 { "no-regex", no_argument, NULL, 'R' },
453 { "ignore-case-regex", required_argument, NULL, 'c' },
454 { "parse-stdin", required_argument, NULL, STDIN },
455 { "version", no_argument, NULL, 'V' },
456
457#if CTAGS /* Ctags options */
458 { "backward-search", no_argument, NULL, 'B' },
459 { "cxref", no_argument, NULL, 'x' },
460 { "defines", no_argument, NULL, 'd' },
461 { "globals", no_argument, &globals, TRUE },
462 { "typedefs", no_argument, NULL, 't' },
463 { "typedefs-and-c++", no_argument, NULL, 'T' },
464 { "update", no_argument, NULL, 'u' },
465 { "vgrind", no_argument, NULL, 'v' },
466 { "no-warn", no_argument, NULL, 'w' },
467
468#else /* Etags options */
469 { "no-defines", no_argument, NULL, 'D' },
470 { "no-globals", no_argument, &globals, FALSE },
471 { "include", required_argument, NULL, 'i' },
472#endif
473 { NULL }
474};
475
476static compressor compressors[] =
477{
478 { "z", "gzip -d -c"},
479 { "Z", "gzip -d -c"},
480 { "gz", "gzip -d -c"},
481 { "GZ", "gzip -d -c"},
482 { "bz2", "bzip2 -d -c" },
483 { "xz", "xz -d -c" },
484 { NULL }
485};
486
487/*
488 * Language stuff.
489 */
490
491/* Ada code */
492static const char *Ada_suffixes [] =
493 { "ads", "adb", "ada", NULL };
494static const char Ada_help [] =
495"In Ada code, functions, procedures, packages, tasks and types are\n\
496tags. Use the `--packages-only' option to create tags for\n\
497packages only.\n\
498Ada tag names have suffixes indicating the type of entity:\n\
499 Entity type: Qualifier:\n\
500 ------------ ----------\n\
501 function /f\n\
502 procedure /p\n\
503 package spec /s\n\
504 package body /b\n\
505 type /t\n\
506 task /k\n\
507Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
508body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
509will just search for any tag `bidule'.";
510
511/* Assembly code */
512static const char *Asm_suffixes [] =
513 { "a", /* Unix assembler */
514 "asm", /* Microcontroller assembly */
515 "def", /* BSO/Tasking definition includes */
516 "inc", /* Microcontroller include files */
517 "ins", /* Microcontroller include files */
518 "s", "sa", /* Unix assembler */
519 "S", /* cpp-processed Unix assembler */
520 "src", /* BSO/Tasking C compiler output */
521 NULL
522 };
523static const char Asm_help [] =
524"In assembler code, labels appearing at the beginning of a line,\n\
525followed by a colon, are tags.";
526
527
528/* Note that .c and .h can be considered C++, if the --c++ flag was
529 given, or if the `class' or `template' keywords are met inside the file.
530 That is why default_C_entries is called for these. */
531static const char *default_C_suffixes [] =
532 { "c", "h", NULL };
533#if CTAGS /* C help for Ctags */
534static const char default_C_help [] =
535"In C code, any C function is a tag. Use -t to tag typedefs.\n\
536Use -T to tag definitions of `struct', `union' and `enum'.\n\
537Use -d to tag `#define' macro definitions and `enum' constants.\n\
538Use --globals to tag global variables.\n\
539You can tag function declarations and external variables by\n\
540using `--declarations', and struct members by using `--members'.";
541#else /* C help for Etags */
542static const char default_C_help [] =
543"In C code, any C function or typedef is a tag, and so are\n\
544definitions of `struct', `union' and `enum'. `#define' macro\n\
545definitions and `enum' constants are tags unless you specify\n\
546`--no-defines'. Global variables are tags unless you specify\n\
547`--no-globals' and so are struct members unless you specify\n\
548`--no-members'. Use of `--no-globals', `--no-defines' and\n\
549`--no-members' can make the tags table file much smaller.\n\
550You can tag function declarations and external variables by\n\
551using `--declarations'.";
552#endif /* C help for Ctags and Etags */
553
554static const char *Cplusplus_suffixes [] =
555 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
556 "M", /* Objective C++ */
557 "pdb", /* PostScript with C syntax */
558 NULL };
559static const char Cplusplus_help [] =
560"In C++ code, all the tag constructs of C code are tagged. (Use\n\
561--help --lang=c --lang=c++ for full help.)\n\
562In addition to C tags, member functions are also recognized. Member\n\
563variables are recognized unless you use the `--no-members' option.\n\
564Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
565and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
566`operator+'.";
567
/* Java code: file name suffixes and help text.  The help text is
   never modified, so it is const like the help text of the other
   languages.  */
static const char *Cjava_suffixes [] =
 { "java", NULL };
static const char Cjava_help [] =
"In Java code, all the tags constructs of C and C++ code are\n\
tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
573
574
/* Cobol code: file name suffixes and help text.  The help text is
   never modified, so it is const like the help text of the other
   languages.  */
static const char *Cobol_suffixes [] =
 { "COB", "cob", NULL };
static const char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
580
581static const char *Cstar_suffixes [] =
582 { "cs", "hs", NULL };
583
584static const char *Erlang_suffixes [] =
585 { "erl", "hrl", NULL };
586static const char Erlang_help [] =
587"In Erlang code, the tags are the functions, records and macros\n\
588defined in the file.";
589
/* Forth code: file name suffixes and help text.  The suffix table is
   static, like the suffix tables of the other languages, so that it
   does not get external linkage.  */
static const char *Forth_suffixes [] =
 { "fth", "tok", NULL };
static const char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
595
596static const char *Fortran_suffixes [] =
597 { "F", "f", "f90", "for", NULL };
598static const char Fortran_help [] =
599"In Fortran code, functions, subroutines and block data are tags.";
600
601static const char *HTML_suffixes [] =
602 { "htm", "html", "shtml", NULL };
603static const char HTML_help [] =
604"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
605`h3' headers. Also, tags are `name=' in anchors and all\n\
606occurrences of `id='.";
607
608static const char *Lisp_suffixes [] =
609 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
610static const char Lisp_help [] =
611"In Lisp code, any function defined with `defun', any variable\n\
612defined with `defvar' or `defconst', and in general the first\n\
613argument of any expression that starts with `(def' in column zero\n\
614is a tag.\n\
615The `--declarations' option tags \"(defvar foo)\" constructs too.";
616
617static const char *Lua_suffixes [] =
618 { "lua", "LUA", NULL };
619static const char Lua_help [] =
620"In Lua scripts, all functions are tags.";
621
622static const char *Makefile_filenames [] =
623 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
624static const char Makefile_help [] =
625"In makefiles, targets are tags; additionally, variables are tags\n\
626unless you specify `--no-globals'.";
627
628static const char *Objc_suffixes [] =
629 { "lm", /* Objective lex file */
630 "m", /* Objective C file */
631 NULL };
632static const char Objc_help [] =
633"In Objective C code, tags include Objective C definitions for classes,\n\
634class categories, methods and protocols. Tags for variables and\n\
635functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
636(Use --help --lang=c --lang=objc --lang=java for full help.)";
637
638static const char *Pascal_suffixes [] =
639 { "p", "pas", NULL };
640static const char Pascal_help [] =
641"In Pascal code, the tags are the functions and procedures defined\n\
642in the file.";
643/* " // this is for working around an Emacs highlighting bug... */
644
645static const char *Perl_suffixes [] =
646 { "pl", "pm", NULL };
647static const char *Perl_interpreters [] =
648 { "perl", "@PERL@", NULL };
649static const char Perl_help [] =
650"In Perl code, the tags are the packages, subroutines and variables\n\
651defined by the `package', `sub', `my' and `local' keywords. Use\n\
652`--globals' if you want to tag global variables. Tags for\n\
653subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
654defined in the default package is `main::SUB'.";
655
656static const char *PHP_suffixes [] =
657 { "php", "php3", "php4", NULL };
658static const char PHP_help [] =
659"In PHP code, tags are functions, classes and defines. Unless you use\n\
660the `--no-members' option, vars are tags too.";
661
662static const char *plain_C_suffixes [] =
663 { "pc", /* Pro*C file */
664 NULL };
665
666static const char *PS_suffixes [] =
667 { "ps", "psw", NULL }; /* .psw is for PSWrap */
668static const char PS_help [] =
669"In PostScript code, the tags are the functions.";
670
671static const char *Prolog_suffixes [] =
672 { "prolog", NULL };
673static const char Prolog_help [] =
674"In Prolog code, tags are predicates and rules at the beginning of\n\
675line.";
676
677static const char *Python_suffixes [] =
678 { "py", NULL };
679static const char Python_help [] =
680"In Python code, `def' or `class' at the beginning of a line\n\
681generate a tag.";
682
683/* Can't do the `SCM' or `scm' prefix with a version number. */
684static const char *Scheme_suffixes [] =
685 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
686static const char Scheme_help [] =
687"In Scheme code, tags include anything defined with `def' or with a\n\
688construct whose name starts with `def'. They also include\n\
689variables set with `set!' at top level in the file.";
690
691static const char *TeX_suffixes [] =
692 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
693static const char TeX_help [] =
694"In LaTeX text, the argument of any of the commands `\\chapter',\n\
695`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
696`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
697`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
698`\\newenvironment' or `\\renewenvironment' is a tag.\n\
699\n\
700Other commands can be specified by setting the environment variable\n\
701`TEXTAGS' to a colon-separated list like, for example,\n\
702 TEXTAGS=\"mycommand:myothercommand\".";
703
704
705static const char *Texinfo_suffixes [] =
706 { "texi", "texinfo", "txi", NULL };
707static const char Texinfo_help [] =
708"for texinfo files, lines starting with @node are tagged.";
709
710static const char *Yacc_suffixes [] =
711 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
712static const char Yacc_help [] =
713"In Bison or Yacc input files, each rule defines as a tag the\n\
714nonterminal it constructs. The portions of the file that contain\n\
715C code are parsed as C code (use --help --lang=c --lang=yacc\n\
716for full help).";
717
/* Help text for the pseudo-language `auto'.  */
static const char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
721
722static const char none_help [] =
723"`none' is not a real language, it indicates to only do\n\
724regexp processing on files.";
725
726static const char no_lang_help [] =
727"No detailed help available for this language.";
728
729
730/*
731 * Table of languages.
732 *
733 * It is ok for a given function to be listed under more than one
734 * name. I just didn't.
735 */
736
737static language lang_names [] =
738{
739 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
740 { "asm", Asm_help, Asm_labels, Asm_suffixes },
741 { "c", default_C_help, default_C_entries, default_C_suffixes },
742 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
743 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
744 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
745 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
746 { "forth", Forth_help, Forth_words, Forth_suffixes },
747 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
748 { "html", HTML_help, HTML_labels, HTML_suffixes },
749 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
750 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
751 { "lua", Lua_help, Lua_functions, Lua_suffixes },
752 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
753 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
754 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
755 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
756 { "php", PHP_help, PHP_functions, PHP_suffixes },
757 { "postscript",PS_help, PS_functions, PS_suffixes },
758 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
759 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
760 { "python", Python_help, Python_functions, Python_suffixes },
761 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
762 { "tex", TeX_help, TeX_commands, TeX_suffixes },
763 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
764 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
765 { "auto", auto_help }, /* default guessing scheme */
766 { "none", none_help, just_read_file }, /* regexp matching only */
767 { NULL } /* end of list */
768};
769
770\f
771static void
772print_language_names (void)
773{
774 language *lang;
775 const char **name, **ext;
776
777 puts ("\nThese are the currently supported languages, along with the\n\
778default file names and dot suffixes:");
779 for (lang = lang_names; lang->name != NULL; lang++)
780 {
781 printf (" %-*s", 10, lang->name);
782 if (lang->filenames != NULL)
783 for (name = lang->filenames; *name != NULL; name++)
784 printf (" %s", *name);
785 if (lang->suffixes != NULL)
786 for (ext = lang->suffixes; *ext != NULL; ext++)
787 printf (" .%s", *ext);
788 puts ("");
789 }
790 puts ("where `auto' means use default language for files based on file\n\
791name suffix, and `none' means only do regexp processing on files.\n\
792If no language is specified and no matching suffix is found,\n\
793the first line of the file is read for a sharp-bang (#!) sequence\n\
794followed by the name of an interpreter. If no such sequence is found,\n\
795Fortran is tried first; if no tags are found, C is tried next.\n\
796When parsing any C file, a \"class\" or \"template\" keyword\n\
797switches to C++.");
798 puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
799\n\
800For detailed help on a given language use, for example,\n\
801etags --help --lang=ada.");
802}
803
804#ifndef EMACS_NAME
805# define EMACS_NAME "standalone"
806#endif
807#ifndef VERSION
808# define VERSION "17.38.1.4"
809#endif
810static _Noreturn void
811print_version (void)
812{
813 char emacs_copyright[] = COPYRIGHT;
814
815 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
816 puts (emacs_copyright);
817 puts ("This program is distributed under the terms in ETAGS.README");
818
819 exit (EXIT_SUCCESS);
820}
821
822#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
823# define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
824#endif
825
826static _Noreturn void
827print_help (argument *argbuffer)
828{
829 bool help_for_lang = FALSE;
830
831 for (; argbuffer->arg_type != at_end; argbuffer++)
832 if (argbuffer->arg_type == at_language)
833 {
834 if (help_for_lang)
835 puts ("");
836 puts (argbuffer->lang->help);
837 help_for_lang = TRUE;
838 }
839
840 if (help_for_lang)
841 exit (EXIT_SUCCESS);
842
843 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
844\n\
845These are the options accepted by %s.\n", progname, progname);
846 puts ("You may use unambiguous abbreviations for the long option names.");
847 puts (" A - as file name means read names from stdin (one per line).\n\
848Absolute names are stored in the output file as they are.\n\
849Relative ones are stored relative to the output file's directory.\n");
850
851 puts ("-a, --append\n\
852 Append tag entries to existing tags file.");
853
854 puts ("--packages-only\n\
855 For Ada files, only generate tags for packages.");
856
857 if (CTAGS)
858 puts ("-B, --backward-search\n\
859 Write the search commands for the tag entries using '?', the\n\
860 backward-search command instead of '/', the forward-search command.");
861
862 /* This option is mostly obsolete, because etags can now automatically
863 detect C++. Retained for backward compatibility and for debugging and
864 experimentation. In principle, we could want to tag as C++ even
865 before any "class" or "template" keyword.
866 puts ("-C, --c++\n\
867 Treat files whose name suffix defaults to C language as C++ files.");
868 */
869
870 puts ("--declarations\n\
871 In C and derived languages, create tags for function declarations,");
872 if (CTAGS)
873 puts ("\tand create tags for extern variables if --globals is used.");
874 else
875 puts
876 ("\tand create tags for extern variables unless --no-globals is used.");
877
878 if (CTAGS)
879 puts ("-d, --defines\n\
880 Create tag entries for C #define constants and enum constants, too.");
881 else
882 puts ("-D, --no-defines\n\
883 Don't create tag entries for C #define constants and enum constants.\n\
884 This makes the tags file smaller.");
885
886 if (!CTAGS)
887 puts ("-i FILE, --include=FILE\n\
888 Include a note in tag file indicating that, when searching for\n\
889 a tag, one should also consult the tags file FILE after\n\
890 checking the current file.");
891
892 puts ("-l LANG, --language=LANG\n\
893 Force the following files to be considered as written in the\n\
894 named language up to the next --language=LANG option.");
895
896 if (CTAGS)
897 puts ("--globals\n\
898 Create tag entries for global variables in some languages.");
899 else
900 puts ("--no-globals\n\
901 Do not create tag entries for global variables in some\n\
902 languages. This makes the tags file smaller.");
903
904 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
905 puts ("--no-line-directive\n\
906 Ignore #line preprocessor directives in C and derived languages.");
907
908 if (CTAGS)
909 puts ("--members\n\
910 Create tag entries for members of structures in some languages.");
911 else
912 puts ("--no-members\n\
913 Do not create tag entries for members of structures\n\
914 in some languages.");
915
916 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
917 Make a tag for each line matching a regular expression pattern\n\
918 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
919 files only. REGEXFILE is a file containing one REGEXP per line.\n\
920 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
921 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
922 puts (" If TAGNAME/ is present, the tags created are named.\n\
923 For example Tcl named tags can be created with:\n\
924 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
925 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
926 `m' means to allow multi-line matches, `s' implies `m' and\n\
927 causes dot to match any character, including newline.");
928
929 puts ("-R, --no-regex\n\
930 Don't create tags from regexps for the following files.");
931
932 puts ("-I, --ignore-indentation\n\
933 In C and C++ do not assume that a closing brace in the first\n\
934 column is the final brace of a function or structure definition.");
935
936 puts ("-o FILE, --output=FILE\n\
937 Write the tags to FILE.");
938
939 puts ("--parse-stdin=NAME\n\
940 Read from standard input and record tags as belonging to file NAME.");
941
942 if (CTAGS)
943 {
944 puts ("-t, --typedefs\n\
945 Generate tag entries for C and Ada typedefs.");
946 puts ("-T, --typedefs-and-c++\n\
947 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
948 and C++ member functions.");
949 }
950
951 if (CTAGS)
952 puts ("-u, --update\n\
953 Update the tag entries for the given files, leaving tag\n\
954 entries for other files in place. Currently, this is\n\
955 implemented by deleting the existing entries for the given\n\
956 files and then rewriting the new entries at the end of the\n\
957 tags file. It is often faster to simply rebuild the entire\n\
958 tag file than to use this.");
959
960 if (CTAGS)
961 {
962 puts ("-v, --vgrind\n\
963 Print on the standard output an index of items intended for\n\
964 human consumption, similar to the output of vgrind. The index\n\
965 is sorted, and gives the page number of each item.");
966
967 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
968 puts ("-w, --no-duplicates\n\
969 Do not create duplicate tag entries, for compatibility with\n\
970 traditional ctags.");
971
972 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
973 puts ("-w, --no-warn\n\
974 Suppress warning messages about duplicate tag entries.");
975
976 puts ("-x, --cxref\n\
977 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
978 The output uses line numbers instead of page numbers, but\n\
979 beyond that the differences are cosmetic; try both to see\n\
980 which you like.");
981 }
982
983 puts ("-V, --version\n\
984 Print the version of the program.\n\
985-h, --help\n\
986 Print this help message.\n\
987 Followed by one or more `--language' options prints detailed\n\
988 help about tag generation for the specified languages.");
989
990 print_language_names ();
991
992 puts ("");
993 puts ("Report bugs to bug-gnu-emacs@gnu.org");
994
995 exit (EXIT_SUCCESS);
996}
997
998\f
999int
1000main (int argc, char **argv)
1001{
1002 int i;
1003 unsigned int nincluded_files;
1004 char **included_files;
1005 argument *argbuffer;
1006 int current_arg, file_count;
1007 linebuffer filename_lb;
1008 bool help_asked = FALSE;
1009 ptrdiff_t len;
1010 char *optstring;
1011 int opt;
1012
1013
1014#ifdef DOS_NT
1015 _fmode = O_BINARY; /* all of files are treated as binary files */
1016#endif /* DOS_NT */
1017
1018 progname = argv[0];
1019 nincluded_files = 0;
1020 included_files = xnew (argc, char *);
1021 current_arg = 0;
1022 file_count = 0;
1023
1024 /* Allocate enough no matter what happens. Overkill, but each one
1025 is small. */
1026 argbuffer = xnew (argc, argument);
1027
1028 /*
1029 * Always find typedefs and structure tags.
1030 * Also default to find macro constants, enum constants, struct
1031 * members and global variables. Do it for both etags and ctags.
1032 */
1033 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1034 globals = members = TRUE;
1035
1036 /* When the optstring begins with a '-' getopt_long does not rearrange the
1037 non-options arguments to be at the end, but leaves them alone. */
1038 optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1039 (CTAGS) ? "BxdtTuvw" : "Di:",
1040 "");
1041
1042 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1043 switch (opt)
1044 {
1045 case 0:
1046 /* If getopt returns 0, then it has already processed a
1047 long-named option. We should do nothing. */
1048 break;
1049
1050 case 1:
1051 /* This means that a file name has been seen. Record it. */
1052 argbuffer[current_arg].arg_type = at_filename;
1053 argbuffer[current_arg].what = optarg;
1054 len = strlen (optarg);
1055 if (whatlen_max < len)
1056 whatlen_max = len;
1057 ++current_arg;
1058 ++file_count;
1059 break;
1060
1061 case STDIN:
1062 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1063 argbuffer[current_arg].arg_type = at_stdin;
1064 argbuffer[current_arg].what = optarg;
1065 len = strlen (optarg);
1066 if (whatlen_max < len)
1067 whatlen_max = len;
1068 ++current_arg;
1069 ++file_count;
1070 if (parsing_stdin)
1071 fatal ("cannot parse standard input more than once", (char *)NULL);
1072 parsing_stdin = TRUE;
1073 break;
1074
1075 /* Common options. */
1076 case 'a': append_to_tagfile = TRUE; break;
1077 case 'C': cplusplus = TRUE; break;
1078 case 'f': /* for compatibility with old makefiles */
1079 case 'o':
1080 if (tagfile)
1081 {
1082 error ("-o option may only be given once.");
1083 suggest_asking_for_help ();
1084 /* NOTREACHED */
1085 }
1086 tagfile = optarg;
1087 break;
1088 case 'I':
1089 case 'S': /* for backward compatibility */
1090 ignoreindent = TRUE;
1091 break;
1092 case 'l':
1093 {
1094 language *lang = get_language_from_langname (optarg);
1095 if (lang != NULL)
1096 {
1097 argbuffer[current_arg].lang = lang;
1098 argbuffer[current_arg].arg_type = at_language;
1099 ++current_arg;
1100 }
1101 }
1102 break;
1103 case 'c':
1104 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1105 optarg = concat (optarg, "i", ""); /* memory leak here */
1106 /* FALLTHRU */
1107 case 'r':
1108 argbuffer[current_arg].arg_type = at_regexp;
1109 argbuffer[current_arg].what = optarg;
1110 len = strlen (optarg);
1111 if (whatlen_max < len)
1112 whatlen_max = len;
1113 ++current_arg;
1114 break;
1115 case 'R':
1116 argbuffer[current_arg].arg_type = at_regexp;
1117 argbuffer[current_arg].what = NULL;
1118 ++current_arg;
1119 break;
1120 case 'V':
1121 print_version ();
1122 break;
1123 case 'h':
1124 case 'H':
1125 help_asked = TRUE;
1126 break;
1127
1128 /* Etags options */
1129 case 'D': constantypedefs = FALSE; break;
1130 case 'i': included_files[nincluded_files++] = optarg; break;
1131
1132 /* Ctags options. */
1133 case 'B': searchar = '?'; break;
1134 case 'd': constantypedefs = TRUE; break;
1135 case 't': typedefs = TRUE; break;
1136 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1137 case 'u': update = TRUE; break;
1138 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1139 case 'x': cxref_style = TRUE; break;
1140 case 'w': no_warnings = TRUE; break;
1141 default:
1142 suggest_asking_for_help ();
1143 /* NOTREACHED */
1144 }
1145
1146 /* No more options. Store the rest of arguments. */
1147 for (; optind < argc; optind++)
1148 {
1149 argbuffer[current_arg].arg_type = at_filename;
1150 argbuffer[current_arg].what = argv[optind];
1151 len = strlen (argv[optind]);
1152 if (whatlen_max < len)
1153 whatlen_max = len;
1154 ++current_arg;
1155 ++file_count;
1156 }
1157
1158 argbuffer[current_arg].arg_type = at_end;
1159
1160 if (help_asked)
1161 print_help (argbuffer);
1162 /* NOTREACHED */
1163
1164 if (nincluded_files == 0 && file_count == 0)
1165 {
1166 error ("no input files specified.");
1167 suggest_asking_for_help ();
1168 /* NOTREACHED */
1169 }
1170
1171 if (tagfile == NULL)
1172 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1173 cwd = etags_getcwd (); /* the current working directory */
1174 if (cwd[strlen (cwd) - 1] != '/')
1175 {
1176 char *oldcwd = cwd;
1177 cwd = concat (oldcwd, "/", "");
1178 free (oldcwd);
1179 }
1180
1181 /* Compute base directory for relative file names. */
1182 if (streq (tagfile, "-")
1183 || strneq (tagfile, "/dev/", 5))
1184 tagfiledir = cwd; /* relative file names are relative to cwd */
1185 else
1186 {
1187 canonicalize_filename (tagfile);
1188 tagfiledir = absolute_dirname (tagfile, cwd);
1189 }
1190
1191 init (); /* set up boolean "functions" */
1192
1193 linebuffer_init (&lb);
1194 linebuffer_init (&filename_lb);
1195 linebuffer_init (&filebuf);
1196 linebuffer_init (&token_name);
1197
1198 if (!CTAGS)
1199 {
1200 if (streq (tagfile, "-"))
1201 {
1202 tagf = stdout;
1203#ifdef DOS_NT
1204 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1205 doesn't take effect until after `stdout' is already open). */
1206 if (!isatty (fileno (stdout)))
1207 setmode (fileno (stdout), O_BINARY);
1208#endif /* DOS_NT */
1209 }
1210 else
1211 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1212 if (tagf == NULL)
1213 pfatal (tagfile);
1214 }
1215
1216 /*
1217 * Loop through files finding functions.
1218 */
1219 for (i = 0; i < current_arg; i++)
1220 {
1221 static language *lang; /* non-NULL if language is forced */
1222 char *this_file;
1223
1224 switch (argbuffer[i].arg_type)
1225 {
1226 case at_language:
1227 lang = argbuffer[i].lang;
1228 break;
1229 case at_regexp:
1230 analyse_regex (argbuffer[i].what);
1231 break;
1232 case at_filename:
1233 this_file = argbuffer[i].what;
1234 /* Input file named "-" means read file names from stdin
1235 (one per line) and use them. */
1236 if (streq (this_file, "-"))
1237 {
1238 if (parsing_stdin)
1239 fatal ("cannot parse standard input AND read file names from it",
1240 (char *)NULL);
1241 while (readline_internal (&filename_lb, stdin) > 0)
1242 process_file_name (filename_lb.buffer, lang);
1243 }
1244 else
1245 process_file_name (this_file, lang);
1246 break;
1247 case at_stdin:
1248 this_file = argbuffer[i].what;
1249 process_file (stdin, this_file, lang);
1250 break;
1251 }
1252 }
1253
1254 free_regexps ();
1255 free (lb.buffer);
1256 free (filebuf.buffer);
1257 free (token_name.buffer);
1258
1259 if (!CTAGS || cxref_style)
1260 {
1261 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1262 put_entries (nodehead);
1263 free_tree (nodehead);
1264 nodehead = NULL;
1265 if (!CTAGS)
1266 {
1267 fdesc *fdp;
1268
1269 /* Output file entries that have no tags. */
1270 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1271 if (!fdp->written)
1272 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1273
1274 while (nincluded_files-- > 0)
1275 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1276
1277 if (fclose (tagf) == EOF)
1278 pfatal (tagfile);
1279 }
1280
1281 exit (EXIT_SUCCESS);
1282 }
1283
1284 /* From here on, we are in (CTAGS && !cxref_style) */
1285 if (update)
1286 {
1287 char *cmd =
1288 xmalloc (strlen (tagfile) + whatlen_max +
1289 sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1290 for (i = 0; i < current_arg; ++i)
1291 {
1292 switch (argbuffer[i].arg_type)
1293 {
1294 case at_filename:
1295 case at_stdin:
1296 break;
1297 default:
1298 continue; /* the for loop */
1299 }
1300 strcpy (cmd, "mv ");
1301 strcat (cmd, tagfile);
1302 strcat (cmd, " OTAGS;fgrep -v '\t");
1303 strcat (cmd, argbuffer[i].what);
1304 strcat (cmd, "\t' OTAGS >");
1305 strcat (cmd, tagfile);
1306 strcat (cmd, ";rm OTAGS");
1307 if (system (cmd) != EXIT_SUCCESS)
1308 fatal ("failed to execute shell command", (char *)NULL);
1309 }
1310 free (cmd);
1311 append_to_tagfile = TRUE;
1312 }
1313
1314 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1315 if (tagf == NULL)
1316 pfatal (tagfile);
1317 put_entries (nodehead); /* write all the tags (CTAGS) */
1318 free_tree (nodehead);
1319 nodehead = NULL;
1320 if (fclose (tagf) == EOF)
1321 pfatal (tagfile);
1322
1323 if (CTAGS)
1324 if (append_to_tagfile || update)
1325 {
1326 char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1327 /* Maybe these should be used:
1328 setenv ("LC_COLLATE", "C", 1);
1329 setenv ("LC_ALL", "C", 1); */
1330 strcpy (cmd, "sort -u -o ");
1331 strcat (cmd, tagfile);
1332 strcat (cmd, " ");
1333 strcat (cmd, tagfile);
1334 exit (system (cmd));
1335 }
1336 return EXIT_SUCCESS;
1337}
1338
1339
1340/*
1341 * Return a compressor given the file name. If EXTPTR is non-zero,
1342 * return a pointer into FILE where the compressor-specific
1343 * extension begins. If no compressor is found, NULL is returned
1344 * and EXTPTR is not significant.
1345 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1346 */
1347static compressor *
1348get_compressor_from_suffix (char *file, char **extptr)
1349{
1350 compressor *compr;
1351 char *slash, *suffix;
1352
1353 /* File has been processed by canonicalize_filename,
1354 so we don't need to consider backslashes on DOS_NT. */
1355 slash = etags_strrchr (file, '/');
1356 suffix = etags_strrchr (file, '.');
1357 if (suffix == NULL || suffix < slash)
1358 return NULL;
1359 if (extptr != NULL)
1360 *extptr = suffix;
1361 suffix += 1;
1362 /* Let those poor souls who live with DOS 8+3 file name limits get
1363 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1364 Only the first do loop is run if not MSDOS */
1365 do
1366 {
1367 for (compr = compressors; compr->suffix != NULL; compr++)
1368 if (streq (compr->suffix, suffix))
1369 return compr;
1370 if (!MSDOS)
1371 break; /* do it only once: not really a loop */
1372 if (extptr != NULL)
1373 *extptr = ++suffix;
1374 } while (*suffix != '\0');
1375 return NULL;
1376}
1377
1378
1379
1380/*
1381 * Return a language given the name.
1382 */
1383static language *
1384get_language_from_langname (const char *name)
1385{
1386 language *lang;
1387
1388 if (name == NULL)
1389 error ("empty language name");
1390 else
1391 {
1392 for (lang = lang_names; lang->name != NULL; lang++)
1393 if (streq (name, lang->name))
1394 return lang;
1395 error ("unknown language \"%s\"", name);
1396 }
1397
1398 return NULL;
1399}
1400
1401
1402/*
1403 * Return a language given the interpreter name.
1404 */
1405static language *
1406get_language_from_interpreter (char *interpreter)
1407{
1408 language *lang;
1409 const char **iname;
1410
1411 if (interpreter == NULL)
1412 return NULL;
1413 for (lang = lang_names; lang->name != NULL; lang++)
1414 if (lang->interpreters != NULL)
1415 for (iname = lang->interpreters; *iname != NULL; iname++)
1416 if (streq (*iname, interpreter))
1417 return lang;
1418
1419 return NULL;
1420}
1421
1422
1423
1424/*
1425 * Return a language given the file name.
1426 */
1427static language *
1428get_language_from_filename (char *file, int case_sensitive)
1429{
1430 language *lang;
1431 const char **name, **ext, *suffix;
1432
1433 /* Try whole file name first. */
1434 for (lang = lang_names; lang->name != NULL; lang++)
1435 if (lang->filenames != NULL)
1436 for (name = lang->filenames; *name != NULL; name++)
1437 if ((case_sensitive)
1438 ? streq (*name, file)
1439 : strcaseeq (*name, file))
1440 return lang;
1441
1442 /* If not found, try suffix after last dot. */
1443 suffix = etags_strrchr (file, '.');
1444 if (suffix == NULL)
1445 return NULL;
1446 suffix += 1;
1447 for (lang = lang_names; lang->name != NULL; lang++)
1448 if (lang->suffixes != NULL)
1449 for (ext = lang->suffixes; *ext != NULL; ext++)
1450 if ((case_sensitive)
1451 ? streq (*ext, suffix)
1452 : strcaseeq (*ext, suffix))
1453 return lang;
1454 return NULL;
1455}
1456
1457\f
1458/*
1459 * This routine is called on each file argument.
1460 */
1461static void
1462process_file_name (char *file, language *lang)
1463{
1464 struct stat stat_buf;
1465 FILE *inf;
1466 fdesc *fdp;
1467 compressor *compr;
1468 char *compressed_name, *uncompressed_name;
1469 char *ext, *real_name;
1470 int retval;
1471
1472 canonicalize_filename (file);
1473 if (streq (file, tagfile) && !streq (tagfile, "-"))
1474 {
1475 error ("skipping inclusion of %s in self.", file);
1476 return;
1477 }
1478 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1479 {
1480 compressed_name = NULL;
1481 real_name = uncompressed_name = savestr (file);
1482 }
1483 else
1484 {
1485 real_name = compressed_name = savestr (file);
1486 uncompressed_name = savenstr (file, ext - file);
1487 }
1488
1489 /* If the canonicalized uncompressed name
1490 has already been dealt with, skip it silently. */
1491 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1492 {
1493 assert (fdp->infname != NULL);
1494 if (streq (uncompressed_name, fdp->infname))
1495 goto cleanup;
1496 }
1497
1498 if (stat (real_name, &stat_buf) != 0)
1499 {
1500 /* Reset real_name and try with a different name. */
1501 real_name = NULL;
1502 if (compressed_name != NULL) /* try with the given suffix */
1503 {
1504 if (stat (uncompressed_name, &stat_buf) == 0)
1505 real_name = uncompressed_name;
1506 }
1507 else /* try all possible suffixes */
1508 {
1509 for (compr = compressors; compr->suffix != NULL; compr++)
1510 {
1511 compressed_name = concat (file, ".", compr->suffix);
1512 if (stat (compressed_name, &stat_buf) != 0)
1513 {
1514 if (MSDOS)
1515 {
1516 char *suf = compressed_name + strlen (file);
1517 size_t suflen = strlen (compr->suffix) + 1;
1518 for ( ; suf[1]; suf++, suflen--)
1519 {
1520 memmove (suf, suf + 1, suflen);
1521 if (stat (compressed_name, &stat_buf) == 0)
1522 {
1523 real_name = compressed_name;
1524 break;
1525 }
1526 }
1527 if (real_name != NULL)
1528 break;
1529 } /* MSDOS */
1530 free (compressed_name);
1531 compressed_name = NULL;
1532 }
1533 else
1534 {
1535 real_name = compressed_name;
1536 break;
1537 }
1538 }
1539 }
1540 if (real_name == NULL)
1541 {
1542 perror (file);
1543 goto cleanup;
1544 }
1545 } /* try with a different name */
1546
1547 if (!S_ISREG (stat_buf.st_mode))
1548 {
1549 error ("skipping %s: it is not a regular file.", real_name);
1550 goto cleanup;
1551 }
1552 if (real_name == compressed_name)
1553 {
1554 char *cmd = concat (compr->command, " ", real_name);
1555 inf = (FILE *) popen (cmd, "r");
1556 free (cmd);
1557 }
1558 else
1559 inf = fopen (real_name, "r");
1560 if (inf == NULL)
1561 {
1562 perror (real_name);
1563 goto cleanup;
1564 }
1565
1566 process_file (inf, uncompressed_name, lang);
1567
1568 if (real_name == compressed_name)
1569 retval = pclose (inf);
1570 else
1571 retval = fclose (inf);
1572 if (retval < 0)
1573 pfatal (file);
1574
1575 cleanup:
1576 free (compressed_name);
1577 free (uncompressed_name);
1578 last_node = NULL;
1579 curfdp = NULL;
1580 return;
1581}
1582
1583static void
1584process_file (FILE *fh, char *fn, language *lang)
1585{
1586 static const fdesc emptyfdesc;
1587 fdesc *fdp;
1588
1589 /* Create a new input file description entry. */
1590 fdp = xnew (1, fdesc);
1591 *fdp = emptyfdesc;
1592 fdp->next = fdhead;
1593 fdp->infname = savestr (fn);
1594 fdp->lang = lang;
1595 fdp->infabsname = absolute_filename (fn, cwd);
1596 fdp->infabsdir = absolute_dirname (fn, cwd);
1597 if (filename_is_absolute (fn))
1598 {
1599 /* An absolute file name. Canonicalize it. */
1600 fdp->taggedfname = absolute_filename (fn, NULL);
1601 }
1602 else
1603 {
1604 /* A file name relative to cwd. Make it relative
1605 to the directory of the tags file. */
1606 fdp->taggedfname = relative_filename (fn, tagfiledir);
1607 }
1608 fdp->usecharno = TRUE; /* use char position when making tags */
1609 fdp->prop = NULL;
1610 fdp->written = FALSE; /* not written on tags file yet */
1611
1612 fdhead = fdp;
1613 curfdp = fdhead; /* the current file description */
1614
1615 find_entries (fh);
1616
1617 /* If not Ctags, and if this is not metasource and if it contained no #line
1618 directives, we can write the tags and free all nodes pointing to
1619 curfdp. */
1620 if (!CTAGS
1621 && curfdp->usecharno /* no #line directives in this file */
1622 && !curfdp->lang->metasource)
1623 {
1624 node *np, *prev;
1625
1626 /* Look for the head of the sublist relative to this file. See add_node
1627 for the structure of the node tree. */
1628 prev = NULL;
1629 for (np = nodehead; np != NULL; prev = np, np = np->left)
1630 if (np->fdp == curfdp)
1631 break;
1632
1633 /* If we generated tags for this file, write and delete them. */
1634 if (np != NULL)
1635 {
1636 /* This is the head of the last sublist, if any. The following
1637 instructions depend on this being true. */
1638 assert (np->left == NULL);
1639
1640 assert (fdhead == curfdp);
1641 assert (last_node->fdp == curfdp);
1642 put_entries (np); /* write tags for file curfdp->taggedfname */
1643 free_tree (np); /* remove the written nodes */
1644 if (prev == NULL)
1645 nodehead = NULL; /* no nodes left */
1646 else
1647 prev->left = NULL; /* delete the pointer to the sublist */
1648 }
1649 }
1650}
1651
1652/*
1653 * This routine sets up the boolean pseudo-functions which work
1654 * by setting boolean flags dependent upon the corresponding character.
1655 * Every char which is NOT in that string is not a white char. Therefore,
1656 * all of the array "_wht" is set to FALSE, and then the elements
1657 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1658 * of a char is TRUE if it is the string "white", else FALSE.
1659 */
1660static void
1661init (void)
1662{
1663 register const char *sp;
1664 register int i;
1665
1666 for (i = 0; i < CHARS; i++)
1667 iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1668 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1669 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1670 notinname ('\0') = notinname ('\n');
1671 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1672 begtoken ('\0') = begtoken ('\n');
1673 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1674 intoken ('\0') = intoken ('\n');
1675 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1676 endtoken ('\0') = endtoken ('\n');
1677}
1678
1679/*
1680 * This routine opens the specified file and calls the function
1681 * which finds the function and type definitions.
1682 */
1683static void
1684find_entries (FILE *inf)
1685{
1686 char *cp;
1687 language *lang = curfdp->lang;
1688 Lang_function *parser = NULL;
1689
1690 /* If user specified a language, use it. */
1691 if (lang != NULL && lang->function != NULL)
1692 {
1693 parser = lang->function;
1694 }
1695
1696 /* Else try to guess the language given the file name. */
1697 if (parser == NULL)
1698 {
1699 lang = get_language_from_filename (curfdp->infname, TRUE);
1700 if (lang != NULL && lang->function != NULL)
1701 {
1702 curfdp->lang = lang;
1703 parser = lang->function;
1704 }
1705 }
1706
1707 /* Else look for sharp-bang as the first two characters. */
1708 if (parser == NULL
1709 && readline_internal (&lb, inf) > 0
1710 && lb.len >= 2
1711 && lb.buffer[0] == '#'
1712 && lb.buffer[1] == '!')
1713 {
1714 char *lp;
1715
1716 /* Set lp to point at the first char after the last slash in the
1717 line or, if no slashes, at the first nonblank. Then set cp to
1718 the first successive blank and terminate the string. */
1719 lp = etags_strrchr (lb.buffer+2, '/');
1720 if (lp != NULL)
1721 lp += 1;
1722 else
1723 lp = skip_spaces (lb.buffer + 2);
1724 cp = skip_non_spaces (lp);
1725 *cp = '\0';
1726
1727 if (strlen (lp) > 0)
1728 {
1729 lang = get_language_from_interpreter (lp);
1730 if (lang != NULL && lang->function != NULL)
1731 {
1732 curfdp->lang = lang;
1733 parser = lang->function;
1734 }
1735 }
1736 }
1737
1738 /* We rewind here, even if inf may be a pipe. We fail if the
1739 length of the first line is longer than the pipe block size,
1740 which is unlikely. */
1741 rewind (inf);
1742
1743 /* Else try to guess the language given the case insensitive file name. */
1744 if (parser == NULL)
1745 {
1746 lang = get_language_from_filename (curfdp->infname, FALSE);
1747 if (lang != NULL && lang->function != NULL)
1748 {
1749 curfdp->lang = lang;
1750 parser = lang->function;
1751 }
1752 }
1753
1754 /* Else try Fortran or C. */
1755 if (parser == NULL)
1756 {
1757 node *old_last_node = last_node;
1758
1759 curfdp->lang = get_language_from_langname ("fortran");
1760 find_entries (inf);
1761
1762 if (old_last_node == last_node)
1763 /* No Fortran entries found. Try C. */
1764 {
1765 /* We do not tag if rewind fails.
1766 Only the file name will be recorded in the tags file. */
1767 rewind (inf);
1768 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1769 find_entries (inf);
1770 }
1771 return;
1772 }
1773
1774 if (!no_line_directive
1775 && curfdp->lang != NULL && curfdp->lang->metasource)
1776 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1777 file, or anyway we parsed a file that is automatically generated from
1778 this one. If this is the case, the bingo.c file contained #line
1779 directives that generated tags pointing to this file. Let's delete
1780 them all before parsing this file, which is the real source. */
1781 {
1782 fdesc **fdpp = &fdhead;
1783 while (*fdpp != NULL)
1784 if (*fdpp != curfdp
1785 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1786 /* We found one of those! We must delete both the file description
1787 and all tags referring to it. */
1788 {
1789 fdesc *badfdp = *fdpp;
1790
1791 /* Delete the tags referring to badfdp->taggedfname
1792 that were obtained from badfdp->infname. */
1793 invalidate_nodes (badfdp, &nodehead);
1794
1795 *fdpp = badfdp->next; /* remove the bad description from the list */
1796 free_fdesc (badfdp);
1797 }
1798 else
1799 fdpp = &(*fdpp)->next; /* advance the list pointer */
1800 }
1801
1802 assert (parser != NULL);
1803
1804 /* Generic initializations before reading from file. */
1805 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1806
1807 /* Generic initializations before parsing file with readline. */
1808 lineno = 0; /* reset global line number */
1809 charno = 0; /* reset global char number */
1810 linecharno = 0; /* reset global char number of line start */
1811
1812 parser (inf);
1813
1814 regex_tag_multiline ();
1815}
1816
1817\f
1818/*
1819 * Check whether an implicitly named tag should be created,
1820 * then call `pfnote'.
1821 * NAME is a string that is internally copied by this function.
1822 *
1823 * TAGS format specification
1824 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1825 * The following is explained in some more detail in etc/ETAGS.EBNF.
1826 *
1827 * make_tag creates tags with "implicit tag names" (unnamed tags)
1828 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1829 * 1. NAME does not contain any of the characters in NONAM;
1830 * 2. LINESTART contains name as either a rightmost, or rightmost but
1831 * one character, substring;
1832 * 3. the character, if any, immediately before NAME in LINESTART must
1833 * be a character in NONAM;
1834 * 4. the character, if any, immediately after NAME in LINESTART must
1835 * also be a character in NONAM.
1836 *
1837 * The implementation uses the notinname() macro, which recognizes the
1838 * characters stored in the string `nonam'.
1839 * etags.el needs to use the same characters that are in NONAM.
1840 */
1841static void
1842make_tag (const char *name, /* tag name, or NULL if unnamed */
1843 int namelen, /* tag length */
1844 int is_func, /* tag is a function */
1845 char *linestart, /* start of the line where tag is */
1846 int linelen, /* length of the line where tag is */
1847 int lno, /* line number */
1848 long int cno) /* character number */
1849{
1850 bool named = (name != NULL && namelen > 0);
1851 char *nname = NULL;
1852
1853 if (!CTAGS && named) /* maybe set named to false */
1854 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1855 such that etags.el can guess a name from it. */
1856 {
1857 int i;
1858 register const char *cp = name;
1859
1860 for (i = 0; i < namelen; i++)
1861 if (notinname (*cp++))
1862 break;
1863 if (i == namelen) /* rule #1 */
1864 {
1865 cp = linestart + linelen - namelen;
1866 if (notinname (linestart[linelen-1]))
1867 cp -= 1; /* rule #4 */
1868 if (cp >= linestart /* rule #2 */
1869 && (cp == linestart
1870 || notinname (cp[-1])) /* rule #3 */
1871 && strneq (name, cp, namelen)) /* rule #2 */
1872 named = FALSE; /* use implicit tag name */
1873 }
1874 }
1875
1876 if (named)
1877 nname = savenstr (name, namelen);
1878
1879 pfnote (nname, is_func, linestart, linelen, lno, cno);
1880}
1881
1882/* Record a tag. */
1883static void
1884pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1885 /* tag name, or NULL if unnamed */
1886 /* tag is a function */
1887 /* start of the line where tag is */
1888 /* length of the line where tag is */
1889 /* line number */
1890 /* character number */
1891{
1892 register node *np;
1893
1894 assert (name == NULL || name[0] != '\0');
1895 if (CTAGS && name == NULL)
1896 return;
1897
1898 np = xnew (1, node);
1899
1900 /* If ctags mode, change name "main" to M<thisfilename>. */
1901 if (CTAGS && !cxref_style && streq (name, "main"))
1902 {
1903 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1904 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1905 fp = etags_strrchr (np->name, '.');
1906 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1907 fp[0] = '\0';
1908 }
1909 else
1910 np->name = name;
1911 np->valid = TRUE;
1912 np->been_warned = FALSE;
1913 np->fdp = curfdp;
1914 np->is_func = is_func;
1915 np->lno = lno;
1916 if (np->fdp->usecharno)
1917 /* Our char numbers are 0-base, because of C language tradition?
1918 ctags compatibility? old versions compatibility? I don't know.
1919 Anyway, since emacs's are 1-base we expect etags.el to take care
1920 of the difference. If we wanted to have 1-based numbers, we would
1921 uncomment the +1 below. */
1922 np->cno = cno /* + 1 */ ;
1923 else
1924 np->cno = invalidcharno;
1925 np->left = np->right = NULL;
1926 if (CTAGS && !cxref_style)
1927 {
1928 if (strlen (linestart) < 50)
1929 np->regex = concat (linestart, "$", "");
1930 else
1931 np->regex = savenstr (linestart, 50);
1932 }
1933 else
1934 np->regex = savenstr (linestart, linelen);
1935
1936 add_node (np, &nodehead);
1937}
1938
1939/*
1940 * free_tree ()
1941 * recurse on left children, iterate on right children.
1942 */
1943static void
1944free_tree (register node *np)
1945{
1946 while (np)
1947 {
1948 register node *node_right = np->right;
1949 free_tree (np->left);
1950 free (np->name);
1951 free (np->regex);
1952 free (np);
1953 np = node_right;
1954 }
1955}
1956
1957/*
1958 * free_fdesc ()
1959 * delete a file description
1960 */
1961static void
1962free_fdesc (register fdesc *fdp)
1963{
1964 free (fdp->infname);
1965 free (fdp->infabsname);
1966 free (fdp->infabsdir);
1967 free (fdp->taggedfname);
1968 free (fdp->prop);
1969 free (fdp);
1970}
1971
1972/*
1973 * add_node ()
1974 * Adds a node to the tree of nodes. In etags mode, sort by file
1975 * name. In ctags mode, sort by tag name. Make no attempt at
1976 * balancing.
1977 *
1978 * add_node is the only function allowed to add nodes, so it can
1979 * maintain state.
1980 */
1981static void
1982add_node (node *np, node **cur_node_p)
1983{
1984 register int dif;
1985 register node *cur_node = *cur_node_p;
1986
1987 if (cur_node == NULL)
1988 {
1989 *cur_node_p = np;
1990 last_node = np;
1991 return;
1992 }
1993
1994 if (!CTAGS)
1995 /* Etags Mode */
1996 {
1997 /* For each file name, tags are in a linked sublist on the right
1998 pointer. The first tags of different files are a linked list
1999 on the left pointer. last_node points to the end of the last
2000 used sublist. */
2001 if (last_node != NULL && last_node->fdp == np->fdp)
2002 {
2003 /* Let's use the same sublist as the last added node. */
2004 assert (last_node->right == NULL);
2005 last_node->right = np;
2006 last_node = np;
2007 }
2008 else if (cur_node->fdp == np->fdp)
2009 {
2010 /* Scanning the list we found the head of a sublist which is
2011 good for us. Let's scan this sublist. */
2012 add_node (np, &cur_node->right);
2013 }
2014 else
2015 /* The head of this sublist is not good for us. Let's try the
2016 next one. */
2017 add_node (np, &cur_node->left);
2018 } /* if ETAGS mode */
2019
2020 else
2021 {
2022 /* Ctags Mode */
2023 dif = strcmp (np->name, cur_node->name);
2024
2025 /*
2026 * If this tag name matches an existing one, then
2027 * do not add the node, but maybe print a warning.
2028 */
2029 if (no_duplicates && !dif)
2030 {
2031 if (np->fdp == cur_node->fdp)
2032 {
2033 if (!no_warnings)
2034 {
2035 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2036 np->fdp->infname, lineno, np->name);
2037 fprintf (stderr, "Second entry ignored\n");
2038 }
2039 }
2040 else if (!cur_node->been_warned && !no_warnings)
2041 {
2042 fprintf
2043 (stderr,
2044 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2045 np->fdp->infname, cur_node->fdp->infname, np->name);
2046 cur_node->been_warned = TRUE;
2047 }
2048 return;
2049 }
2050
2051 /* Actually add the node */
2052 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2053 } /* if CTAGS mode */
2054}
2055
2056/*
2057 * invalidate_nodes ()
2058 * Scan the node tree and invalidate all nodes pointing to the
2059 * given file description (CTAGS case) or free them (ETAGS case).
2060 */
2061static void
2062invalidate_nodes (fdesc *badfdp, node **npp)
2063{
2064 node *np = *npp;
2065
2066 if (np == NULL)
2067 return;
2068
2069 if (CTAGS)
2070 {
2071 if (np->left != NULL)
2072 invalidate_nodes (badfdp, &np->left);
2073 if (np->fdp == badfdp)
2074 np->valid = FALSE;
2075 if (np->right != NULL)
2076 invalidate_nodes (badfdp, &np->right);
2077 }
2078 else
2079 {
2080 assert (np->fdp != NULL);
2081 if (np->fdp == badfdp)
2082 {
2083 *npp = np->left; /* detach the sublist from the list */
2084 np->left = NULL; /* isolate it */
2085 free_tree (np); /* free it */
2086 invalidate_nodes (badfdp, npp);
2087 }
2088 else
2089 invalidate_nodes (badfdp, &np->left);
2090 }
2091}
2092
2093\f
2094static int total_size_of_entries (node *);
2095static int number_len (long) ATTRIBUTE_CONST;
2096
/* Number of decimal digits needed to print NUM, which is expected to be
   non-negative.  Any value below 10 (negative ones included) yields 1. */
static int
number_len (long int num)
{
  int digits;

  /* Every further factor of ten costs one more digit.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2106
2107/*
2108 * Return total number of characters that put_entries will output for
2109 * the nodes in the linked list at the right of the specified node.
2110 * This count is irrelevant with etags.el since emacs 19.34 at least,
2111 * but is still supplied for backward compatibility.
2112 */
2113static int
2114total_size_of_entries (register node *np)
2115{
2116 register int total = 0;
2117
2118 for (; np != NULL; np = np->right)
2119 if (np->valid)
2120 {
2121 total += strlen (np->regex) + 1; /* pat\177 */
2122 if (np->name != NULL)
2123 total += strlen (np->name) + 1; /* name\001 */
2124 total += number_len ((long) np->lno) + 1; /* lno, */
2125 if (np->cno != invalidcharno) /* cno */
2126 total += number_len (np->cno);
2127 total += 1; /* newline */
2128 }
2129
2130 return total;
2131}
2132
2133static void
2134put_entries (register node *np)
2135{
2136 register char *sp;
2137 static fdesc *fdp = NULL;
2138
2139 if (np == NULL)
2140 return;
2141
2142 /* Output subentries that precede this one */
2143 if (CTAGS)
2144 put_entries (np->left);
2145
2146 /* Output this entry */
2147 if (np->valid)
2148 {
2149 if (!CTAGS)
2150 {
2151 /* Etags mode */
2152 if (fdp != np->fdp)
2153 {
2154 fdp = np->fdp;
2155 fprintf (tagf, "\f\n%s,%d\n",
2156 fdp->taggedfname, total_size_of_entries (np));
2157 fdp->written = TRUE;
2158 }
2159 fputs (np->regex, tagf);
2160 fputc ('\177', tagf);
2161 if (np->name != NULL)
2162 {
2163 fputs (np->name, tagf);
2164 fputc ('\001', tagf);
2165 }
2166 fprintf (tagf, "%d,", np->lno);
2167 if (np->cno != invalidcharno)
2168 fprintf (tagf, "%ld", np->cno);
2169 fputs ("\n", tagf);
2170 }
2171 else
2172 {
2173 /* Ctags mode */
2174 if (np->name == NULL)
2175 error ("internal error: NULL name in ctags mode.");
2176
2177 if (cxref_style)
2178 {
2179 if (vgrind_style)
2180 fprintf (stdout, "%s %s %d\n",
2181 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2182 else
2183 fprintf (stdout, "%-16s %3d %-16s %s\n",
2184 np->name, np->lno, np->fdp->taggedfname, np->regex);
2185 }
2186 else
2187 {
2188 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2189
2190 if (np->is_func)
2191 { /* function or #define macro with args */
2192 putc (searchar, tagf);
2193 putc ('^', tagf);
2194
2195 for (sp = np->regex; *sp; sp++)
2196 {
2197 if (*sp == '\\' || *sp == searchar)
2198 putc ('\\', tagf);
2199 putc (*sp, tagf);
2200 }
2201 putc (searchar, tagf);
2202 }
2203 else
2204 { /* anything else; text pattern inadequate */
2205 fprintf (tagf, "%d", np->lno);
2206 }
2207 putc ('\n', tagf);
2208 }
2209 }
2210 } /* if this node contains a valid tag */
2211
2212 /* Output subentries that follow this one */
2213 put_entries (np->right);
2214 if (!CTAGS)
2215 put_entries (np->left);
2216}
2217
2218\f
/* C extensions.
   These values are combined into the c_ext language mask.  The keyword
   table below gives each keyword a c_ext value, and C_symtype accepts a
   keyword when that value is 0 or shares a bit with the current mask. */
#define C_EXT	0x00fff		/* C extensions: mask of the language bits */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */

/*
 * The C symbol tables.
 * Each keyword of the table below is classified as one of these types;
 * C_symtype returns st_none for anything that is not an applicable
 * keyword.
 */
enum sym_type
{
  st_none,			/* not a keyword */
  st_C_objprot,			/* @interface, @protocol */
  st_C_objimpl,			/* @implementation */
  st_C_objend,			/* @end */
  st_C_gnumacro,		/* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,			/* if, for, while, switch, return,
				   import, package, friend */
  st_C_attribute,		/* __attribute__, GTY */
  st_C_javastruct,		/* extends, implements */
  st_C_operator,		/* operator */
  st_C_class,			/* class */
  st_C_template,		/* template */
  st_C_struct,			/* struct, union, namespace, domain,
				   interface */
  st_C_extern,			/* extern */
  st_C_enum,			/* enum */
  st_C_define,			/* define, undef */
  st_C_typedef			/* typedef */
};
2242
2243/* Feed stuff between (but not including) %[ and %] lines to:
2244 gperf -m 5
2245%[
2246%compare-strncmp
2247%enum
2248%struct-type
2249struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2250%%
2251if, 0, st_C_ignore
2252for, 0, st_C_ignore
2253while, 0, st_C_ignore
2254switch, 0, st_C_ignore
2255return, 0, st_C_ignore
2256__attribute__, 0, st_C_attribute
2257GTY, 0, st_C_attribute
2258@interface, 0, st_C_objprot
2259@protocol, 0, st_C_objprot
2260@implementation,0, st_C_objimpl
2261@end, 0, st_C_objend
2262import, (C_JAVA & ~C_PLPL), st_C_ignore
2263package, (C_JAVA & ~C_PLPL), st_C_ignore
2264friend, C_PLPL, st_C_ignore
2265extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2266implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2267interface, (C_JAVA & ~C_PLPL), st_C_struct
2268class, 0, st_C_class
2269namespace, C_PLPL, st_C_struct
2270domain, C_STAR, st_C_struct
2271union, 0, st_C_struct
2272struct, 0, st_C_struct
2273extern, 0, st_C_extern
2274enum, 0, st_C_enum
2275typedef, 0, st_C_typedef
2276define, 0, st_C_define
2277undef, 0, st_C_define
2278operator, C_PLPL, st_C_operator
2279template, 0, st_C_template
2280# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2281DEFUN, 0, st_C_gnumacro
2282SYSCALL, 0, st_C_gnumacro
2283ENTRY, 0, st_C_gnumacro
2284PSEUDO, 0, st_C_gnumacro
2285# These are defined inside C functions, so currently they are not met.
2286# EXFUN used in glibc, DEFVAR_* in emacs.
2287#EXFUN, 0, st_C_gnumacro
2288#DEFVAR_, 0, st_C_gnumacro
2289%]
2290and replace lines between %< and %> with its output, then:
2291 - remove the #if characterset check
2292 - make in_word_set static and not inline. */
/*%<*/
/* C code produced by gperf version 3.0.1 */
/* Command-line: gperf -m 5  */
/* Computed positions: -k'2-3' */

/* One keyword of the table: its spelling, the language mask it applies
   to (0 meaning every language) and its classification. */
struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
/* maximum key range = 33, duplicates = 0 */

/* Hash of the keyword candidate STR of length LEN: LEN plus the
   association values of the second character and, unless LEN is 2, of
   the third one.  STR[1] is always read and STR[2] is read whenever LEN
   is not 2, so callers must pass LEN >= 2 (in_word_set checks this
   before calling).  Generated code: regenerate rather than edit. */
static int
hash (const char *str, int len)
{
  /* Indexed by character code; 35 is larger than MAX_HASH_VALUE, so a
     character that appears in no keyword at these positions pushes the
     result out of the table. */
  static char const asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char) str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char) str[1]];
        break;
    }
  return hval;
}
2346
/* Look up the first LEN characters of STR in the keyword table.
   Returns the matching entry, or 0 when they are not a keyword.
   STR need not be NUL-terminated after LEN characters: the comparison
   is bounded by LEN and the table entry must end exactly there.
   Generated code: regenerate rather than edit. */
static struct C_stab_entry *
in_word_set (register const char *str, register unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 33,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 15,
      MIN_HASH_VALUE = 2,
      MAX_HASH_VALUE = 34
    };

  /* Indexed by hash value; the two empty entries fill the slots below
     MIN_HASH_VALUE. */
  static struct C_stab_entry wordlist[] =
    {
      {""}, {""},
      {"if",		0,			st_C_ignore},
      {"GTY",           0,                      st_C_attribute},
      {"@end",		0,			st_C_objend},
      {"union",		0,			st_C_struct},
      {"define",		0,			st_C_define},
      {"import",		(C_JAVA & ~C_PLPL),	st_C_ignore},
      {"template",	0,			st_C_template},
      {"operator",	C_PLPL,			st_C_operator},
      {"@interface",	0,			st_C_objprot},
      {"implements",	(C_JAVA & ~C_PLPL),	st_C_javastruct},
      {"friend",		C_PLPL,			st_C_ignore},
      {"typedef",	0,			st_C_typedef},
      {"return",		0,			st_C_ignore},
      {"@implementation",0,			st_C_objimpl},
      {"@protocol",	0,			st_C_objprot},
      {"interface",	(C_JAVA & ~C_PLPL),	st_C_struct},
      {"extern",		0,			st_C_extern},
      {"extends",	(C_JAVA & ~C_PLPL),	st_C_javastruct},
      {"struct",		0,			st_C_struct},
      {"domain",		C_STAR,			st_C_struct},
      {"switch",		0,			st_C_ignore},
      {"enum",		0,			st_C_enum},
      {"for",		0,			st_C_ignore},
      {"namespace",	C_PLPL,			st_C_struct},
      {"class",		0,			st_C_class},
      {"while",		0,			st_C_ignore},
      {"undef",		0,			st_C_define},
      {"package",	(C_JAVA & ~C_PLPL),	st_C_ignore},
      {"__attribute__",	0,			st_C_attribute},
      {"SYSCALL",	0,			st_C_gnumacro},
      {"ENTRY",		0,			st_C_gnumacro},
      {"PSEUDO",		0,			st_C_gnumacro},
      {"DEFUN",		0,			st_C_gnumacro}
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key].name;

          /* Cheap first-character test, then the rest of the LEN
             characters, then make sure the keyword is not longer. */
          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
/*%>*/
2412
2413static enum sym_type
2414C_symtype (char *str, int len, int c_ext)
2415{
2416 register struct C_stab_entry *se = in_word_set (str, len);
2417
2418 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2419 return st_none;
2420 return se->type;
2421}
2422
2423\f
/*
 * Ignoring __attribute__ ((list))
 * consider_token sets this flag when it meets a token classified as
 * st_C_attribute.
 */
static bool inattribute;	/* looking at an __attribute__ construct */
2428
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 * consider_token advances it when it sees tokens; C_entries resets it
 * to fvnone when it starts a file.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

static bool fvextern;		/* func or var: extern keyword seen; */
2448
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 * consider_token enters tkeyseen only when the `typedefs' option is
 * set, so with that option off the automaton stays in tnone.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
2462
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 * consider_token moves it to skeyseen on a struct-like keyword, then to
 * stagseen on the following token, which it reports as a tag.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
2476
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token replaces it with a fresh copy made by savenstr; the
 * previous string is deliberately never freed (see the comment in the
 * oignore case there).
 */
static const char *objtag = "<uninited>";
2481
/*
 * Yet another little state machine to deal with preprocessor lines.
 * The CNL macro resets it to dnone at the start of a new line; while it
 * is dignorerest, consider_token rejects every token.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
2492
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2512
2513
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;			/* a tag may be created from this token;
				   make_C_tag makes no tag when this is
				   FALSE and clears it after every call.
				   The token should be invalidated whenever
				   a state machine is reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2536
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above (int, char *, int);
static void popclass_above (int);
static void write_classname (linebuffer *, const char *qualifier);

/* The two arrays are parallel and share the same capacity SIZE; entry I
   describes nesting level I.  A NULL cname marks a level without a
   name.  C_entries allocates the arrays on first use, pushclass_above
   doubles them when they are full. */
static struct {
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to a reference to a variable named bracelev, which must be in
   scope where the macro is used. */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2556
2557static void
2558pushclass_above (int bracelev, char *str, int len)
2559{
2560 int nl;
2561
2562 popclass_above (bracelev);
2563 nl = cstack.nl;
2564 if (nl >= cstack.size)
2565 {
2566 int size = cstack.size *= 2;
2567 xrnew (cstack.cname, size, char *);
2568 xrnew (cstack.bracelev, size, int);
2569 }
2570 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2571 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2572 cstack.bracelev[nl] = bracelev;
2573 cstack.nl = nl + 1;
2574}
2575
2576static void
2577popclass_above (int bracelev)
2578{
2579 int nl;
2580
2581 for (nl = cstack.nl - 1;
2582 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2583 nl--)
2584 {
2585 free (cstack.cname[nl]);
2586 cstack.nl = nl;
2587 }
2588}
2589
2590static void
2591write_classname (linebuffer *cn, const char *qualifier)
2592{
2593 int i, len;
2594 int qlen = strlen (qualifier);
2595
2596 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2597 {
2598 len = 0;
2599 cn->len = 0;
2600 cn->buffer[0] = '\0';
2601 }
2602 else
2603 {
2604 len = strlen (cstack.cname[0]);
2605 linebuffer_setlen (cn, len);
2606 strcpy (cn->buffer, cstack.cname[0]);
2607 }
2608 for (i = 1; i < cstack.nl; i++)
2609 {
2610 char *s = cstack.cname[i];
2611 if (s == NULL)
2612 continue;
2613 linebuffer_setlen (cn, len + qlen + strlen (s));
2614 len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2615 }
2616}
2617
2618\f
2619static bool consider_token (char *, int, int, int *, int, int, bool *);
2620static void make_C_tag (bool);
2621
/*
 * consider_token ()
 *	checks to see if the current token is at the start of a
 *	function or variable, or corresponds to a typedef, or
 * 	is a struct/union/enum tag, or #define, or an enum constant.
 *	Returns TRUE in those cases and FALSE otherwise.
 *
 *	*IS_FUNC_OR_VAR gets TRUE if the token is a function, a variable,
 *	or a #define macro with args.  It is assigned only on some of the
 *	paths that return TRUE, so the caller has to give it a value
 *	beforehand.  C_EXTP points to which language we are looking at;
 *	the mask is switched from C_AUTO to C_PLPL when C++ is detected.
 *
 * Globals
 *	fvdef			IN OUT
 *	structdef		IN OUT
 *	definedef		IN OUT
 *	typdef			IN OUT
 *	objdef			IN OUT
 *	fvextern		OUT
 *	inattribute		OUT
 *	objtag			OUT
 *	token_name		OUT (Objective C method names)
 */

static bool
consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
     /* IN: token pointer */
     /* IN: token length */
     /* IN: first char after the token */
     /* IN, OUT: C extensions mask */
     /* IN: brace level */
     /* IN: parenthesis level */
     /* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  /* Keyword class of the current token; assigned anew on every call.  */
  static enum sym_type toktype;


  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = TRUE;
      return FALSE;
    }

  /*
   * Advance the definedef state machine.
   */
  switch (definedef)
    {
    case dnone:
      /* We're not on a preprocessor line. */
      if (toktype == st_C_gnumacro)
	{
	  fvdef = fdefunkey;
	  return FALSE;
	}
      break;
    case dsharpseen:
      /* First token after '#': only define and undef are of interest.  */
      if (toktype == st_C_define)
	{
	  definedef = ddefineseen;
	}
      else
	{
	  definedef = dignorerest;
	}
      return FALSE;
    case ddefineseen:
      /*
       * Make a tag for any macro, unless it is a constant
       * and constantypedefs is FALSE.
       */
      definedef = dignorerest;
      *is_func_or_var = (c == '(');
      if (!*is_func_or_var && !constantypedefs)
	return FALSE;
      else
	return TRUE;
    case dignorerest:
      return FALSE;
    default:
      error ("internal error: definedef value.");
    }

  /*
   * Now typedefs
   */
  switch (typdef)
    {
    case tnone:
      if (toktype == st_C_typedef)
	{
	  if (typedefs)
	    typdef = tkeyseen;
	  fvextern = FALSE;
	  fvdef = fvnone;
	  return FALSE;
	}
      break;
    case tkeyseen:
      switch (toktype)
	{
	case st_none:
	case st_C_class:
	case st_C_struct:
	case st_C_enum:
	  typdef = ttypeseen;
	}
      break;
    case ttypeseen:
      if (structdef == snone && fvdef == fvnone)
	{
	  fvdef = fvnameseen;
	  return TRUE;
	}
      break;
    case tend:
      /* Any token but a struct-like keyword is reported as a tag here.  */
      switch (toktype)
	{
	case st_C_class:
	case st_C_struct:
	case st_C_enum:
	  return FALSE;
	}
      return TRUE;
    }

  switch (toktype)
    {
    case st_C_javastruct:
      if (structdef == stagseen)
        structdef = scolonseen;
      return FALSE;
    case st_C_template:
    case st_C_class:
      if ((*c_extp & C_AUTO)	/* automatic detection of C++ language */
	  && bracelev == 0
	  && definedef == dnone && structdef == snone
	  && typdef == tnone && fvdef == fvnone)
	*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
      /* `template' only takes part in the language detection above.  */
      if (toktype == st_C_template)
	break;
      /* FALLTHRU */
    case st_C_struct:
    case st_C_enum:
      if (parlev == 0
	  && fvdef != vignore
	  && (typdef == tkeyseen
	      || (typedefs_or_cplusplus && structdef == snone)))
	{
	  structdef = skeyseen;
	  structtype = toktype;
	  structbracelev = bracelev;
	  if (fvdef == fvnameseen)
	    fvdef = fvnone;
	}
      return FALSE;
    }

  /* The token right after a struct-like keyword is the tag.  */
  if (structdef == skeyseen)
    {
      structdef = stagseen;
      return TRUE;
    }

  if (typdef != tnone)
    definedef = dnone;

  /* Detect Objective C constructs. */
  switch (objdef)
    {
    case onone:
      switch (toktype)
	{
	case st_C_objprot:
	  objdef = oprotocol;
	  return FALSE;
	case st_C_objimpl:
	  objdef = oimplementation;
	  return FALSE;
	}
      break;
    case oimplementation:
      /* Save the class tag for functions or variables defined inside. */
      objtag = savenstr (str, len);
      objdef = oinbody;
      return FALSE;
    case oprotocol:
      /* Save the class tag for categories. */
      objtag = savenstr (str, len);
      objdef = otagseen;
      *is_func_or_var = TRUE;
      return TRUE;
    case oparenseen:
      objdef = ocatseen;
      *is_func_or_var = TRUE;
      return TRUE;
    case oinbody:
      break;
    case omethodsign:
      if (parlev == 0)
	{
	  /* First part of a method name: start token_name afresh.  */
	  fvdef = fvnone;
	  objdef = omethodtag;
	  linebuffer_setlen (&token_name, len);
	  memcpy (token_name.buffer, str, len);
	  token_name.buffer[len] = '\0';
	  return TRUE;
	}
      return FALSE;
    case omethodcolon:
      if (parlev == 0)
	objdef = omethodparm;
      return FALSE;
    case omethodparm:
      if (parlev == 0)
	{
	  /* Further part of a method name: append it to token_name.  */
	  int oldlen = token_name.len;
	  fvdef = fvnone;
	  objdef = omethodtag;
	  linebuffer_setlen (&token_name, oldlen + len);
	  memcpy (token_name.buffer + oldlen, str, len);
	  token_name.buffer[oldlen + len] = '\0';
	  return TRUE;
	}
      return FALSE;
    case oignore:
      if (toktype == st_C_objend)
	{
	  /* Memory leakage here: the string pointed by objtag is
	     never released, because many tests would be needed to
	     avoid breaking on incorrect input code.  The amount of
	     memory leaked here is the sum of the lengths of the
	     class tags.
	  free (objtag); */
	  objdef = onone;
	}
      return FALSE;
    }

  /* A function, variable or enum constant? */
  switch (toktype)
    {
    case st_C_extern:
      fvextern = TRUE;
      switch (fvdef)
	{
	case finlist:
	case flistseen:
	case fignore:
	case vignore:
	  break;
	default:
	  fvdef = fvnone;
	}
      return FALSE;
    case st_C_ignore:
      fvextern = FALSE;
      fvdef = vignore;
      return FALSE;
    case st_C_operator:
      fvdef = foperator;
      *is_func_or_var = TRUE;
      return TRUE;
    case st_none:
      if (constantypedefs
	  && structdef == snone
	  && structtype == st_C_enum && bracelev > structbracelev)
	return TRUE;		/* enum constant */
      switch (fvdef)
	{
	case fdefunkey:
	  if (bracelev > 0)
	    break;
	  fvdef = fdefunname;	/* GNU macro */
	  *is_func_or_var = TRUE;
	  return TRUE;
	case fvnone:
	  switch (typdef)
	    {
	    case ttypeseen:
	      return FALSE;
	    case tnone:
	      if ((strneq (str, "asm", 3) && endtoken (str[3]))
		  || (strneq (str, "__asm__", 7) && endtoken (str[7])))
		{
		  fvdef = vignore;
		  return FALSE;
		}
	      break;
	    }
	  /* FALLTHRU */
	case fvnameseen:
	  if (len >= 10 && strneq (str+len-10, "::operator", 10))
	    {
	      if (*c_extp & C_AUTO) /* automatic detection of C++ */
		*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
	      fvdef = foperator;
	      *is_func_or_var = TRUE;
	      return TRUE;
	    }
	  /* Inside braces only struct members are of interest.  */
	  if (bracelev > 0 && !instruct)
	    break;
	  fvdef = fvnameseen;	/* function or variable */
	  *is_func_or_var = TRUE;
	  return TRUE;
	}
      break;
    }

  return FALSE;
}
2936
2937\f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
static struct
{
  long linepos;			/* value of charno when the line was read */
  linebuffer lb;		/* the line itself */
} lbs[2];

/* The macros below expand to references to variables that must be in
   scope where they are used: curndx and newndx (indices into lbs),
   c_ext, charno, inf, lp, quotednl and savetoken. */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the language mask c_ext. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next line into the current buffer and point lp at its
   start, leaving definedef as it is. */
#define CNL_SAVE_DEFINEDEF()						\
do {									\
  curlinepos = charno;							\
  readline (&curlb, inf);						\
  lp = curlb.buffer;							\
  quotednl = FALSE;							\
  newndx = curndx;							\
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also bring back the token saved in
   savetoken, if any, and reset the preprocessor state machine. */
#define CNL()								\
do {									\
  CNL_SAVE_DEFINEDEF();							\
  if (savetoken.valid)							\
    {									\
      token = savetoken;						\
      savetoken.valid = FALSE;						\
    }									\
  definedef = dnone;							\
} while (0)
2980
2981
2982static void
2983make_C_tag (int isfun)
2984{
2985 /* This function is never called when token.valid is FALSE, but
2986 we must protect against invalid input or internal errors. */
2987 if (token.valid)
2988 make_tag (token_name.buffer, token_name.len, isfun, token.line,
2989 token.offset+token.length+1, token.lineno, token.linepos);
2990 else if (DEBUG)
2991 { /* this branch is optimized away if !DEBUG */
2992 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2993 token_name.len + 17, isfun, token.line,
2994 token.offset+token.length+1, token.lineno, token.linepos);
2995 error ("INVALID TOKEN");
2996 }
2997
2998 token.valid = FALSE;
2999}
3000
3001
3002/*
3003 * C_entries ()
3004 * This routine finds functions, variables, typedefs,
3005 * #define's, enum constants and struct/union/enum definitions in
3006 * C syntax and adds them to the list.
 *
 * C_EXT is a set of language flags: it is tested against YACC and
 * C_AUTO below, and C_AUTO is replaced by C_PLPL once a `::' is seen
 * inside an identifier.  INF is the input stream; the main loop runs
 * until feof (inf) is true.
 *
 * Input is examined one character at a time through `lp'.  The CNL and
 * CNL_SAVE_DEFINEDEF macros (defined elsewhere) are invoked whenever
 * the end-of-line marker '\0' is reached -- presumably they fetch the
 * next line; confirm against their definitions.
 *
 * Side effects: resets the file-scope parser state (fvdef, fvextern,
 * typdef, structdef, definedef, objdef, token.valid), allocates cstack
 * the first time it is found empty, and initializes then frees the two
 * lbs[] line buffers.  Tags are emitted through make_C_tag.
3007 */
3008static void
3009C_entries (int c_ext, FILE *inf)
3010 /* extension of C */
3011 /* input file */
3012{
3013 register char c; /* latest char read; '\0' for end of line */
3014 register char *lp; /* pointer one beyond the character `c' */
3015 int curndx, newndx; /* indices for current and new lb */
3016 register int tokoff; /* offset in line of start of current token */
3017 register int toklen; /* length of current token */
3018 const char *qualifier; /* string used to qualify names */
3019 int qlen; /* length of qualifier */
3020 int bracelev; /* current brace level */
3021 int bracketlev; /* current bracket level */
3022 int parlev; /* current parenthesis level */
3023 int attrparlev; /* __attribute__ parenthesis level */
3024 int templatelev; /* current template level */
3025 int typdefbracelev; /* bracelev where a typedef struct body begun */
3026 bool incomm, inquote, inchar, quotednl, midtoken;
3027 bool yacc_rules; /* in the rules part of a yacc file */
3028 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3029
3030 /* Set up both line buffers; allocate the class stack only if it is still empty. */
3031 linebuffer_init (&lbs[0].lb);
3032 linebuffer_init (&lbs[1].lb);
3033 if (cstack.size == 0)
3034 {
3035 cstack.size = (DEBUG) ? 1 : 4;
3036 cstack.nl = 0;
3037 cstack.cname = xnew (cstack.size, char *);
3038 cstack.bracelev = xnew (cstack.size, int);
3039 }
3040 /* Reset the scanner position and all parser state for this file. */
3041 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3042 curndx = newndx = 0;
3043 lp = curlb.buffer;
3044 *lp = 0;
3045
3046 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3047 structdef = snone; definedef = dnone; objdef = onone;
3048 yacc_rules = FALSE;
3049 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3050 token.valid = savetoken.valid = FALSE;
3051 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3052 if (cjava)
3053 { qualifier = "."; qlen = 1; }
3054 else
3055 { qualifier = "::"; qlen = 2; }
3056
3057 /* Main loop: one character is taken from the line through lp per iteration. */
3058 while (!feof (inf))
3059 {
3060 c = *lp++;
3061 if (c == '\\')
3062 {
3063 /* If we are at the end of the line, the next character is a
3064 '\0'; do not skip it, because it is what tells us
3065 to read the next line. */
3066 if (*lp == '\0')
3067 {
3068 quotednl = TRUE;
3069 continue;
3070 }
3071 lp++;
3072 c = ' ';
3073 }
3074 else if (incomm)
3075 {
3076 switch (c)
3077 {
3078 case '*':
3079 if (*lp == '/')
3080 {
3081 c = *lp++;
3082 incomm = FALSE;
3083 }
3084 break;
3085 case '\0':
3086 /* Newlines inside comments do not end macro definitions in
3087 traditional cpp. */
3088 CNL_SAVE_DEFINEDEF ();
3089 break;
3090 }
3091 continue;
3092 }
3093 else if (inquote)
3094 {
3095 switch (c)
3096 {
3097 case '"':
3098 inquote = FALSE;
3099 break;
3100 case '\0':
3101 /* Newlines inside strings do not end macro definitions
3102 in traditional cpp, even though compilers don't
3103 usually accept them. */
3104 CNL_SAVE_DEFINEDEF ();
3105 break;
3106 }
3107 continue;
3108 }
3109 else if (inchar)
3110 {
3111 switch (c)
3112 {
3113 case '\0':
3114 /* Hmmm, something went wrong. */
3115 CNL ();
3116 /* FALLTHRU */
3117 case '\'':
3118 inchar = FALSE;
3119 break;
3120 }
3121 continue;
3122 }
3123 else switch (c)
3124 {
3125 case '"':
3126 inquote = TRUE;
3127 if (bracketlev > 0)
3128 continue;
3129 if (inattribute)
3130 break;
3131 switch (fvdef)
3132 {
3133 case fdefunkey:
3134 case fstartlist:
3135 case finlist:
3136 case fignore:
3137 case vignore:
3138 break;
3139 default:
3140 fvextern = FALSE;
3141 fvdef = fvnone;
3142 }
3143 continue;
3144 case '\'':
3145 inchar = TRUE;
3146 if (bracketlev > 0)
3147 continue;
3148 if (inattribute)
3149 break;
3150 if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3151 {
3152 fvextern = FALSE;
3153 fvdef = fvnone;
3154 }
3155 continue;
3156 case '/':
3157 if (*lp == '*')
3158 {
3159 incomm = TRUE;
3160 lp++;
3161 c = ' ';
3162 if (bracketlev > 0)
3163 continue;
3164 }
3165 else if (/* cplpl && */ *lp == '/')
3166 {
3167 c = '\0';
3168 }
3169 break;
3170 case '%':
3171 if ((c_ext & YACC) && *lp == '%')
3172 {
3173 /* Entering or exiting rules section in yacc file. */
3174 lp++;
3175 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3176 typdef = tnone; structdef = snone;
3177 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3178 bracelev = 0;
3179 yacc_rules = !yacc_rules;
3180 continue;
3181 }
3182 else
3183 break;
3184 case '#':
3185 if (definedef == dnone)
3186 {
3187 char *cp;
3188 bool cpptoken = TRUE;
3189
3190 /* Look back on this line. If all blanks, or nonblanks
3191 followed by an end of comment, this is a preprocessor
3192 token. */
3193 for (cp = newlb.buffer; cp < lp-1; cp++)
3194 if (!iswhite (*cp))
3195 {
3196 if (*cp == '*' && cp[1] == '/')
3197 {
3198 cp++;
3199 cpptoken = TRUE;
3200 }
3201 else
3202 cpptoken = FALSE;
3203 }
3204 if (cpptoken)
3205 definedef = dsharpseen;
3206 } /* if (definedef == dnone) */
3207 continue;
3208 case '[':
3209 bracketlev++;
3210 continue;
3211 default:
3212 if (bracketlev > 0)
3213 {
3214 if (c == ']')
3215 --bracketlev;
3216 else if (c == '\0')
3217 CNL_SAVE_DEFINEDEF ();
3218 continue;
3219 }
3220 break;
3221 } /* switch (c) */
3222
3223
3224 /* Consider token only if some involved conditions are satisfied. */
3225 if (typdef != tignore
3226 && definedef != dignorerest
3227 && fvdef != finlist
3228 && templatelev == 0
3229 && (definedef != dnone
3230 || structdef != scolonseen)
3231 && !inattribute)
3232 {
3233 if (midtoken)
3234 {
3235 if (endtoken (c))
3236 {
3237 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3238 /* This handles :: in the middle,
3239 but not at the beginning of an identifier.
3240 Also, space-separated :: is not recognized. */
3241 {
3242 if (c_ext & C_AUTO) /* automatic detection of C++ */
3243 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3244 lp += 2;
3245 toklen += 2;
3246 c = lp[-1];
3247 goto still_in_token;
3248 }
3249 else
3250 {
3251 bool funorvar = FALSE;
3252
3253 if (yacc_rules
3254 || consider_token (newlb.buffer + tokoff, toklen, c,
3255 &c_ext, bracelev, parlev,
3256 &funorvar))
3257 {
3258 if (fvdef == foperator)
3259 {
3260 char *oldlp = lp;
3261 lp = skip_spaces (lp-1);
3262 if (*lp != '\0')
3263 lp += 1;
3264 while (*lp != '\0'
3265 && !iswhite (*lp) && *lp != '(')
3266 lp += 1;
3267 c = *lp++;
3268 toklen += lp - oldlp;
3269 }
3270 token.named = FALSE;
3271 if (!plainc
3272 && nestlev > 0 && definedef == dnone)
3273 /* in struct body */
3274 {
3275 int len;
3276 write_classname (&token_name, qualifier);
3277 len = token_name.len;
3278 linebuffer_setlen (&token_name, len+qlen+toklen);
3279 sprintf (token_name.buffer + len, "%s%.*s",
3280 qualifier, toklen, newlb.buffer + tokoff);
3281 token.named = TRUE;
3282 }
3283 else if (objdef == ocatseen)
3284 /* Objective C category */
3285 {
3286 int len = strlen (objtag) + 2 + toklen;
3287 linebuffer_setlen (&token_name, len);
3288 sprintf (token_name.buffer, "%s(%.*s)",
3289 objtag, toklen, newlb.buffer + tokoff);
3290 token.named = TRUE;
3291 }
3292 else if (objdef == omethodtag
3293 || objdef == omethodparm)
3294 /* Objective C method */
3295 {
3296 token.named = TRUE;
3297 }
3298 else if (fvdef == fdefunname)
3299 /* GNU DEFUN and similar macros */
3300 {
3301 bool defun = (newlb.buffer[tokoff] == 'F');
3302 int off = tokoff;
3303 int len = toklen;
3304
3305 /* Rewrite the tag so that emacs lisp DEFUNs
3306 can be found by their elisp name */
3307 if (defun)
3308 {
3309 off += 1;
3310 len -= 1;
3311 }
3312 linebuffer_setlen (&token_name, len);
3313 memcpy (token_name.buffer,
3314 newlb.buffer + off, len);
3315 token_name.buffer[len] = '\0';
3316 if (defun)
3317 while (--len >= 0)
3318 if (token_name.buffer[len] == '_')
3319 token_name.buffer[len] = '-';
3320 token.named = defun;
3321 }
3322 else
3323 {
3324 linebuffer_setlen (&token_name, toklen);
3325 memcpy (token_name.buffer,
3326 newlb.buffer + tokoff, toklen);
3327 token_name.buffer[toklen] = '\0';
3328 /* Name macros and members. */
3329 token.named = (structdef == stagseen
3330 || typdef == ttypeseen
3331 || typdef == tend
3332 || (funorvar
3333 && definedef == dignorerest)
3334 || (funorvar
3335 && definedef == dnone
3336 && structdef == snone
3337 && bracelev > 0));
3338 }
3339 token.lineno = lineno;
3340 token.offset = tokoff;
3341 token.length = toklen;
3342 token.line = newlb.buffer;
3343 token.linepos = newlinepos;
3344 token.valid = TRUE;
3345
3346 if (definedef == dnone
3347 && (fvdef == fvnameseen
3348 || fvdef == foperator
3349 || structdef == stagseen
3350 || typdef == tend
3351 || typdef == ttypeseen
3352 || objdef != onone))
3353 {
3354 if (current_lb_is_new)
3355 switch_line_buffers ();
3356 }
3357 else if (definedef != dnone
3358 || fvdef == fdefunname
3359 || instruct)
3360 make_C_tag (funorvar);
3361 }
3362 else /* not yacc and consider_token failed */
3363 {
3364 if (inattribute && fvdef == fignore)
3365 {
3366 /* We have just met __attribute__ after a
3367 function parameter list: do not tag the
3368 function again. */
3369 fvdef = fvnone;
3370 }
3371 }
3372 midtoken = FALSE;
3373 }
3374 } /* if (endtoken (c)) */
3375 else if (intoken (c))
3376 still_in_token:
3377 {
3378 toklen++;
3379 continue;
3380 }
3381 } /* if (midtoken) */
3382 else if (begtoken (c))
3383 {
3384 switch (definedef)
3385 {
3386 case dnone:
3387 switch (fvdef)
3388 {
3389 case fstartlist:
3390 /* This prevents tagging fb in
3391 void (__attribute__((noreturn)) *fb) (void);
3392 Fixing this is not easy and not very important. */
3393 fvdef = finlist;
3394 continue;
3395 case flistseen:
3396 if (plainc || declarations)
3397 {
3398 make_C_tag (TRUE); /* a function */
3399 fvdef = fignore;
3400 }
3401 break;
3402 }
3403 if (structdef == stagseen && !cjava)
3404 {
3405 popclass_above (bracelev);
3406 structdef = snone;
3407 }
3408 break;
3409 case dsharpseen:
3410 savetoken = token;
3411 break;
3412 }
3413 if (!yacc_rules || lp == newlb.buffer + 1)
3414 {
3415 tokoff = lp - 1 - newlb.buffer;
3416 toklen = 1;
3417 midtoken = TRUE;
3418 }
3419 continue;
3420 } /* if (begtoken) */
3421 } /* if must look at token */
3422
3423
3424 /* Detect end of line, colon, comma, semicolon and various braces
3425 after having handled a token.*/
3426 switch (c)
3427 {
3428 case ':':
3429 if (inattribute)
3430 break;
3431 if (yacc_rules && token.offset == 0 && token.valid)
3432 {
3433 make_C_tag (FALSE); /* a yacc function */
3434 break;
3435 }
3436 if (definedef != dnone)
3437 break;
3438 switch (objdef)
3439 {
3440 case otagseen:
3441 objdef = oignore;
3442 make_C_tag (TRUE); /* an Objective C class */
3443 break;
3444 case omethodtag:
3445 case omethodparm:
3446 objdef = omethodcolon;
3447 linebuffer_setlen (&token_name, token_name.len + 1);
3448 strcat (token_name.buffer, ":");
3449 break;
3450 }
3451 if (structdef == stagseen)
3452 {
3453 structdef = scolonseen;
3454 break;
3455 }
3456 /* Should be useless, but may work as a safety net. */
3457 if (cplpl && fvdef == flistseen)
3458 {
3459 make_C_tag (TRUE); /* a function */
3460 fvdef = fignore;
3461 break;
3462 }
3463 break;
3464 case ';':
3465 if (definedef != dnone || inattribute)
3466 break;
3467 switch (typdef)
3468 {
3469 case tend:
3470 case ttypeseen:
3471 make_C_tag (FALSE); /* a typedef */
3472 typdef = tnone;
3473 fvdef = fvnone;
3474 break;
3475 case tnone:
3476 case tinbody:
3477 case tignore:
3478 switch (fvdef)
3479 {
3480 case fignore:
3481 if (typdef == tignore || cplpl)
3482 fvdef = fvnone;
3483 break;
3484 case fvnameseen:
3485 if ((globals && bracelev == 0 && (!fvextern || declarations))
3486 || (members && instruct))
3487 make_C_tag (FALSE); /* a variable */
3488 fvextern = FALSE;
3489 fvdef = fvnone;
3490 token.valid = FALSE;
3491 break;
3492 case flistseen:
3493 if ((declarations
3494 && (cplpl || !instruct)
3495 && (typdef == tnone || (typdef != tignore && instruct)))
3496 || (members
3497 && plainc && instruct))
3498 make_C_tag (TRUE); /* a function */
3499 /* FALLTHRU */
3500 default:
3501 fvextern = FALSE;
3502 fvdef = fvnone;
3503 if (declarations
3504 && cplpl && structdef == stagseen)
3505 make_C_tag (FALSE); /* forward declaration */
3506 else
3507 token.valid = FALSE;
3508 } /* switch (fvdef) */
3509 /* FALLTHRU */
3510 default:
3511 if (!instruct)
3512 typdef = tnone;
3513 }
3514 if (structdef == stagseen)
3515 structdef = snone;
3516 break;
3517 case ',':
3518 if (definedef != dnone || inattribute)
3519 break;
3520 switch (objdef)
3521 {
3522 case omethodtag:
3523 case omethodparm:
3524 make_C_tag (TRUE); /* an Objective C method */
3525 objdef = oinbody;
3526 break;
3527 }
3528 switch (fvdef)
3529 {
3530 case fdefunkey:
3531 case foperator:
3532 case fstartlist:
3533 case finlist:
3534 case fignore:
3535 case vignore:
3536 break;
3537 case fdefunname:
3538 fvdef = fignore;
3539 break;
3540 case fvnameseen:
3541 if (parlev == 0
3542 && ((globals
3543 && bracelev == 0
3544 && templatelev == 0
3545 && (!fvextern || declarations))
3546 || (members && instruct)))
3547 make_C_tag (FALSE); /* a variable */
3548 break;
3549 case flistseen:
3550 if ((declarations && typdef == tnone && !instruct)
3551 || (members && typdef != tignore && instruct))
3552 {
3553 make_C_tag (TRUE); /* a function */
3554 fvdef = fvnameseen;
3555 }
3556 else if (!declarations)
3557 fvdef = fvnone;
3558 token.valid = FALSE;
3559 break;
3560 default:
3561 fvdef = fvnone;
3562 }
3563 if (structdef == stagseen)
3564 structdef = snone;
3565 break;
3566 case ']':
3567 if (definedef != dnone || inattribute)
3568 break;
3569 if (structdef == stagseen)
3570 structdef = snone;
3571 switch (typdef)
3572 {
3573 case ttypeseen:
3574 case tend:
3575 typdef = tignore;
3576 make_C_tag (FALSE); /* a typedef */
3577 break;
3578 case tnone:
3579 case tinbody:
3580 switch (fvdef)
3581 {
3582 case foperator:
3583 case finlist:
3584 case fignore:
3585 case vignore:
3586 break;
3587 case fvnameseen:
3588 if ((members && bracelev == 1)
3589 || (globals && bracelev == 0
3590 && (!fvextern || declarations)))
3591 make_C_tag (FALSE); /* a variable */
3592 /* FALLTHRU */
3593 default:
3594 fvdef = fvnone;
3595 }
3596 break;
3597 }
3598 break;
3599 case '(':
3600 if (inattribute)
3601 {
3602 attrparlev++;
3603 break;
3604 }
3605 if (definedef != dnone)
3606 break;
3607 if (objdef == otagseen && parlev == 0)
3608 objdef = oparenseen;
3609 switch (fvdef)
3610 {
3611 case fvnameseen:
3612 if (typdef == ttypeseen
3613 && *lp != '*'
3614 && !instruct)
3615 {
3616 /* This handles constructs like:
3617 typedef void OperatorFun (int fun); */
3618 make_C_tag (FALSE);
3619 typdef = tignore;
3620 fvdef = fignore;
3621 break;
3622 }
3623 /* FALLTHRU */
3624 case foperator:
3625 fvdef = fstartlist;
3626 break;
3627 case flistseen:
3628 fvdef = finlist;
3629 break;
3630 }
3631 parlev++;
3632 break;
3633 case ')':
3634 if (inattribute)
3635 {
3636 if (--attrparlev == 0)
3637 inattribute = FALSE;
3638 break;
3639 }
3640 if (definedef != dnone)
3641 break;
3642 if (objdef == ocatseen && parlev == 1)
3643 {
3644 make_C_tag (TRUE); /* an Objective C category */
3645 objdef = oignore;
3646 }
3647 if (--parlev == 0)
3648 {
3649 switch (fvdef)
3650 {
3651 case fstartlist:
3652 case finlist:
3653 fvdef = flistseen;
3654 break;
3655 }
3656 if (!instruct
3657 && (typdef == tend
3658 || typdef == ttypeseen))
3659 {
3660 typdef = tignore;
3661 make_C_tag (FALSE); /* a typedef */
3662 }
3663 }
3664 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3665 parlev = 0;
3666 break;
3667 case '{':
3668 if (definedef != dnone)
3669 break;
3670 if (typdef == ttypeseen)
3671 {
3672 /* Whenever typdef is set to tinbody (currently only
3673 here), typdefbracelev should be set to bracelev. */
3674 typdef = tinbody;
3675 typdefbracelev = bracelev;
3676 }
3677 switch (fvdef)
3678 {
3679 case flistseen:
3680 make_C_tag (TRUE); /* a function */
3681 /* FALLTHRU */
3682 case fignore:
3683 fvdef = fvnone;
3684 break;
3685 case fvnone:
3686 switch (objdef)
3687 {
3688 case otagseen:
3689 make_C_tag (TRUE); /* an Objective C class */
3690 objdef = oignore;
3691 break;
3692 case omethodtag:
3693 case omethodparm:
3694 make_C_tag (TRUE); /* an Objective C method */
3695 objdef = oinbody;
3696 break;
3697 default:
3698 /* Neutralize `extern "C" {' grot. */
3699 if (bracelev == 0 && structdef == snone && nestlev == 0
3700 && typdef == tnone)
3701 bracelev = -1;
3702 }
3703 break;
3704 }
3705 switch (structdef)
3706 {
3707 case skeyseen: /* unnamed struct */
3708 pushclass_above (bracelev, NULL, 0);
3709 structdef = snone;
3710 break;
3711 case stagseen: /* named struct or enum */
3712 case scolonseen: /* a class */
3713 pushclass_above (bracelev,token.line+token.offset, token.length);
3714 structdef = snone;
3715 make_C_tag (FALSE); /* a struct or enum */
3716 break;
3717 }
3718 bracelev += 1;
3719 break;
3720 case '*':
3721 if (definedef != dnone)
3722 break;
3723 if (fvdef == fstartlist)
3724 {
3725 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3726 token.valid = FALSE;
3727 }
3728 break;
3729 case '}':
3730 if (definedef != dnone)
3731 break;
3732 bracelev -= 1;
3733 if (!ignoreindent && lp == newlb.buffer + 1)
3734 {
3735 if (bracelev != 0)
3736 token.valid = FALSE; /* unexpected value, token unreliable */
3737 bracelev = 0; /* reset brace level if first column */
3738 parlev = 0; /* also reset paren level, just in case... */
3739 }
3740 else if (bracelev < 0)
3741 {
3742 token.valid = FALSE; /* something gone amiss, token unreliable */
3743 bracelev = 0;
3744 }
3745 if (bracelev == 0 && fvdef == vignore)
3746 fvdef = fvnone; /* end of function */
3747 popclass_above (bracelev);
3748 structdef = snone;
3749 /* Only if typdef == tinbody is typdefbracelev significant. */
3750 if (typdef == tinbody && bracelev <= typdefbracelev)
3751 {
3752 assert (bracelev == typdefbracelev);
3753 typdef = tend;
3754 }
3755 break;
3756 case '=':
3757 if (definedef != dnone)
3758 break;
3759 switch (fvdef)
3760 {
3761 case foperator:
3762 case finlist:
3763 case fignore:
3764 case vignore:
3765 break;
3766 case fvnameseen:
3767 if ((members && bracelev == 1)
3768 || (globals && bracelev == 0 && (!fvextern || declarations)))
3769 make_C_tag (FALSE); /* a variable */
3770 /* FALLTHRU */
3771 default:
3772 fvdef = vignore;
3773 }
3774 break;
3775 case '<':
3776 if (cplpl
3777 && (structdef == stagseen || fvdef == fvnameseen))
3778 {
3779 templatelev++;
3780 break;
3781 }
3782 goto resetfvdef;
3783 case '>':
3784 if (templatelev > 0)
3785 {
3786 templatelev--;
3787 break;
3788 }
3789 goto resetfvdef;
3790 case '+':
3791 case '-':
3792 if (objdef == oinbody && bracelev == 0)
3793 {
3794 objdef = omethodsign;
3795 break;
3796 }
3797 /* FALLTHRU */
3798 resetfvdef:
3799 case '#': case '~': case '&': case '%': case '/':
3800 case '|': case '^': case '!': case '.': case '?':
3801 if (definedef != dnone)
3802 break;
3803 /* These surely cannot follow a function tag in C. */
3804 switch (fvdef)
3805 {
3806 case foperator:
3807 case finlist:
3808 case fignore:
3809 case vignore:
3810 break;
3811 default:
3812 fvdef = fvnone;
3813 }
3814 break;
3815 case '\0':
3816 if (objdef == otagseen)
3817 {
3818 make_C_tag (TRUE); /* an Objective C class */
3819 objdef = oignore;
3820 }
3821 /* If a macro spans multiple lines don't reset its state. */
3822 if (quotednl)
3823 CNL_SAVE_DEFINEDEF ();
3824 else
3825 CNL ();
3826 break;
3827 } /* switch (c) */
3828
3829 } /* while not eof */
3830 /* Release the two line buffers initialized on entry. */
3831 free (lbs[0].lb.buffer);
3832 free (lbs[1].lb.buffer);
3833}
3834
3835/*
3836 * Process either a C++ file or a C file depending on the setting
3837 * of a global flag.
3838 */
3839static void
3840default_C_entries (FILE *inf)
3841{
3842 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3843}
3844
/* Parse INF as plain C: no language-extension flag is passed.  */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3851
3852/* Always do C++. */
3853static void
3854Cplusplus_entries (FILE *inf)
3855{
3856 C_entries (C_PLPL, inf);
3857}
3858
3859/* Always do Java. */
3860static void
3861Cjava_entries (FILE *inf)
3862{
3863 C_entries (C_JAVA, inf);
3864}
3865
3866/* Always do C*. */
3867static void
3868Cstar_entries (FILE *inf)
3869{
3870 C_entries (C_STAR, inf);
3871}
3872
3873/* Always do Yacc. */
3874static void
3875Yacc_entries (FILE *inf)
3876{
3877 C_entries (YACC, inf);
3878}
3879
3880\f
/* Useful macros. */

/* Loop header, to be followed by the loop body.  As long as
   FILE_POINTER is not at end of file, a line is read into LINE_BUFFER
   with readline and CHAR_POINTER is set to the start of that buffer
   before the body runs.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  while (!feof (file_pointer)		     /* stop at end of file */	\
	 && (readline (&line_buffer, file_pointer), /* read a line */	\
	     char_pointer = line_buffer.buffer,	    /* point at it */	\
	     TRUE))			     /* then run the body */
3890
/* Test whether the text at CP is the keyword KW (which must be a
   string literal) followed by a character for which notinname holds.
   On a match CP is set to what skip_spaces returns for the position
   just after the keyword; otherwise CP is left unchanged and the
   result is zero.  CP is evaluated more than once.  */
#define LOOKING_AT(cp, kw)						\
  ((assert ("" kw), TRUE)   /* only a literal can be pasted after "" */ \
   && strneq ((cp), kw, sizeof (kw) - 1)	/* text is KW */	\
   && notinname (*((cp) + (sizeof (kw) - 1)))	/* KW ends here */	\
   && ((cp) = skip_spaces ((cp) + (sizeof (kw) - 1)))) /* go past it */
3896
/* Case-insensitive variant of LOOKING_AT: the comparison is done with
   strncaseeq, the character after the keyword is not checked with
   notinname, and on a match CP is only advanced past the keyword --
   any spaces that follow are NOT skipped.  */
#define LOOKING_AT_NOCASE(cp, kw)					\
  ((assert ("" kw), TRUE)   /* only a literal can be pasted after "" */ \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)	/* text is KW */	\
   && ((cp) += sizeof (kw) - 1))		/* step over KW */
3902
3903/*
3904 * Read a file, but do no processing. This is used to do regexp
3905 * matching on files that have no language defined.
3906 */
3907static void
3908just_read_file (FILE *inf)
3909{
3910 while (!feof (inf))
3911 readline (&lb, inf);
3912}
3913
3914\f
3915/* Fortran parsing */
3916
3917static void F_takeprec (void);
3918static void F_getit (FILE *);
3919
3920static void
3921F_takeprec (void)
3922{
3923 dbp = skip_spaces (dbp);
3924 if (*dbp != '*')
3925 return;
3926 dbp++;
3927 dbp = skip_spaces (dbp);
3928 if (strneq (dbp, "(*)", 3))
3929 {
3930 dbp += 3;
3931 return;
3932 }
3933 if (!ISDIGIT (*dbp))
3934 {
3935 --dbp; /* force failure */
3936 return;
3937 }
3938 do
3939 dbp++;
3940 while (ISDIGIT (*dbp));
3941}
3942
3943static void
3944F_getit (FILE *inf)
3945{
3946 register char *cp;
3947
3948 dbp = skip_spaces (dbp);
3949 if (*dbp == '\0')
3950 {
3951 readline (&lb, inf);
3952 dbp = lb.buffer;
3953 if (dbp[5] != '&')
3954 return;
3955 dbp += 6;
3956 dbp = skip_spaces (dbp);
3957 }
3958 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3959 return;
3960 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3961 continue;
3962 make_tag (dbp, cp-dbp, TRUE,
3963 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3964}
3965
3966
3967static void
3968Fortran_functions (FILE *inf)
3969{
3970 LOOP_ON_INPUT_LINES (inf, lb, dbp)
3971 {
3972 if (*dbp == '%')
3973 dbp++; /* Ratfor escape to fortran */
3974 dbp = skip_spaces (dbp);
3975 if (*dbp == '\0')
3976 continue;
3977
3978 if (LOOKING_AT_NOCASE (dbp, "recursive"))
3979 dbp = skip_spaces (dbp);
3980
3981 if (LOOKING_AT_NOCASE (dbp, "pure"))
3982 dbp = skip_spaces (dbp);
3983
3984 if (LOOKING_AT_NOCASE (dbp, "elemental"))
3985 dbp = skip_spaces (dbp);
3986
3987 switch (lowcase (*dbp))
3988 {
3989 case 'i':
3990 if (nocase_tail ("integer"))
3991 F_takeprec ();
3992 break;
3993 case 'r':
3994 if (nocase_tail ("real"))
3995 F_takeprec ();
3996 break;
3997 case 'l':
3998 if (nocase_tail ("logical"))
3999 F_takeprec ();
4000 break;
4001 case 'c':
4002 if (nocase_tail ("complex") || nocase_tail ("character"))
4003 F_takeprec ();
4004 break;
4005 case 'd':
4006 if (nocase_tail ("double"))
4007 {
4008 dbp = skip_spaces (dbp);
4009 if (*dbp == '\0')
4010 continue;
4011 if (nocase_tail ("precision"))
4012 break;
4013 continue;
4014 }
4015 break;
4016 }
4017 dbp = skip_spaces (dbp);
4018 if (*dbp == '\0')
4019 continue;
4020 switch (lowcase (*dbp))
4021 {
4022 case 'f':
4023 if (nocase_tail ("function"))
4024 F_getit (inf);
4025 continue;
4026 case 's':
4027 if (nocase_tail ("subroutine"))
4028 F_getit (inf);
4029 continue;
4030 case 'e':
4031 if (nocase_tail ("entry"))
4032 F_getit (inf);
4033 continue;
4034 case 'b':
4035 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4036 {
4037 dbp = skip_spaces (dbp);
4038 if (*dbp == '\0') /* assume un-named */
4039 make_tag ("blockdata", 9, TRUE,
4040 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4041 else
4042 F_getit (inf); /* look for name */
4043 }
4044 continue;
4045 }
4046 }
4047}
4048
4049\f
4050/*
4051 * Ada parsing
4052 * Original code by
4053 * Philippe Waroquiers (1998)
4054 */
4055
4056/* Once we are positioned after an "interesting" keyword, let's get
4057 the real tag value necessary. */
4058static void
4059Ada_getit (FILE *inf, const char *name_qualifier)
4060{
4061 register char *cp;
4062 char *name;
4063 char c;
4064
4065 while (!feof (inf))
4066 {
4067 dbp = skip_spaces (dbp);
4068 if (*dbp == '\0'
4069 || (dbp[0] == '-' && dbp[1] == '-'))
4070 {
4071 readline (&lb, inf);
4072 dbp = lb.buffer;
4073 }
4074 switch (lowcase (*dbp))
4075 {
4076 case 'b':
4077 if (nocase_tail ("body"))
4078 {
4079 /* Skipping body of procedure body or package body or ....
4080 resetting qualifier to body instead of spec. */
4081 name_qualifier = "/b";
4082 continue;
4083 }
4084 break;
4085 case 't':
4086 /* Skipping type of task type or protected type ... */
4087 if (nocase_tail ("type"))
4088 continue;
4089 break;
4090 }
4091 if (*dbp == '"')
4092 {
4093 dbp += 1;
4094 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4095 continue;
4096 }
4097 else
4098 {
4099 dbp = skip_spaces (dbp);
4100 for (cp = dbp;
4101 (*cp != '\0'
4102 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4103 cp++)
4104 continue;
4105 if (cp == dbp)
4106 return;
4107 }
4108 c = *cp;
4109 *cp = '\0';
4110 name = concat (dbp, name_qualifier, "");
4111 *cp = c;
4112 make_tag (name, strlen (name), TRUE,
4113 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4114 free (name);
4115 if (c == '"')
4116 dbp = cp + 1;
4117 return;
4118 }
4119}
4120
4121static void
4122Ada_funcs (FILE *inf)
4123{
4124 bool inquote = FALSE;
4125 bool skip_till_semicolumn = FALSE;
4126
4127 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4128 {
4129 while (*dbp != '\0')
4130 {
4131 /* Skip a string i.e. "abcd". */
4132 if (inquote || (*dbp == '"'))
4133 {
4134 dbp = etags_strchr (dbp + !inquote, '"');
4135 if (dbp != NULL)
4136 {
4137 inquote = FALSE;
4138 dbp += 1;
4139 continue; /* advance char */
4140 }
4141 else
4142 {
4143 inquote = TRUE;
4144 break; /* advance line */
4145 }
4146 }
4147
4148 /* Skip comments. */
4149 if (dbp[0] == '-' && dbp[1] == '-')
4150 break; /* advance line */
4151
4152 /* Skip character enclosed in single quote i.e. 'a'
4153 and skip single quote starting an attribute i.e. 'Image. */
4154 if (*dbp == '\'')
4155 {
4156 dbp++ ;
4157 if (*dbp != '\0')
4158 dbp++;
4159 continue;
4160 }
4161
4162 if (skip_till_semicolumn)
4163 {
4164 if (*dbp == ';')
4165 skip_till_semicolumn = FALSE;
4166 dbp++;
4167 continue; /* advance char */
4168 }
4169
4170 /* Search for beginning of a token. */
4171 if (!begtoken (*dbp))
4172 {
4173 dbp++;
4174 continue; /* advance char */
4175 }
4176
4177 /* We are at the beginning of a token. */
4178 switch (lowcase (*dbp))
4179 {
4180 case 'f':
4181 if (!packages_only && nocase_tail ("function"))
4182 Ada_getit (inf, "/f");
4183 else
4184 break; /* from switch */
4185 continue; /* advance char */
4186 case 'p':
4187 if (!packages_only && nocase_tail ("procedure"))
4188 Ada_getit (inf, "/p");
4189 else if (nocase_tail ("package"))
4190 Ada_getit (inf, "/s");
4191 else if (nocase_tail ("protected")) /* protected type */
4192 Ada_getit (inf, "/t");
4193 else
4194 break; /* from switch */
4195 continue; /* advance char */
4196
4197 case 'u':
4198 if (typedefs && !packages_only && nocase_tail ("use"))
4199 {
4200 /* when tagging types, avoid tagging use type Pack.Typename;
4201 for this, we will skip everything till a ; */
4202 skip_till_semicolumn = TRUE;
4203 continue; /* advance char */
4204 }
4205
4206 case 't':
4207 if (!packages_only && nocase_tail ("task"))
4208 Ada_getit (inf, "/k");
4209 else if (typedefs && !packages_only && nocase_tail ("type"))
4210 {
4211 Ada_getit (inf, "/t");
4212 while (*dbp != '\0')
4213 dbp += 1;
4214 }
4215 else
4216 break; /* from switch */
4217 continue; /* advance char */
4218 }
4219
4220 /* Look for the end of the token. */
4221 while (!endtoken (*dbp))
4222 dbp++;
4223
4224 } /* advance char */
4225 } /* advance line */
4226}
4227
4228\f
4229/*
4230 * Unix and microcontroller assembly tag handling
4231 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4232 * Idea by Bob Weiner, Motorola Inc. (1994)
4233 */
4234static void
4235Asm_labels (FILE *inf)
4236{
4237 register char *cp;
4238
4239 LOOP_ON_INPUT_LINES (inf, lb, cp)
4240 {
4241 /* If first char is alphabetic or one of [_.$], test for colon
4242 following identifier. */
4243 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4244 {
4245 /* Read past label. */
4246 cp++;
4247 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4248 cp++;
4249 if (*cp == ':' || iswhite (*cp))
4250 /* Found end of label, so copy it and add it to the table. */
4251 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4252 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4253 }
4254 }
4255}
4256
4257\f
4258/*
4259 * Perl support
4260 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4261 * /^use constant[ \t\n]+[^ \t\n{=,;]+/
4262 * Perl variable names: /^(my|local).../
4263 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4264 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4265 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4266 */
4267static void
4268Perl_functions (FILE *inf)
4269{
4270 char *package = savestr ("main"); /* current package name */
4271 register char *cp;
4272
4273 LOOP_ON_INPUT_LINES (inf, lb, cp)
4274 {
4275 cp = skip_spaces (cp);
4276
4277 if (LOOKING_AT (cp, "package"))
4278 {
4279 free (package);
4280 get_tag (cp, &package);
4281 }
4282 else if (LOOKING_AT (cp, "sub"))
4283 {
4284 char *pos, *sp;
4285
4286 subr:
4287 sp = cp;
4288 while (!notinname (*cp))
4289 cp++;
4290 if (cp == sp)
4291 continue; /* nothing found */
4292 if ((pos = etags_strchr (sp, ':')) != NULL
4293 && pos < cp && pos[1] == ':')
4294 /* The name is already qualified. */
4295 make_tag (sp, cp - sp, TRUE,
4296 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4297 else
4298 /* Qualify it. */
4299 {
4300 char savechar, *name;
4301
4302 savechar = *cp;
4303 *cp = '\0';
4304 name = concat (package, "::", sp);
4305 *cp = savechar;
4306 make_tag (name, strlen (name), TRUE,
4307 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4308 free (name);
4309 }
4310 }
4311 else if (LOOKING_AT (cp, "use constant")
4312 || LOOKING_AT (cp, "use constant::defer"))
4313 {
4314 /* For hash style multi-constant like
4315 use constant { FOO => 123,
4316 BAR => 456 };
4317 only the first FOO is picked up. Parsing across the value
4318 expressions would be difficult in general, due to possible nested
4319 hashes, here-documents, etc. */
4320 if (*cp == '{')
4321 cp = skip_spaces (cp+1);
4322 goto subr;
4323 }
4324 else if (globals) /* only if we are tagging global vars */
4325 {
4326 /* Skip a qualifier, if any. */
4327 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4328 /* After "my" or "local", but before any following paren or space. */
4329 char *varstart = cp;
4330
4331 if (qual /* should this be removed? If yes, how? */
4332 && (*cp == '$' || *cp == '@' || *cp == '%'))
4333 {
4334 varstart += 1;
4335 do
4336 cp++;
4337 while (ISALNUM (*cp) || *cp == '_');
4338 }
4339 else if (qual)
4340 {
4341 /* Should be examining a variable list at this point;
4342 could insist on seeing an open parenthesis. */
4343 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4344 cp++;
4345 }
4346 else
4347 continue;
4348
4349 make_tag (varstart, cp - varstart, FALSE,
4350 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4351 }
4352 }
4353 free (package);
4354}
4355
4356
4357/*
4358 * Python support
4359 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4360 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4361 * More ideas by seb bacon <seb@jamkit.com> (2002)
4362 */
4363static void
4364Python_functions (FILE *inf)
4365{
4366 register char *cp;
4367
4368 LOOP_ON_INPUT_LINES (inf, lb, cp)
4369 {
4370 cp = skip_spaces (cp);
4371 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4372 {
4373 char *name = cp;
4374 while (!notinname (*cp) && *cp != ':')
4375 cp++;
4376 make_tag (name, cp - name, TRUE,
4377 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4378 }
4379 }
4380}
4381
4382\f
4383/*
4384 * PHP support
4385 * Look for:
4386 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4387 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4388 * - /^[ \t]*define\(\"[^\"]+/
4389 * Only with --members:
4390 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4391 * Idea by Diez B. Roggisch (2001)
4392 */
4393static void
4394PHP_functions (FILE *inf)
4395{
4396 register char *cp, *name;
4397 bool search_identifier = FALSE;
4398
4399 LOOP_ON_INPUT_LINES (inf, lb, cp)
4400 {
4401 cp = skip_spaces (cp);
4402 name = cp;
4403 if (search_identifier
4404 && *cp != '\0')
4405 {
4406 while (!notinname (*cp))
4407 cp++;
4408 make_tag (name, cp - name, TRUE,
4409 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4410 search_identifier = FALSE;
4411 }
4412 else if (LOOKING_AT (cp, "function"))
4413 {
4414 if (*cp == '&')
4415 cp = skip_spaces (cp+1);
4416 if (*cp != '\0')
4417 {
4418 name = cp;
4419 while (!notinname (*cp))
4420 cp++;
4421 make_tag (name, cp - name, TRUE,
4422 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4423 }
4424 else
4425 search_identifier = TRUE;
4426 }
4427 else if (LOOKING_AT (cp, "class"))
4428 {
4429 if (*cp != '\0')
4430 {
4431 name = cp;
4432 while (*cp != '\0' && !iswhite (*cp))
4433 cp++;
4434 make_tag (name, cp - name, FALSE,
4435 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4436 }
4437 else
4438 search_identifier = TRUE;
4439 }
4440 else if (strneq (cp, "define", 6)
4441 && (cp = skip_spaces (cp+6))
4442 && *cp++ == '('
4443 && (*cp == '"' || *cp == '\''))
4444 {
4445 char quote = *cp++;
4446 name = cp;
4447 while (*cp != quote && *cp != '\0')
4448 cp++;
4449 make_tag (name, cp - name, FALSE,
4450 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4451 }
4452 else if (members
4453 && LOOKING_AT (cp, "var")
4454 && *cp == '$')
4455 {
4456 name = cp;
4457 while (!notinname (*cp))
4458 cp++;
4459 make_tag (name, cp - name, FALSE,
4460 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4461 }
4462 }
4463}
4464
4465\f
4466/*
4467 * Cobol tag functions
4468 * We could look for anything that could be a paragraph name.
4469 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4470 * Idea by Corny de Souza (1993)
4471 */
4472static void
4473Cobol_paragraphs (FILE *inf)
4474{
4475 register char *bp, *ep;
4476
4477 LOOP_ON_INPUT_LINES (inf, lb, bp)
4478 {
4479 if (lb.len < 9)
4480 continue;
4481 bp += 8;
4482
4483 /* If eoln, compiler option or comment ignore whole line. */
4484 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4485 continue;
4486
4487 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4488 continue;
4489 if (*ep++ == '.')
4490 make_tag (bp, ep - bp, TRUE,
4491 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4492 }
4493}
4494
4495\f
4496/*
4497 * Makefile support
4498 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4499 */
4500static void
4501Makefile_targets (FILE *inf)
4502{
4503 register char *bp;
4504
4505 LOOP_ON_INPUT_LINES (inf, lb, bp)
4506 {
4507 if (*bp == '\t' || *bp == '#')
4508 continue;
4509 while (*bp != '\0' && *bp != '=' && *bp != ':')
4510 bp++;
4511 if (*bp == ':' || (globals && *bp == '='))
4512 {
4513 /* We should detect if there is more than one tag, but we do not.
4514 We just skip initial and final spaces. */
4515 char * namestart = skip_spaces (lb.buffer);
4516 while (--bp > namestart)
4517 if (!notinname (*bp))
4518 break;
4519 make_tag (namestart, bp - namestart + 1, TRUE,
4520 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4521 }
4522 }
4523}
4524
4525\f
4526/*
4527 * Pascal parsing
4528 * Original code by Mosur K. Mohan (1989)
4529 *
4530 * Locates tags for procedures & functions. Doesn't do any type- or
4531 * var-definitions. It does look for the keyword "extern" or
4532 * "forward" immediately following the procedure statement; if found,
4533 * the tag is skipped.
4534 */
/* Scan INF one character at a time through the global pointer DBP and
   tag the name that follows each "procedure" or "function" keyword.
   A name is first saved as a candidate (the line holding it is copied
   into TLINE, because LB is overwritten when further lines are read);
   the tag is emitted only once the text after the ';' ending the
   heading is seen not to be "extern" or "forward".  Comments in { }
   and (* *) and text in '...' quotes are skipped. */
4535static void
4536Pascal_functions (FILE *inf)
4537{
4538 linebuffer tline; /* mostly copied from C_entries */
4539 long save_lcno;
4540 int save_lineno, namelen, taglen;
4541 char c, *name;
4542
4543 bool /* each of these flags is TRUE if: */
4544 incomment, /* point is inside a comment */
4545 inquote, /* point is inside '..' string */
4546 get_tagname, /* point is after PROCEDURE/FUNCTION
4547 keyword, so next item = potential tag */
4548 found_tag, /* point is after a potential tag */
4549 inparms, /* point is within parameter-list */
4550 verify_tag; /* point has passed the parm-list, so the
4551 next token will determine whether this
4552 is a FORWARD/EXTERN to be ignored, or
4553 whether it is a real tag */
4554
4555 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4556 name = NULL; /* keep compiler quiet */
/* Start with an empty line in LB so the first iteration reads a line. */
4557 dbp = lb.buffer;
4558 *dbp = '\0';
4559 linebuffer_init (&tline);
4560
4561 incomment = inquote = FALSE;
4562 found_tag = FALSE; /* have a proc name; check if extern */
4563 get_tagname = FALSE; /* found "procedure" keyword */
4564 inparms = FALSE; /* found '(' after "proc" */
4565 verify_tag = FALSE; /* check if "extern" is ahead */
4566
4567
4568 while (!feof (inf)) /* long main loop to get next char */
4569 {
4570 c = *dbp++;
4571 if (c == '\0') /* if end of line */
4572 {
4573 readline (&lb, inf);
4574 dbp = lb.buffer;
4575 if (*dbp == '\0')
4576 continue;
4577 if (!((found_tag && verify_tag)
4578 || get_tagname))
4579 c = *dbp++; /* only if don't need *dbp pointing
4580 to the beginning of the name of
4581 the procedure or function */
4582 }
/* While inside a comment or a quoted string, only look for its end. */
4583 if (incomment)
4584 {
4585 if (c == '}') /* within { } comments */
4586 incomment = FALSE;
4587 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4588 {
4589 dbp++;
4590 incomment = FALSE;
4591 }
4592 continue;
4593 }
4594 else if (inquote)
4595 {
4596 if (c == '\'')
4597 inquote = FALSE;
4598 continue;
4599 }
4600 else
/* Every case below moves on to the next character, except a ';' that
   ends a heading: it alone breaks out to the verification code. */
4601 switch (c)
4602 {
4603 case '\'':
4604 inquote = TRUE; /* found first quote */
4605 continue;
4606 case '{': /* found open { comment */
4607 incomment = TRUE;
4608 continue;
4609 case '(':
4610 if (*dbp == '*') /* found open (* comment */
4611 {
4612 incomment = TRUE;
4613 dbp++;
4614 }
4615 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4616 inparms = TRUE;
4617 continue;
4618 case ')': /* end of parms list */
4619 if (inparms)
4620 inparms = FALSE;
4621 continue;
4622 case ';':
4623 if (found_tag && !inparms) /* end of proc or fn stmt */
4624 {
4625 verify_tag = TRUE;
4626 break;
4627 }
4628 continue;
4629 }
4630 if (found_tag && verify_tag && (*dbp != ' '))
4631 {
4632 /* Check if this is an "extern" declaration. */
4633 if (*dbp == '\0')
4634 continue;
/* NOTE(review): nocase_tail is defined outside this block; presumably
   it matches its argument case-insensitively at DBP -- confirm. */
4635 if (lowcase (*dbp) == 'e')
4636 {
4637 if (nocase_tail ("extern")) /* superfluous, really! */
4638 {
4639 found_tag = FALSE;
4640 verify_tag = FALSE;
4641 }
4642 }
4643 else if (lowcase (*dbp) == 'f')
4644 {
4645 if (nocase_tail ("forward")) /* check for forward reference */
4646 {
4647 found_tag = FALSE;
4648 verify_tag = FALSE;
4649 }
4650 }
4651 if (found_tag && verify_tag) /* not external proc, so make tag */
4652 {
4653 found_tag = FALSE;
4654 verify_tag = FALSE;
/* Use the values saved when the name was found: LB may hold a later
   line by now. */
4655 make_tag (name, namelen, TRUE,
4656 tline.buffer, taglen, save_lineno, save_lcno);
4657 continue;
4658 }
4659 }
4660 if (get_tagname) /* grab name of proc or fn */
4661 {
4662 char *cp;
4663
4664 if (*dbp == '\0')
4665 continue;
4666
4667 /* Find block name. */
4668 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4669 continue;
4670
4671 /* Save all values for later tagging. */
4672 linebuffer_setlen (&tline, lb.len);
4673 strcpy (tline.buffer, lb.buffer);
4674 save_lineno = lineno;
4675 save_lcno = linecharno;
4676 name = tline.buffer + (dbp - lb.buffer);
4677 namelen = cp - dbp;
4678 taglen = cp - lb.buffer + 1;
4679
4680 dbp = cp; /* set dbp to e-o-token */
4681 get_tagname = FALSE;
4682 found_tag = TRUE;
4683 continue;
4684
4685 /* And proceed to check for "extern". */
4686 }
4687 else if (!incomment && !inquote && !found_tag)
4688 {
4689 /* Check for proc/fn keywords. */
4690 switch (lowcase (c))
4691 {
4692 case 'p':
4693 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4694 get_tagname = TRUE;
4695 continue;
4696 case 'f':
4697 if (nocase_tail ("unction"))
4698 get_tagname = TRUE;
4699 continue;
4700 }
4701 }
4702 } /* while not eof */
4703
4704 free (tline.buffer);
4705}
4706
4707\f
4708/*
4709 * Lisp tag functions
4710 * look for (def or (DEF, quote or QUOTE
4711 */
4712
4713static void L_getit (void);
4714
4715static void
4716L_getit (void)
4717{
4718 if (*dbp == '\'') /* Skip prefix quote */
4719 dbp++;
4720 else if (*dbp == '(')
4721 {
4722 dbp++;
4723 /* Try to skip "(quote " */
4724 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4725 /* Ok, then skip "(" before name in (defstruct (foo)) */
4726 dbp = skip_spaces (dbp);
4727 }
4728 get_tag (dbp, NULL);
4729}
4730
4731static void
4732Lisp_functions (FILE *inf)
4733{
4734 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4735 {
4736 if (dbp[0] != '(')
4737 continue;
4738
4739 /* "(defvar foo)" is a declaration rather than a definition. */
4740 if (! declarations)
4741 {
4742 char *p = dbp + 1;
4743 if (LOOKING_AT (p, "defvar"))
4744 {
4745 p = skip_name (p); /* past var name */
4746 p = skip_spaces (p);
4747 if (*p == ')')
4748 continue;
4749 }
4750 }
4751
4752 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4753 {
4754 dbp = skip_non_spaces (dbp);
4755 dbp = skip_spaces (dbp);
4756 L_getit ();
4757 }
4758 else
4759 {
4760 /* Check for (foo::defmumble name-defined ... */
4761 do
4762 dbp++;
4763 while (!notinname (*dbp) && *dbp != ':');
4764 if (*dbp == ':')
4765 {
4766 do
4767 dbp++;
4768 while (*dbp == ':');
4769
4770 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4771 {
4772 dbp = skip_non_spaces (dbp);
4773 dbp = skip_spaces (dbp);
4774 L_getit ();
4775 }
4776 }
4777 }
4778 }
4779}
4780
4781\f
4782/*
4783 * Lua script language parsing
4784 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4785 *
4786 * "function" and "local function" are tags if they start at column 1.
4787 */
4788static void
4789Lua_functions (FILE *inf)
4790{
4791 register char *bp;
4792
4793 LOOP_ON_INPUT_LINES (inf, lb, bp)
4794 {
4795 if (bp[0] != 'f' && bp[0] != 'l')
4796 continue;
4797
4798 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4799
4800 if (LOOKING_AT (bp, "function"))
4801 get_tag (bp, NULL);
4802 }
4803}
4804
4805\f
4806/*
4807 * PostScript tags
4808 * Just look for lines where the first character is '/'
4809 * Also look at "defineps" for PSWrap
4810 * Ideas by:
4811 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4812 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4813 */
4814static void
4815PS_functions (FILE *inf)
4816{
4817 register char *bp, *ep;
4818
4819 LOOP_ON_INPUT_LINES (inf, lb, bp)
4820 {
4821 if (bp[0] == '/')
4822 {
4823 for (ep = bp+1;
4824 *ep != '\0' && *ep != ' ' && *ep != '{';
4825 ep++)
4826 continue;
4827 make_tag (bp, ep - bp, TRUE,
4828 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4829 }
4830 else if (LOOKING_AT (bp, "defineps"))
4831 get_tag (bp, NULL);
4832 }
4833}
4834
4835\f
4836/*
4837 * Forth tags
4838 * Ignore anything after \ followed by space or in ( )
4839 * Look for words defined by :
4840 * Look for constant, code, create, defer, value, and variable
4841 * OBP extensions: Look for buffer:, field,
4842 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4843 */
4844static void
4845Forth_words (FILE *inf)
4846{
4847 register char *bp;
4848
4849 LOOP_ON_INPUT_LINES (inf, lb, bp)
4850 while ((bp = skip_spaces (bp))[0] != '\0')
4851 if (bp[0] == '\\' && iswhite (bp[1]))
4852 break; /* read next line */
4853 else if (bp[0] == '(' && iswhite (bp[1]))
4854 do /* skip to ) or eol */
4855 bp++;
4856 while (*bp != ')' && *bp != '\0');
4857 else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4858 || LOOKING_AT_NOCASE (bp, "constant")
4859 || LOOKING_AT_NOCASE (bp, "code")
4860 || LOOKING_AT_NOCASE (bp, "create")
4861 || LOOKING_AT_NOCASE (bp, "defer")
4862 || LOOKING_AT_NOCASE (bp, "value")
4863 || LOOKING_AT_NOCASE (bp, "variable")
4864 || LOOKING_AT_NOCASE (bp, "buffer:")
4865 || LOOKING_AT_NOCASE (bp, "field"))
4866 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4867 else
4868 bp = skip_non_spaces (bp);
4869}
4870
4871\f
4872/*
4873 * Scheme tag functions
4874 * look for (def... xyzzy
4875 * (def... (xyzzy
4876 * (def ... ((...(xyzzy ....
4877 * (set! xyzzy
4878 * Original code by Ken Haase (1985?)
4879 */
4880static void
4881Scheme_functions (FILE *inf)
4882{
4883 register char *bp;
4884
4885 LOOP_ON_INPUT_LINES (inf, lb, bp)
4886 {
4887 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4888 {
4889 bp = skip_non_spaces (bp+4);
4890 /* Skip over open parens and white space. Don't continue past
4891 '\0'. */
4892 while (*bp && notinname (*bp))
4893 bp++;
4894 get_tag (bp, NULL);
4895 }
4896 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4897 get_tag (bp, NULL);
4898 }
4899}
4900
4901\f
4902/* Find tags in TeX and LaTeX input files. */
4903
4904/* TEX_toktab is a table of TeX control sequences that define tags.
4905 * Each entry records one such control sequence.
4906 *
4907 * Original code from who knows whom.
4908 * Ideas by:
4909 * Stefan Monnier (2002)
4910 */
4911
/* TEX_toktab is built by TEX_decode_env the first time TeX_commands
   runs; its last entry has a NULL buffer, which is how TeX_commands
   finds the end of the table. */
4912static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4913
4914/* Default set of control sequences to put into TEX_toktab.
4915 The value of environment var TEXTAGS is prepended to this. */
4916static const char *TEX_defenv = "\
4917:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4918:part:appendix:entry:index:def\
4919:newcommand:renewcommand:newenvironment:renewenvironment";
4920
4921static void TEX_mode (FILE *);
4922static void TEX_decode_env (const char *, const char *);
4923
/* Escape character and group delimiters used while scanning.  TEX_mode
   resets all three for each file: \ { } or ! < >. */
4924static char TEX_esc = '\\';
4925static char TEX_opgrp = '{';
4926static char TEX_clgrp = '}';
4927
4928/*
4929 * TeX/LaTeX scanning loop.
4930 */
4931static void
4932TeX_commands (FILE *inf)
4933{
4934 char *cp;
4935 linebuffer *key;
4936
4937 /* Select either \ or ! as escape character. */
4938 TEX_mode (inf);
4939
4940 /* Initialize token table once from environment. */
4941 if (TEX_toktab == NULL)
4942 TEX_decode_env ("TEXTAGS", TEX_defenv);
4943
4944 LOOP_ON_INPUT_LINES (inf, lb, cp)
4945 {
4946 /* Look at each TEX keyword in line. */
4947 for (;;)
4948 {
4949 /* Look for a TEX escape. */
4950 while (*cp++ != TEX_esc)
4951 if (cp[-1] == '\0' || cp[-1] == '%')
4952 goto tex_next_line;
4953
4954 for (key = TEX_toktab; key->buffer != NULL; key++)
4955 if (strneq (cp, key->buffer, key->len))
4956 {
4957 register char *p;
4958 int namelen, linelen;
4959 bool opgrp = FALSE;
4960
4961 cp = skip_spaces (cp + key->len);
4962 if (*cp == TEX_opgrp)
4963 {
4964 opgrp = TRUE;
4965 cp++;
4966 }
4967 for (p = cp;
4968 (!iswhite (*p) && *p != '#' &&
4969 *p != TEX_opgrp && *p != TEX_clgrp);
4970 p++)
4971 continue;
4972 namelen = p - cp;
4973 linelen = lb.len;
4974 if (!opgrp || *p == TEX_clgrp)
4975 {
4976 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4977 p++;
4978 linelen = p - lb.buffer + 1;
4979 }
4980 make_tag (cp, namelen, TRUE,
4981 lb.buffer, linelen, lineno, linecharno);
4982 goto tex_next_line; /* We only tag a line once */
4983 }
4984 }
4985 tex_next_line:
4986 ;
4987 }
4988}
4989
4990#define TEX_LESC '\\'
4991#define TEX_SESC '!'
4992
4993/* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4994 chars accordingly. */
4995static void
4996TEX_mode (FILE *inf)
4997{
4998 int c;
4999
5000 while ((c = getc (inf)) != EOF)
5001 {
5002 /* Skip to next line if we hit the TeX comment char. */
5003 if (c == '%')
5004 while (c != '\n' && c != EOF)
5005 c = getc (inf);
5006 else if (c == TEX_LESC || c == TEX_SESC )
5007 break;
5008 }
5009
5010 if (c == TEX_LESC)
5011 {
5012 TEX_esc = TEX_LESC;
5013 TEX_opgrp = '{';
5014 TEX_clgrp = '}';
5015 }
5016 else
5017 {
5018 TEX_esc = TEX_SESC;
5019 TEX_opgrp = '<';
5020 TEX_clgrp = '>';
5021 }
5022 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5023 No attempt is made to correct the situation. */
5024 rewind (inf);
5025}
5026
5027/* Read environment and prepend it to the default string.
5028 Build token table. */
5029static void
5030TEX_decode_env (const char *evarname, const char *defenv)
5031{
5032 register const char *env, *p;
5033 int i, len;
5034
5035 /* Append default string to environment. */
5036 env = getenv (evarname);
5037 if (!env)
5038 env = defenv;
5039 else
5040 env = concat (env, defenv, "");
5041
5042 /* Allocate a token table */
5043 for (len = 1, p = env; p;)
5044 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5045 len++;
5046 TEX_toktab = xnew (len, linebuffer);
5047
5048 /* Unpack environment string into token table. Be careful about */
5049 /* zero-length strings (leading ':', "::" and trailing ':') */
5050 for (i = 0; *env != '\0';)
5051 {
5052 p = etags_strchr (env, ':');
5053 if (!p) /* End of environment string. */
5054 p = env + strlen (env);
5055 if (p - env > 0)
5056 { /* Only non-zero strings. */
5057 TEX_toktab[i].buffer = savenstr (env, p - env);
5058 TEX_toktab[i].len = p - env;
5059 i++;
5060 }
5061 if (*p)
5062 env = p + 1;
5063 else
5064 {
5065 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5066 TEX_toktab[i].len = 0;
5067 break;
5068 }
5069 }
5070}
5071
5072\f
5073/* Texinfo support. Dave Love, Mar. 2000. */
5074static void
5075Texinfo_nodes (FILE *inf)
5076{
5077 char *cp, *start;
5078 LOOP_ON_INPUT_LINES (inf, lb, cp)
5079 if (LOOKING_AT (cp, "@node"))
5080 {
5081 start = cp;
5082 while (*cp != '\0' && *cp != ',')
5083 cp++;
5084 make_tag (start, cp - start, TRUE,
5085 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5086 }
5087}
5088
5089\f
5090/*
5091 * HTML support.
5092 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5093 * Contents of <a name=xxx> are tags with name xxx.
5094 *
5095 * Francesco Potortì, 2002.
5096 */
/* Scan INF through the global pointer DBP, carrying four pieces of
   state from one line to the next: SKIPTAG (skip to the '>' closing
   the current HTML tag), INTAG (inside a tag, looking for an id= or,
   in anchors, a name= attribute), GETNEXT (the next text found outside
   of tags is to be tagged) and, when none of these is set, the search
   for the next '<'.  An attribute value found while INTAG is stored in
   token_name and becomes the name of the tag made in the GETNEXT
   state. */
5097static void
5098HTML_labels (FILE *inf)
5099{
5100 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5101 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5102 bool intag = FALSE; /* inside an html tag, looking for ID= */
5103 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5104 char *end;
5105
5106
5107 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5108
5109 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5110 for (;;) /* loop on the same line */
5111 {
5112 if (skiptag) /* skip HTML tag */
5113 {
5114 while (*dbp != '\0' && *dbp != '>')
5115 dbp++;
5116 if (*dbp == '>')
5117 {
5118 dbp += 1;
5119 skiptag = FALSE;
5120 continue; /* look on the same line */
5121 }
5122 break; /* go to next line */
5123 }
5124
5125 else if (intag) /* look for "name=" or "id=" */
5126 {
/* Stop at the end of the line, at the end of the tag, or at a letter
   that could begin "name=" or "id=". */
5127 while (*dbp != '\0' && *dbp != '>'
5128 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5129 dbp++;
5130 if (*dbp == '\0')
5131 break; /* go to next line */
5132 if (*dbp == '>')
5133 {
5134 dbp += 1;
5135 intag = FALSE;
5136 continue; /* look on the same line */
5137 }
/* NOTE(review): LOOKING_AT_NOCASE is defined outside this block; the
   code below reads DBP as the start of the attribute value, so the
   macro presumably advances DBP past the keyword on a match -- confirm. */
5138 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5139 || LOOKING_AT_NOCASE (dbp, "id="))
5140 {
5141 bool quoted = (dbp[0] == '"');
5142
/* A quoted value runs to the closing '"'; an unquoted one runs while
   intoken holds. */
5143 if (quoted)
5144 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5145 continue;
5146 else
5147 for (end = dbp; *end != '\0' && intoken (*end); end++)
5148 continue;
5149 linebuffer_setlen (&token_name, end - dbp);
5150 memcpy (token_name.buffer, dbp, end - dbp);
5151 token_name.buffer[end - dbp] = '\0';
5152
5153 dbp = end;
5154 intag = FALSE; /* we found what we looked for */
5155 skiptag = TRUE; /* skip to the end of the tag */
5156 getnext = TRUE; /* then grab the text */
5157 continue; /* look on the same line */
5158 }
5159 dbp += 1;
5160 }
5161
5162 else if (getnext) /* grab next tokens and tag them */
5163 {
5164 dbp = skip_spaces (dbp);
5165 if (*dbp == '\0')
5166 break; /* go to next line */
5167 if (*dbp == '<')
5168 {
5169 intag = TRUE;
5170 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5171 continue; /* look on the same line */
5172 }
5173
/* The text up to the next '<' or the end of the line is tagged, under
   the name held in token_name (which may be empty). */
5174 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5175 continue;
5176 make_tag (token_name.buffer, token_name.len, TRUE,
5177 dbp, end - dbp, lineno, linecharno);
5178 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5179 getnext = FALSE;
5180 break; /* go to next line */
5181 }
5182
5183 else /* look for an interesting HTML tag */
5184 {
5185 while (*dbp != '\0' && *dbp != '<')
5186 dbp++;
5187 if (*dbp == '\0')
5188 break; /* go to next line */
5189 intag = TRUE;
5190 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5191 {
5192 inanchor = TRUE;
5193 continue; /* look on the same line */
5194 }
5195 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5196 || LOOKING_AT_NOCASE (dbp, "<h1>")
5197 || LOOKING_AT_NOCASE (dbp, "<h2>")
5198 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5199 {
5200 intag = FALSE;
5201 getnext = TRUE;
5202 continue; /* look on the same line */
5203 }
5204 dbp += 1;
5205 }
5206 }
5207}
5208
5209\f
5210/*
5211 * Prolog support
5212 *
5213 * Assumes that the predicate or rule starts at column 0.
5214 * Only the first clause of a predicate or rule is added.
5215 * Original code by Sunichirou Sugou (1989)
5216 * Rewritten by Anders Lindgren (1996)
5217 */
5218static size_t prolog_pr (char *, char *);
5219static void prolog_skip_comment (linebuffer *, FILE *);
5220static size_t prolog_atom (char *, size_t);
5221
5222static void
5223Prolog_functions (FILE *inf)
5224{
5225 char *cp, *last;
5226 size_t len;
5227 size_t allocated;
5228
5229 allocated = 0;
5230 len = 0;
5231 last = NULL;
5232
5233 LOOP_ON_INPUT_LINES (inf, lb, cp)
5234 {
5235 if (cp[0] == '\0') /* Empty line */
5236 continue;
5237 else if (iswhite (cp[0])) /* Not a predicate */
5238 continue;
5239 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5240 prolog_skip_comment (&lb, inf);
5241 else if ((len = prolog_pr (cp, last)) > 0)
5242 {
5243 /* Predicate or rule. Store the function name so that we
5244 only generate a tag for the first clause. */
5245 if (last == NULL)
5246 last = xnew (len + 1, char);
5247 else if (len + 1 > allocated)
5248 xrnew (last, len + 1, char);
5249 allocated = len + 1;
5250 memcpy (last, cp, len);
5251 last[len] = '\0';
5252 }
5253 }
5254 free (last);
5255}
5256
5257
5258static void
5259prolog_skip_comment (linebuffer *plb, FILE *inf)
5260{
5261 char *cp;
5262
5263 do
5264 {
5265 for (cp = plb->buffer; *cp != '\0'; cp++)
5266 if (cp[0] == '*' && cp[1] == '/')
5267 return;
5268 readline (plb, inf);
5269 }
5270 while (!feof (inf));
5271}
5272
5273/*
5274 * A predicate or rule definition is added if it matches:
5275 * <beginning of line><Prolog Atom><whitespace>(
5276 * or <beginning of line><Prolog Atom><whitespace>:-
5277 *
5278 * It is added to the tags database if it doesn't match the
5279 * name of the previous clause header.
5280 *
5281 * Return the size of the name of the predicate or rule, or 0 if no
5282 * header was found.
5283 */
5284static size_t
5285prolog_pr (char *s, char *last)
5286
5287 /* Name of last clause. */
5288{
5289 size_t pos;
5290 size_t len;
5291
5292 pos = prolog_atom (s, 0);
5293 if (! pos)
5294 return 0;
5295
5296 len = pos;
5297 pos = skip_spaces (s + pos) - s;
5298
5299 if ((s[pos] == '.'
5300 || (s[pos] == '(' && (pos += 1))
5301 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5302 && (last == NULL /* save only the first clause */
5303 || len != strlen (last)
5304 || !strneq (s, last, len)))
5305 {
5306 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5307 return len;
5308 }
5309 else
5310 return 0;
5311}
5312
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        /* A backslash quotes the next character, which must exist. */
        if (p[1] == '\0')
          return 0;
        p += 2;
        break;

      case '\'':
        if (p[1] != '\'')
          return p + 1 - start; /* closing quote, counted in the size */
        p += 2;                 /* a doubled quote stands for one quote */
        break;

      default:
        p++;
        break;
      }
}
5369
5370\f
5371/*
5372 * Support for Erlang
5373 *
5374 * Generates tags for functions, defines, and records.
5375 * Assumes that Erlang functions start at column 0.
5376 * Original code by Anders Lindgren (1996)
5377 */
5378static int erlang_func (char *, char *);
5379static void erlang_attribute (char *);
5380static int erlang_atom (char *);
5381
5382static void
5383Erlang_functions (FILE *inf)
5384{
5385 char *cp, *last;
5386 int len;
5387 int allocated;
5388
5389 allocated = 0;
5390 len = 0;
5391 last = NULL;
5392
5393 LOOP_ON_INPUT_LINES (inf, lb, cp)
5394 {
5395 if (cp[0] == '\0') /* Empty line */
5396 continue;
5397 else if (iswhite (cp[0])) /* Not function nor attribute */
5398 continue;
5399 else if (cp[0] == '%') /* comment */
5400 continue;
5401 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5402 continue;
5403 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5404 {
5405 erlang_attribute (cp);
5406 if (last != NULL)
5407 {
5408 free (last);
5409 last = NULL;
5410 }
5411 }
5412 else if ((len = erlang_func (cp, last)) > 0)
5413 {
5414 /*
5415 * Function. Store the function name so that we only
5416 * generates a tag for the first clause.
5417 */
5418 if (last == NULL)
5419 last = xnew (len + 1, char);
5420 else if (len + 1 > allocated)
5421 xrnew (last, len + 1, char);
5422 allocated = len + 1;
5423 memcpy (last, cp, len);
5424 last[len] = '\0';
5425 }
5426 }
5427 free (last);
5428}
5429
5430
5431/*
5432 * A function definition is added if it matches:
5433 * <beginning of line><Erlang Atom><whitespace>(
5434 *
5435 * It is added to the tags database if it doesn't match the
5436 * name of the previous clause header.
5437 *
5438 * Return the size of the name of the function, or 0 if no function
5439 * was found.
5440 */
5441static int
5442erlang_func (char *s, char *last)
5443
5444 /* Name of last clause. */
5445{
5446 int pos;
5447 int len;
5448
5449 pos = erlang_atom (s);
5450 if (pos < 1)
5451 return 0;
5452
5453 len = pos;
5454 pos = skip_spaces (s + pos) - s;
5455
5456 /* Save only the first clause. */
5457 if (s[pos++] == '('
5458 && (last == NULL
5459 || len != (int)strlen (last)
5460 || !strneq (s, last, len)))
5461 {
5462 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5463 return len;
5464 }
5465
5466 return 0;
5467}
5468
5469
5470/*
5471 * Handle attributes. Currently, tags are generated for defines
5472 * and records.
5473 *
5474 * They are on the form:
5475 * -define(foo, bar).
5476 * -define(Foo(M, N), M+N).
5477 * -record(graph, {vtab = notable, cyclic = true}).
5478 */
5479static void
5480erlang_attribute (char *s)
5481{
5482 char *cp = s;
5483
5484 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5485 && *cp++ == '(')
5486 {
5487 int len = erlang_atom (skip_spaces (cp));
5488 if (len > 0)
5489 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5490 }
5491 return;
5492}
5493
5494
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start an atom, and also when a quoted atom is not closed on
 * this line.
 */
static int
erlang_atom (char *s)
{
  const char *p = s;

  if (*p == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              /* A backslash quotes the next character, which must exist. */
              p++;
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      return (int) (p + 1 - s); /* the closing quote is counted */
    }

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }

  return (int) (p - s);
}
5522
5523\f
5524static char *scan_separators (char *);
5525static void add_regex (char *, language *);
5526static char *substitute (char *, char *, struct re_registers *);
5527
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *src = name + 1;         /* next character to read */
  char *dst = name;             /* next character to write */

  for (;;)
    {
      char c = *src;

      if (c == '\0')
        break;

      if (c != '\\')
        {
          if (c == sep)         /* unquoted separator: end of string */
            break;
          *dst++ = c;
          src++;
          continue;
        }

      /* A backslash quotes the next character.  A backslash that ends
         the string is dropped.  */
      src++;
      c = *src;
      if (c == '\0')
        break;

      switch (c)
        {
        case 'a': *dst++ = '\007'; break; /* BEL (bell)            */
        case 'b': *dst++ = '\b'; break;   /* BS (back space)       */
        case 'd': *dst++ = 0177; break;   /* DEL (delete)          */
        case 'e': *dst++ = 033; break;    /* ESC (escape)          */
        case 'f': *dst++ = '\f'; break;   /* FF (form feed)        */
        case 'n': *dst++ = '\n'; break;   /* NL (new line)         */
        case 'r': *dst++ = '\r'; break;   /* CR (carriage return)  */
        case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab)  */
        case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)     */
        default:
          if (c == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = c;
            }
          break;
        }
      src++;
    }

  /* Terminate copied string. */
  *dst = '\0';

  /* Anything but a separator here signals an unterminated regexp. */
  return *src == sep ? src : NULL;
}
5586
5587/* Look at the argument of --regex or --no-regex and do the right
5588 thing. Same for each line of a regexp file. */
5589static void
5590analyse_regex (char *regex_arg)
5591{
5592 if (regex_arg == NULL)
5593 {
5594 free_regexps (); /* --no-regex: remove existing regexps */
5595 return;
5596 }
5597
5598 /* A real --regexp option or a line in a regexp file. */
5599 switch (regex_arg[0])
5600 {
5601 /* Comments in regexp file or null arg to --regex. */
5602 case '\0':
5603 case ' ':
5604 case '\t':
5605 break;
5606
5607 /* Read a regex file. This is recursive and may result in a
5608 loop, which will stop when the file descriptors are exhausted. */
5609 case '@':
5610 {
5611 FILE *regexfp;
5612 linebuffer regexbuf;
5613 char *regexfile = regex_arg + 1;
5614
5615 /* regexfile is a file containing regexps, one per line. */
5616 regexfp = fopen (regexfile, "r");
5617 if (regexfp == NULL)
5618 pfatal (regexfile);
5619 linebuffer_init (&regexbuf);
5620 while (readline_internal (&regexbuf, regexfp) > 0)
5621 analyse_regex (regexbuf.buffer);
5622 free (regexbuf.buffer);
5623 fclose (regexfp);
5624 }
5625 break;
5626
5627 /* Regexp to be used for a specific language only. */
5628 case '{':
5629 {
5630 language *lang;
5631 char *lang_name = regex_arg + 1;
5632 char *cp;
5633
5634 for (cp = lang_name; *cp != '}'; cp++)
5635 if (*cp == '\0')
5636 {
5637 error ("unterminated language name in regex: %s", regex_arg);
5638 return;
5639 }
5640 *cp++ = '\0';
5641 lang = get_language_from_langname (lang_name);
5642 if (lang == NULL)
5643 return;
5644 add_regex (cp, lang);
5645 }
5646 break;
5647
5648 /* Regexp to be used for any language. */
5649 default:
5650 add_regex (regex_arg, NULL);
5651 break;
5652 }
5653}
5654
5655/* Separate the regexp pattern, compile it,
5656 and care for optional name and modifiers. */
5657static void
5658add_regex (char *regexp_pattern, language *lang)
5659{
5660 static struct re_pattern_buffer zeropattern;
5661 char sep, *pat, *name, *modifiers;
5662 char empty = '\0';
5663 const char *err;
5664 struct re_pattern_buffer *patbuf;
5665 regexp *rp;
5666 bool
5667 force_explicit_name = TRUE, /* do not use implicit tag names */
5668 ignore_case = FALSE, /* case is significant */
5669 multi_line = FALSE, /* matches are done one line at a time */
5670 single_line = FALSE; /* dot does not match newline */
5671
5672
5673 if (strlen (regexp_pattern) < 3)
5674 {
5675 error ("null regexp");
5676 return;
5677 }
5678 sep = regexp_pattern[0];
5679 name = scan_separators (regexp_pattern);
5680 if (name == NULL)
5681 {
5682 error ("%s: unterminated regexp", regexp_pattern);
5683 return;
5684 }
5685 if (name[1] == sep)
5686 {
5687 error ("null name for regexp \"%s\"", regexp_pattern);
5688 return;
5689 }
5690 modifiers = scan_separators (name);
5691 if (modifiers == NULL) /* no terminating separator --> no name */
5692 {
5693 modifiers = name;
5694 name = &empty;
5695 }
5696 else
5697 modifiers += 1; /* skip separator */
5698
5699 /* Parse regex modifiers. */
5700 for (; modifiers[0] != '\0'; modifiers++)
5701 switch (modifiers[0])
5702 {
5703 case 'N':
5704 if (modifiers == name)
5705 error ("forcing explicit tag name but no name, ignoring");
5706 force_explicit_name = TRUE;
5707 break;
5708 case 'i':
5709 ignore_case = TRUE;
5710 break;
5711 case 's':
5712 single_line = TRUE;
5713 /* FALLTHRU */
5714 case 'm':
5715 multi_line = TRUE;
5716 need_filebuf = TRUE;
5717 break;
5718 default:
5719 error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5720 break;
5721 }
5722
5723 patbuf = xnew (1, struct re_pattern_buffer);
5724 *patbuf = zeropattern;
5725 if (ignore_case)
5726 {
5727 static char lc_trans[CHARS];
5728 int i;
5729 for (i = 0; i < CHARS; i++)
5730 lc_trans[i] = lowcase (i);
5731 patbuf->translate = lc_trans; /* translation table to fold case */
5732 }
5733
5734 if (multi_line)
5735 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5736 else
5737 pat = regexp_pattern;
5738
5739 if (single_line)
5740 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5741 else
5742 re_set_syntax (RE_SYNTAX_EMACS);
5743
5744 err = re_compile_pattern (pat, strlen (pat), patbuf);
5745 if (multi_line)
5746 free (pat);
5747 if (err != NULL)
5748 {
5749 error ("%s while compiling pattern", err);
5750 return;
5751 }
5752
5753 rp = p_head;
5754 p_head = xnew (1, regexp);
5755 p_head->pattern = savestr (regexp_pattern);
5756 p_head->p_next = rp;
5757 p_head->lang = lang;
5758 p_head->pat = patbuf;
5759 p_head->name = savestr (name);
5760 p_head->error_signaled = FALSE;
5761 p_head->force_explicit_name = force_explicit_name;
5762 p_head->ignore_case = ignore_case;
5763 p_head->multi_line = multi_line;
5764}
5765
5766/*
5767 * Do the substitutions indicated by the regular expression and
5768 * arguments.
5769 */
5770static char *
5771substitute (char *in, char *out, struct re_registers *regs)
5772{
5773 char *result, *t;
5774 int size, dig, diglen;
5775
5776 result = NULL;
5777 size = strlen (out);
5778
5779 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5780 if (out[size - 1] == '\\')
5781 fatal ("pattern error in \"%s\"", out);
5782 for (t = etags_strchr (out, '\\');
5783 t != NULL;
5784 t = etags_strchr (t + 2, '\\'))
5785 if (ISDIGIT (t[1]))
5786 {
5787 dig = t[1] - '0';
5788 diglen = regs->end[dig] - regs->start[dig];
5789 size += diglen - 2;
5790 }
5791 else
5792 size -= 1;
5793
5794 /* Allocate space and do the substitutions. */
5795 assert (size >= 0);
5796 result = xnew (size + 1, char);
5797
5798 for (t = result; *out != '\0'; out++)
5799 if (*out == '\\' && ISDIGIT (*++out))
5800 {
5801 dig = *out - '0';
5802 diglen = regs->end[dig] - regs->start[dig];
5803 memcpy (t, in + regs->start[dig], diglen);
5804 t += diglen;
5805 }
5806 else
5807 *t++ = *out;
5808 *t = '\0';
5809
5810 assert (t <= result + size);
5811 assert (t - result == (int)strlen (result));
5812
5813 return result;
5814}
5815
5816/* Deallocate all regexps. */
5817static void
5818free_regexps (void)
5819{
5820 regexp *rp;
5821 while (p_head != NULL)
5822 {
5823 rp = p_head->p_next;
5824 free (p_head->pattern);
5825 free (p_head->name);
5826 free (p_head);
5827 p_head = rp;
5828 }
5829 return;
5830}
5831
5832/*
5833 * Reads the whole file as a single string from `filebuf' and looks for
5834 * multi-line regular expressions, creating tags on matches.
5835 * readline already dealt with normal regexps.
5836 *
5837 * Idea by Ben Wing <ben@666.com> (2002).
5838 */
5839static void
5840regex_tag_multiline (void)
5841{
5842 char *buffer = filebuf.buffer;
5843 regexp *rp;
5844 char *name;
5845
5846 for (rp = p_head; rp != NULL; rp = rp->p_next)
5847 {
5848 int match = 0;
5849
5850 if (!rp->multi_line)
5851 continue; /* skip normal regexps */
5852
5853 /* Generic initializations before parsing file from memory. */
5854 lineno = 1; /* reset global line number */
5855 charno = 0; /* reset global char number */
5856 linecharno = 0; /* reset global char number of line start */
5857
5858 /* Only use generic regexps or those for the current language. */
5859 if (rp->lang != NULL && rp->lang != curfdp->lang)
5860 continue;
5861
5862 while (match >= 0 && match < filebuf.len)
5863 {
5864 match = re_search (rp->pat, buffer, filebuf.len, charno,
5865 filebuf.len - match, &rp->regs);
5866 switch (match)
5867 {
5868 case -2:
5869 /* Some error. */
5870 if (!rp->error_signaled)
5871 {
5872 error ("regexp stack overflow while matching \"%s\"",
5873 rp->pattern);
5874 rp->error_signaled = TRUE;
5875 }
5876 break;
5877 case -1:
5878 /* No match. */
5879 break;
5880 default:
5881 if (match == rp->regs.end[0])
5882 {
5883 if (!rp->error_signaled)
5884 {
5885 error ("regexp matches the empty string: \"%s\"",
5886 rp->pattern);
5887 rp->error_signaled = TRUE;
5888 }
5889 match = -3; /* exit from while loop */
5890 break;
5891 }
5892
5893 /* Match occurred. Construct a tag. */
5894 while (charno < rp->regs.end[0])
5895 if (buffer[charno++] == '\n')
5896 lineno++, linecharno = charno;
5897 name = rp->name;
5898 if (name[0] == '\0')
5899 name = NULL;
5900 else /* make a named tag */
5901 name = substitute (buffer, rp->name, &rp->regs);
5902 if (rp->force_explicit_name)
5903 /* Force explicit tag name, if a name is there. */
5904 pfnote (name, TRUE, buffer + linecharno,
5905 charno - linecharno + 1, lineno, linecharno);
5906 else
5907 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5908 charno - linecharno + 1, lineno, linecharno);
5909 break;
5910 }
5911 }
5912 }
5913}
5914
5915\f
5916static bool
5917nocase_tail (const char *cp)
5918{
5919 register int len = 0;
5920
5921 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5922 cp++, len++;
5923 if (*cp == '\0' && !intoken (dbp[len]))
5924 {
5925 dbp += len;
5926 return TRUE;
5927 }
5928 return FALSE;
5929}
5930
5931static void
5932get_tag (register char *bp, char **namepp)
5933{
5934 register char *cp = bp;
5935
5936 if (*bp != '\0')
5937 {
5938 /* Go till you get to white space or a syntactic break */
5939 for (cp = bp + 1; !notinname (*cp); cp++)
5940 continue;
5941 make_tag (bp, cp - bp, TRUE,
5942 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5943 }
5944
5945 if (namepp != NULL)
5946 *namepp = savenstr (bp, cp - bp);
5947}
5948
5949/*
5950 * Read a line of text from `stream' into `lbp', excluding the
5951 * newline or CR-NL, if any. Return the number of characters read from
5952 * `stream', which is the length of the line including the newline.
5953 *
5954 * On DOS or Windows we do not count the CR character, if any before the
5955 * NL, in the returned length; this mirrors the behavior of Emacs on those
5956 * platforms (for text files, it translates CR-NL to NL as it reads in the
5957 * file).
5958 *
5959 * If multi-line regular expressions are requested, each line read is
5960 * appended to `filebuf'.
5961 */
5962static long
5963readline_internal (linebuffer *lbp, register FILE *stream)
5964{
5965 char *buffer = lbp->buffer;
5966 register char *p = lbp->buffer;
5967 register char *pend;
5968 int chars_deleted;
5969
5970 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
5971
5972 for (;;)
5973 {
5974 register int c = getc (stream);
5975 if (p == pend)
5976 {
5977 /* We're at the end of linebuffer: expand it. */
5978 lbp->size *= 2;
5979 xrnew (buffer, lbp->size, char);
5980 p += buffer - lbp->buffer;
5981 pend = buffer + lbp->size;
5982 lbp->buffer = buffer;
5983 }
5984 if (c == EOF)
5985 {
5986 *p = '\0';
5987 chars_deleted = 0;
5988 break;
5989 }
5990 if (c == '\n')
5991 {
5992 if (p > buffer && p[-1] == '\r')
5993 {
5994 p -= 1;
5995#ifdef DOS_NT
5996 /* Assume CRLF->LF translation will be performed by Emacs
5997 when loading this file, so CRs won't appear in the buffer.
5998 It would be cleaner to compensate within Emacs;
5999 however, Emacs does not know how many CRs were deleted
6000 before any given point in the file. */
6001 chars_deleted = 1;
6002#else
6003 chars_deleted = 2;
6004#endif
6005 }
6006 else
6007 {
6008 chars_deleted = 1;
6009 }
6010 *p = '\0';
6011 break;
6012 }
6013 *p++ = c;
6014 }
6015 lbp->len = p - buffer;
6016
6017 if (need_filebuf /* we need filebuf for multi-line regexps */
6018 && chars_deleted > 0) /* not at EOF */
6019 {
6020 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6021 {
6022 /* Expand filebuf. */
6023 filebuf.size *= 2;
6024 xrnew (filebuf.buffer, filebuf.size, char);
6025 }
6026 memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6027 filebuf.len += lbp->len;
6028 filebuf.buffer[filebuf.len++] = '\n';
6029 filebuf.buffer[filebuf.len] = '\0';
6030 }
6031
6032 return lbp->len + chars_deleted;
6033}
6034
6035/*
6036 * Like readline_internal, above, but in addition try to match the
6037 * input line against relevant regular expressions and manage #line
6038 * directives.
6039 */
6040static void
6041readline (linebuffer *lbp, FILE *stream)
6042{
6043 long result;
6044
6045 linecharno = charno; /* update global char number of line start */
6046 result = readline_internal (lbp, stream); /* read line */
6047 lineno += 1; /* increment global line number */
6048 charno += result; /* increment global char number */
6049
6050 /* Honor #line directives. */
6051 if (!no_line_directive)
6052 {
6053 static bool discard_until_line_directive;
6054
6055 /* Check whether this is a #line directive. */
6056 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6057 {
6058 unsigned int lno;
6059 int start = 0;
6060
6061 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6062 && start > 0) /* double quote character found */
6063 {
6064 char *endp = lbp->buffer + start;
6065
6066 while ((endp = etags_strchr (endp, '"')) != NULL
6067 && endp[-1] == '\\')
6068 endp++;
6069 if (endp != NULL)
6070 /* Ok, this is a real #line directive. Let's deal with it. */
6071 {
6072 char *taggedabsname; /* absolute name of original file */
6073 char *taggedfname; /* name of original file as given */
6074 char *name; /* temp var */
6075
6076 discard_until_line_directive = FALSE; /* found it */
6077 name = lbp->buffer + start;
6078 *endp = '\0';
6079 canonicalize_filename (name);
6080 taggedabsname = absolute_filename (name, tagfiledir);
6081 if (filename_is_absolute (name)
6082 || filename_is_absolute (curfdp->infname))
6083 taggedfname = savestr (taggedabsname);
6084 else
6085 taggedfname = relative_filename (taggedabsname,tagfiledir);
6086
6087 if (streq (curfdp->taggedfname, taggedfname))
6088 /* The #line directive is only a line number change. We
6089 deal with this afterwards. */
6090 free (taggedfname);
6091 else
6092 /* The tags following this #line directive should be
6093 attributed to taggedfname. In order to do this, set
6094 curfdp accordingly. */
6095 {
6096 fdesc *fdp; /* file description pointer */
6097
6098 /* Go look for a file description already set up for the
6099 file indicated in the #line directive. If there is
6100 one, use it from now until the next #line
6101 directive. */
6102 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6103 if (streq (fdp->infname, curfdp->infname)
6104 && streq (fdp->taggedfname, taggedfname))
6105 /* If we remove the second test above (after the &&)
6106 then all entries pertaining to the same file are
6107 coalesced in the tags file. If we use it, then
6108 entries pertaining to the same file but generated
6109 from different files (via #line directives) will
6110 go into separate sections in the tags file. These
6111 alternatives look equivalent. The first one
6112 destroys some apparently useless information. */
6113 {
6114 curfdp = fdp;
6115 free (taggedfname);
6116 break;
6117 }
6118 /* Else, if we already tagged the real file, skip all
6119 input lines until the next #line directive. */
6120 if (fdp == NULL) /* not found */
6121 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6122 if (streq (fdp->infabsname, taggedabsname))
6123 {
6124 discard_until_line_directive = TRUE;
6125 free (taggedfname);
6126 break;
6127 }
6128 /* Else create a new file description and use that from
6129 now on, until the next #line directive. */
6130 if (fdp == NULL) /* not found */
6131 {
6132 fdp = fdhead;
6133 fdhead = xnew (1, fdesc);
6134 *fdhead = *curfdp; /* copy curr. file description */
6135 fdhead->next = fdp;
6136 fdhead->infname = savestr (curfdp->infname);
6137 fdhead->infabsname = savestr (curfdp->infabsname);
6138 fdhead->infabsdir = savestr (curfdp->infabsdir);
6139 fdhead->taggedfname = taggedfname;
6140 fdhead->usecharno = FALSE;
6141 fdhead->prop = NULL;
6142 fdhead->written = FALSE;
6143 curfdp = fdhead;
6144 }
6145 }
6146 free (taggedabsname);
6147 lineno = lno - 1;
6148 readline (lbp, stream);
6149 return;
6150 } /* if a real #line directive */
6151 } /* if #line is followed by a number */
6152 } /* if line begins with "#line " */
6153
6154 /* If we are here, no #line directive was found. */
6155 if (discard_until_line_directive)
6156 {
6157 if (result > 0)
6158 {
6159 /* Do a tail recursion on ourselves, thus discarding the contents
6160 of the line buffer. */
6161 readline (lbp, stream);
6162 return;
6163 }
6164 /* End of file. */
6165 discard_until_line_directive = FALSE;
6166 return;
6167 }
6168 } /* if #line directives should be considered */
6169
6170 {
6171 int match;
6172 regexp *rp;
6173 char *name;
6174
6175 /* Match against relevant regexps. */
6176 if (lbp->len > 0)
6177 for (rp = p_head; rp != NULL; rp = rp->p_next)
6178 {
6179 /* Only use generic regexps or those for the current language.
6180 Also do not use multiline regexps, which is the job of
6181 regex_tag_multiline. */
6182 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6183 || rp->multi_line)
6184 continue;
6185
6186 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6187 switch (match)
6188 {
6189 case -2:
6190 /* Some error. */
6191 if (!rp->error_signaled)
6192 {
6193 error ("regexp stack overflow while matching \"%s\"",
6194 rp->pattern);
6195 rp->error_signaled = TRUE;
6196 }
6197 break;
6198 case -1:
6199 /* No match. */
6200 break;
6201 case 0:
6202 /* Empty string matched. */
6203 if (!rp->error_signaled)
6204 {
6205 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6206 rp->error_signaled = TRUE;
6207 }
6208 break;
6209 default:
6210 /* Match occurred. Construct a tag. */
6211 name = rp->name;
6212 if (name[0] == '\0')
6213 name = NULL;
6214 else /* make a named tag */
6215 name = substitute (lbp->buffer, rp->name, &rp->regs);
6216 if (rp->force_explicit_name)
6217 /* Force explicit tag name, if a name is there. */
6218 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6219 else
6220 make_tag (name, strlen (name), TRUE,
6221 lbp->buffer, match, lineno, linecharno);
6222 break;
6223 }
6224 }
6225 }
6226}
6227
6228\f
/*
 * Duplicate the string CP: the copy, made by savenstr, holds
 * strlen(cp) characters plus the terminating null.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6238
6239/*
6240 * Return a pointer to a space of size LEN+1 allocated with xnew where
6241 * the string CP has been copied for at most the first LEN characters.
6242 */
6243static char *
6244savenstr (const char *cp, int len)
6245{
6246 char *dp = xnew (len + 1, char);
6247 dp[len] = '\0';
6248 return memcpy (dp, cp, len);
6249}
6250
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * The terminating null takes part in the search, so looking for
 * '\0' returns a pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
6270
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * The terminating null takes part in the search, so looking for
 * '\0' returns a pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
6287
/* Advance CP over every leading iswhite character and return the new
   pointer (end of string is not space, so the scan stops there).  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6296
/* Advance CP up to the first iswhite character or to the end of the
   string, whichever comes first, and return the new pointer.  */
static char *
skip_non_spaces (char *cp)
{
  for (;; cp++)
    if (*cp == '\0' || iswhite (*cp))
      return cp;
}
6305
/* Advance CP over any chars in the "name" class and return the new
   pointer.  */
static char *
skip_name (char *cp)
{
  /* '\0' is a notinname() so the scan stops there too */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6315
/* Report a problem through error, using S1 as the format and S2 as
   its single argument, then terminate with EXIT_FAILURE.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  /* Never returns to the caller.  */
  exit (EXIT_FAILURE);
}
6323
/* Let perror describe the current errno value, prefixed by S1, then
   terminate with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  /* Never returns to the caller.  */
  exit (EXIT_FAILURE);
}
6330
6331static void
6332suggest_asking_for_help (void)
6333{
6334 fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6335 progname);
6336 exit (EXIT_FAILURE);
6337}
6338
6339/* Output a diagnostic with printf-style FORMAT and args. */
6340static void
6341error (const char *format, ...)
6342{
6343 va_list ap;
6344 va_start (ap, format);
6345 fprintf (stderr, "%s: ", progname);
6346 vfprintf (stderr, format, ap);
6347 fprintf (stderr, "\n");
6348 va_end (ap);
6349}
6350
6351/* Return a newly-allocated string whose contents
6352 concatenate those of s1, s2, s3. */
6353static char *
6354concat (const char *s1, const char *s2, const char *s3)
6355{
6356 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6357 char *result = xnew (len1 + len2 + len3 + 1, char);
6358
6359 strcpy (result, s1);
6360 strcpy (result + len1, s2);
6361 strcpy (result + len1 + len2, s3);
6362
6363 return result;
6364}
6365
6366\f
6367/* Does the same work as the system V getcwd, but does not need to
6368 guess the buffer size in advance. */
6369static char *
6370etags_getcwd (void)
6371{
6372 int bufsize = 200;
6373 char *path = xnew (bufsize, char);
6374
6375 while (getcwd (path, bufsize) == NULL)
6376 {
6377 if (errno != ERANGE)
6378 pfatal ("getcwd");
6379 bufsize *= 2;
6380 free (path);
6381 path = xnew (bufsize, char);
6382 }
6383
6384 canonicalize_filename (path);
6385 return path;
6386}
6387
6388/* Return a newly allocated string containing the file name of FILE
6389 relative to the absolute directory DIR (which should end with a slash). */
6390static char *
6391relative_filename (char *file, char *dir)
6392{
6393 char *fp, *dp, *afn, *res;
6394 int i;
6395
6396 /* Find the common root of file and dir (with a trailing slash). */
6397 afn = absolute_filename (file, cwd);
6398 fp = afn;
6399 dp = dir;
6400 while (*fp++ == *dp++)
6401 continue;
6402 fp--, dp--; /* back to the first differing char */
6403#ifdef DOS_NT
6404 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6405 return afn;
6406#endif
6407 do /* look at the equal chars until '/' */
6408 fp--, dp--;
6409 while (*fp != '/');
6410
6411 /* Build a sequence of "../" strings for the resulting relative file name. */
6412 i = 0;
6413 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6414 i += 1;
6415 res = xnew (3*i + strlen (fp + 1) + 1, char);
6416 res[0] = '\0';
6417 while (i-- > 0)
6418 strcat (res, "../");
6419
6420 /* Add the file name relative to the common root of file and dir. */
6421 strcat (res, fp + 1);
6422 free (afn);
6423
6424 return res;
6425}
6426
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An already absolute FILE is copied, otherwise it is appended to
   DIR.  The "/dirname/.." and "/." substrings are then deleted from
   the copy.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      /* Does a ".." or a "." component start right after this slash?  */
      bool dotdot = (slashp[1] == '.'
                     && slashp[2] == '.'
                     && (slashp[3] == '/' || slashp[3] == '\0'));
      bool dot = (slashp[1] == '.'
                  && (slashp[2] == '/' || slashp[2] == '\0'));

      if (dotdot)
        {
          /* Walk back to the place where the previous component
             starts.  */
          cp = slashp;
          do
            cp--;
          while (cp >= res && !filename_is_absolute (cp));
          if (cp < res)
            cp = slashp;        /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (cp[0] != '/')
            cp = slashp;
#endif
          /* The length includes the terminating null.  */
          memmove (cp, slashp + 3, strlen (slashp + 2));
          slashp = cp;          /* look again from the same place */
        }
      else if (dot)
        /* Drop the "/." and look at the same slash again.  */
        memmove (slashp, slashp + 2, strlen (slashp + 1));
      else
        slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* just a safety net: should never happen */
  free (res);
  return savestr ("/");
}
6489
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is
   a copy of DIR.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the call,
     then put the overwritten character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6509
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, on DOS_NT, with
   a drive letter followed by a colon and a slash.  */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6521
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.  The separator is '/' unless DOS_NT is defined,
   in which case it is '\\'.  */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  register char *dst;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash. */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        {
          *dst++ = *src++;
          continue;
        }
      /* A whole run of separators becomes one slash.  */
      *dst++ = '/';
      do
        src++;
      while (*src == sep);
    }
  *dst = '\0';
}
6551
6552\f
6553/* Initialize a linebuffer for use. */
6554static void
6555linebuffer_init (linebuffer *lbp)
6556{
6557 lbp->size = (DEBUG) ? 3 : 200;
6558 lbp->buffer = xnew (lbp->size, char);
6559 lbp->buffer[0] = '\0';
6560 lbp->len = 0;
6561}
6562
6563/* Set the minimum size of a string contained in a linebuffer. */
6564static void
6565linebuffer_setlen (linebuffer *lbp, int toksize)
6566{
6567 while (lbp->size <= toksize)
6568 {
6569 lbp->size *= 2;
6570 xrnew (lbp->buffer, lbp->size, char);
6571 }
6572 lbp->len = toksize;
6573}
6574
/* Like malloc but get fatal error if memory is exhausted. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);

  return block;
}
6584
/* Like realloc but get fatal error if memory is exhausted. */
static void *
xrealloc (char *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);

  return block;
}
6593
6594/*
6595 * Local Variables:
6596 * indent-tabs-mode: t
6597 * tab-width: 8
6598 * fill-column: 79
6599 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6600 * c-file-style: "gnu"
6601 * End:
6602 */
6603
6604/* etags.c ends here */