Switch license to GPLv3 or later.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 3, or (at your option)
40 any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program; see the file COPYING. If not, write to the
49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
50 Boston, MA 02110-1301, USA. */
51
52
53 /* NB To comply with the above BSD license, copyright information is
54 reproduced in etc/ETAGS.README. That file should be updated when the
55 above notices are.
56
57 To the best of our knowledge, this code was originally based on the
58 ctags.c distributed with BSD4.2, which was copyrighted by the
59 University of California, as described above. */
60
61
62 /*
63 * Authors:
64 * Ctags originally by Ken Arnold.
65 * Fortran added by Jim Kleckner.
66 * Ed Pelegri-Llopart added C typedefs.
67 * Gnu Emacs TAGS format and modifications by RMS?
68 * 1989 Sam Kendall added C++.
69 * 1992 Joseph B. Wells improved C and C++ parsing.
70 * 1993 Francesco Potortì reorganised C and C++.
71 * 1994 Line-by-line regexp tags by Tom Tromey.
72 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
73 * 2002 #line directives by Francesco Potortì.
74 *
75 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 */
77
78 /*
79 * If you want to add support for a new language, start by looking at the LUA
80 * language, which is the simplest. Alternatively, consider shipping a
81 * configuration file containing regexp definitions for etags.
82 */
83
84 char pot_etags_version[] = "@(#) pot revision number is 17.26";
85
86 #define TRUE 1
87 #define FALSE 0
88
89 #ifdef DEBUG
90 # undef DEBUG
91 # define DEBUG TRUE
92 #else
93 # define DEBUG FALSE
94 # define NDEBUG /* disable assert */
95 #endif
96
97 #ifdef HAVE_CONFIG_H
98 # include <config.h>
99 /* On some systems, Emacs defines static as nothing for the sake
100 of unexec. We don't want that here since we don't use unexec. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 # ifndef __P /* for XEmacs */
106 # define __P(args) args
107 # endif
108 #else /* no config.h */
109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
110 # define __P(args) args /* use prototypes */
111 # define PTR void * /* for generic pointers */
112 # else /* not standard C */
113 # define __P(args) () /* no prototypes */
114 # define const /* remove const for old compilers' sake */
115 # define PTR long * /* don't use void* */
116 # endif
117 #endif /* !HAVE_CONFIG_H */
118
119 #ifndef _GNU_SOURCE
120 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
121 #endif
122
123 /* WIN32_NATIVE is for XEmacs.
124 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
125 #ifdef WIN32_NATIVE
126 # undef MSDOS
127 # undef WINDOWSNT
128 # define WINDOWSNT
129 #endif /* WIN32_NATIVE */
130
131 #ifdef MSDOS
132 # undef MSDOS
133 # define MSDOS TRUE
134 # include <fcntl.h>
135 # include <sys/param.h>
136 # include <io.h>
137 # ifndef HAVE_CONFIG_H
138 # define DOS_NT
139 # include <sys/config.h>
140 # endif
141 #else
142 # define MSDOS FALSE
143 #endif /* MSDOS */
144
145 #ifdef WINDOWSNT
146 # include <stdlib.h>
147 # include <fcntl.h>
148 # include <string.h>
149 # include <direct.h>
150 # include <io.h>
151 # define MAXPATHLEN _MAX_PATH
152 # undef HAVE_NTGUI
153 # undef DOS_NT
154 # define DOS_NT
155 # ifndef HAVE_GETCWD
156 # define HAVE_GETCWD
157 # endif /* undef HAVE_GETCWD */
158 #else /* not WINDOWSNT */
159 # ifdef STDC_HEADERS
160 # include <stdlib.h>
161 # include <string.h>
162 # else /* no standard C headers */
163 extern char *getenv ();
164 # ifdef VMS
165 # define EXIT_SUCCESS 1
166 # define EXIT_FAILURE 0
167 # else /* no VMS */
168 # define EXIT_SUCCESS 0
169 # define EXIT_FAILURE 1
170 # endif
171 # endif
172 #endif /* !WINDOWSNT */
173
174 #ifdef HAVE_UNISTD_H
175 # include <unistd.h>
176 #else
177 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
178 extern char *getcwd (char *buf, size_t size);
179 # endif
180 #endif /* HAVE_UNISTD_H */
181
182 #include <stdio.h>
183 #include <ctype.h>
184 #include <errno.h>
185 #ifndef errno
186 extern int errno;
187 #endif
188 #include <sys/types.h>
189 #include <sys/stat.h>
190
191 #include <assert.h>
192 #ifdef NDEBUG
193 # undef assert /* some systems have a buggy assert.h */
194 # define assert(x) ((void) 0)
195 #endif
196
197 #if !defined (S_ISREG) && defined (S_IFREG)
198 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
199 #endif
200
201 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
202 # define NO_LONG_OPTIONS TRUE
203 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
204 extern char *optarg;
205 extern int optind, opterr;
206 #else
207 # define NO_LONG_OPTIONS FALSE
208 # include <getopt.h>
209 #endif /* NO_LONG_OPTIONS */
210
211 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
212 # ifdef __CYGWIN__ /* compiling on Cygwin */
213 !!! NOTICE !!!
214 the regex.h distributed with Cygwin is not compatible with etags, alas!
215 If you want regular expression support, you should delete this notice and
216 arrange to use the GNU regex.h and regex.c.
217 # endif
218 #endif
219 #include <regex.h>
220
221 /* Define CTAGS to make the program "ctags" compatible with the usual one.
222 Leave it undefined to make the program "etags", which makes emacs-style
223 tag tables and tags typedefs, #defines and struct/union/enum by default. */
224 #ifdef CTAGS
225 # undef CTAGS
226 # define CTAGS TRUE
227 #else
228 # define CTAGS FALSE
229 #endif
230
/* String equality predicates.  Each one evaluates to nonzero iff the
   comparison function it wraps returns 0:
     streq       -- strcmp
     strcaseeq   -- etags_strcasecmp
     strneq      -- strncmp, at most N characters
     strncaseeq  -- etags_strncasecmp, at most N characters
   When assertions are enabled, every macro first checks that BOTH
   arguments are non-null, since handing a null pointer to the
   comparison functions is not allowed.  With assertions enabled the
   arguments are evaluated more than once.  */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
235
/* Character classification helpers.

   CHARS is the number of distinct values of an 8-bit char (2^8).
   CHAR(x) reduces X to the range 0..CHARS-1, so that a possibly
   negative plain char can be used as a table index or handed to the
   <ctype.h> functions.  */
#define CHARS		256
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))

/* Table-driven predicates (see init).  The name in parentheses is the
   string that lists the characters of each class.  */
#define iswhite(c)	(_wht[CHAR(c)])	/* C is white (white) */
#define notinname(c)	(_nin[CHAR(c)])	/* C is not in a name (nonam) */
#define begtoken(c)	(_btk[CHAR(c)])	/* C can start a token (begtk) */
#define intoken(c)	(_itk[CHAR(c)])	/* C can be in a token (midtk) */
#define endtoken(c)	(_etk[CHAR(c)])	/* C ends tokens (endtk) */

/* <ctype.h> predicates applied to CHAR(c).  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

/* <ctype.h> case conversion applied to CHAR(c).  */
#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))

251
252
/*
 * xnew, xrnew -- typed allocation and reallocation of storage
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew requests room for N objects of type Type and yields the result
 * cast to `Type *'.  xrnew requests that the storage OldPointer refers
 * to be resized to N objects of type Type and assigns the result back
 * to OldPointer.
 *
 * When DEBUG is true the requests go to trace_malloc/trace_realloc
 * (chkmalloc.h), which also receive __FILE__ and __LINE__; otherwise
 * they go to xmalloc/xrealloc.
 */
#if DEBUG
# include "chkmalloc.h"
# define ETAGS_RAW_MALLOC(nbytes) \
	trace_malloc (__FILE__, __LINE__, nbytes)
# define ETAGS_RAW_REALLOC(ptr,nbytes) \
	trace_realloc (__FILE__, __LINE__, ptr, nbytes)
#else
# define ETAGS_RAW_MALLOC(nbytes)	xmalloc (nbytes)
# define ETAGS_RAW_REALLOC(ptr,nbytes)	xrealloc (ptr, nbytes)
#endif

#define xnew(n,Type) \
	((Type *) ETAGS_RAW_MALLOC ((n) * sizeof (Type)))
#define xrnew(op,n,Type) \
	((op) = (Type *) ETAGS_RAW_REALLOC ((char *) (op), (n) * sizeof (Type)))
270
271 #define bool int
272
273 typedef void Lang_function __P((FILE *));
274
/* Pairs a compressed-file suffix with the command that decompresses
   files carrying that suffix.  */
typedef struct
{
  char *suffix;		/* suffix of the file names this compressor handles */
  char *command;	/* command: takes one arg, decompresses to stdout */
} compressor;
280
281 typedef struct
282 {
283 char *name; /* language name */
284 char *help; /* detailed help for the language */
285 Lang_function *function; /* parse function */
286 char **suffixes; /* name suffixes of this language's files */
287 char **filenames; /* names of this language's files */
288 char **interpreters; /* interpreters for this language */
289 bool metasource; /* source used to generate other sources */
290 } language;
291
292 typedef struct fdesc
293 {
294 struct fdesc *next; /* for the linked list */
295 char *infname; /* uncompressed input file name */
296 char *infabsname; /* absolute uncompressed input file name */
297 char *infabsdir; /* absolute dir of input file */
298 char *taggedfname; /* file name to write in tagfile */
299 language *lang; /* language of file */
300 char *prop; /* file properties to write in tagfile */
301 bool usecharno; /* etags tags shall contain char number */
302 bool written; /* entry written in the tags file */
303 } fdesc;
304
305 typedef struct node_st
306 { /* sorting structure */
307 struct node_st *left, *right; /* left and right sons */
308 fdesc *fdp; /* description of file to whom tag belongs */
309 char *name; /* tag name */
310 char *regex; /* search regexp */
311 bool valid; /* write this tag on the tag file */
312 bool is_func; /* function tag: use regexp in CTAGS mode */
313 bool been_warned; /* warning already given for duplicated tag */
314 int lno; /* line number tag is on */
315 long cno; /* character number line starts on */
316 } node;
317
/*
 * A `linebuffer' holds one line of text.  `readline_internal' fills a
 * linebuffer from a stream, whatever the length of the line.
 * SIZE is the size of BUFFER; LEN is the length of the string found in
 * BUFFER once readline has read it.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string held in BUFFER */
  char *buffer;			/* the text itself */
} linebuffer;
331
332 /* Used to support mixing of --lang and file names. */
333 typedef struct
334 {
335 enum {
336 at_language, /* a language specification */
337 at_regexp, /* a regular expression */
338 at_filename, /* a file name */
339 at_stdin, /* read from stdin here */
340 at_end /* stop parsing the list */
341 } arg_type; /* argument type */
342 language *lang; /* language associated with the argument */
343 char *what; /* the argument itself */
344 } argument;
345
346 /* Structure defining a regular expression. */
347 typedef struct regexp
348 {
349 struct regexp *p_next; /* pointer to next in list */
350 language *lang; /* if set, use only for this language */
351 char *pattern; /* the regexp pattern */
352 char *name; /* tag name */
353 struct re_pattern_buffer *pat; /* the compiled pattern */
354 struct re_registers regs; /* re registers */
355 bool error_signaled; /* already signaled for this regexp */
356 bool force_explicit_name; /* do not allow implict tag name */
357 bool ignore_case; /* ignore case when matching */
358 bool multi_line; /* do a multi-line match on the whole file */
359 } regexp;
360
361
362 /* Many compilers barf on this:
363 Lang_function Ada_funcs;
364 so let's write it this way */
365 static void Ada_funcs __P((FILE *));
366 static void Asm_labels __P((FILE *));
367 static void C_entries __P((int c_ext, FILE *));
368 static void default_C_entries __P((FILE *));
369 static void plain_C_entries __P((FILE *));
370 static void Cjava_entries __P((FILE *));
371 static void Cobol_paragraphs __P((FILE *));
372 static void Cplusplus_entries __P((FILE *));
373 static void Cstar_entries __P((FILE *));
374 static void Erlang_functions __P((FILE *));
375 static void Forth_words __P((FILE *));
376 static void Fortran_functions __P((FILE *));
377 static void HTML_labels __P((FILE *));
378 static void Lisp_functions __P((FILE *));
379 static void Lua_functions __P((FILE *));
380 static void Makefile_targets __P((FILE *));
381 static void Pascal_functions __P((FILE *));
382 static void Perl_functions __P((FILE *));
383 static void PHP_functions __P((FILE *));
384 static void PS_functions __P((FILE *));
385 static void Prolog_functions __P((FILE *));
386 static void Python_functions __P((FILE *));
387 static void Scheme_functions __P((FILE *));
388 static void TeX_commands __P((FILE *));
389 static void Texinfo_nodes __P((FILE *));
390 static void Yacc_entries __P((FILE *));
391 static void just_read_file __P((FILE *));
392
393 static void print_language_names __P((void));
394 static void print_version __P((void));
395 static void print_help __P((argument *));
396 int main __P((int, char **));
397
398 static compressor *get_compressor_from_suffix __P((char *, char **));
399 static language *get_language_from_langname __P((const char *));
400 static language *get_language_from_interpreter __P((char *));
401 static language *get_language_from_filename __P((char *, bool));
402 static void readline __P((linebuffer *, FILE *));
403 static long readline_internal __P((linebuffer *, FILE *));
404 static bool nocase_tail __P((char *));
405 static void get_tag __P((char *, char **));
406
407 static void analyse_regex __P((char *));
408 static void free_regexps __P((void));
409 static void regex_tag_multiline __P((void));
410 static void error __P((const char *, const char *));
411 static void suggest_asking_for_help __P((void));
412 void fatal __P((char *, char *));
413 static void pfatal __P((char *));
414 static void add_node __P((node *, node **));
415
416 static void init __P((void));
417 static void process_file_name __P((char *, language *));
418 static void process_file __P((FILE *, char *, language *));
419 static void find_entries __P((FILE *));
420 static void free_tree __P((node *));
421 static void free_fdesc __P((fdesc *));
422 static void pfnote __P((char *, bool, char *, int, int, long));
423 static void make_tag __P((char *, int, bool, char *, int, int, long));
424 static void invalidate_nodes __P((fdesc *, node **));
425 static void put_entries __P((node *));
426
427 static char *concat __P((char *, char *, char *));
428 static char *skip_spaces __P((char *));
429 static char *skip_non_spaces __P((char *));
430 static char *savenstr __P((char *, int));
431 static char *savestr __P((char *));
432 static char *etags_strchr __P((const char *, int));
433 static char *etags_strrchr __P((const char *, int));
434 static int etags_strcasecmp __P((const char *, const char *));
435 static int etags_strncasecmp __P((const char *, const char *, int));
436 static char *etags_getcwd __P((void));
437 static char *relative_filename __P((char *, char *));
438 static char *absolute_filename __P((char *, char *));
439 static char *absolute_dirname __P((char *, char *));
440 static bool filename_is_absolute __P((char *f));
441 static void canonicalize_filename __P((char *));
442 static void linebuffer_init __P((linebuffer *));
443 static void linebuffer_setlen __P((linebuffer *, int));
444 static PTR xmalloc __P((unsigned int));
445 static PTR xrealloc __P((char *, unsigned int));
446
447 \f
448 static char searchar = '/'; /* use /.../ searches */
449
450 static char *tagfile; /* output file */
451 static char *progname; /* name this program was invoked with */
452 static char *cwd; /* current working directory */
453 static char *tagfiledir; /* directory of tagfile */
454 static FILE *tagf; /* ioptr for tags file */
455
456 static fdesc *fdhead; /* head of file description list */
457 static fdesc *curfdp; /* current file description */
458 static int lineno; /* line number of current line */
459 static long charno; /* current character number */
460 static long linecharno; /* charno of start of current line */
461 static char *dbp; /* pointer to start of current tag */
462
463 static const int invalidcharno = -1;
464
465 static node *nodehead; /* the head of the binary tree of tags */
466 static node *last_node; /* the last node created */
467
468 static linebuffer lb; /* the current line */
469 static linebuffer filebuf; /* a buffer containing the whole file */
470 static linebuffer token_name; /* a buffer containing a tag name */
471
472 /* boolean "functions" (see init) */
473 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* The character lists that the classification macros refer to.  */

/* white chars */
static char *white = " \f\t\n\r\v";

/* not in a name -- look at make_tag before modifying! */
static char *nonam = " \f\t\n\r()=,;";

/* token ending chars */
static char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?";

/* token starting chars */
static char *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";

/* valid in-token chars */
static char *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
485
486 static bool append_to_tagfile; /* -a: append to tags */
487 /* The next four default to TRUE for etags, but to FALSE for ctags. */
488 static bool typedefs; /* -t: create tags for C and Ada typedefs */
489 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
490 /* 0 struct/enum/union decls, and C++ */
491 /* member functions. */
492 static bool constantypedefs; /* -d: create tags for C #define, enum */
493 /* constants and variables. */
494 /* -D: opposite of -d. Default under ctags. */
495 static bool globals; /* create tags for global variables */
496 static bool members; /* create tags for C member variables */
497 static bool declarations; /* --declarations: tag them and extern in C&Co*/
498 static bool no_line_directive; /* ignore #line directives (undocumented) */
499 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
500 static bool update; /* -u: update tags */
501 static bool vgrind_style; /* -v: create vgrind style index output */
502 static bool no_warnings; /* -w: suppress warnings (undocumented) */
503 static bool cxref_style; /* -x: create cxref style output */
504 static bool cplusplus; /* .[hc] means C++, not C */
505 static bool ignoreindent; /* -I: ignore indentation in C */
506 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
507
508 /* STDIN is defined in LynxOS system headers */
509 #ifdef STDIN
510 # undef STDIN
511 #endif
512
513 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
514 static bool parsing_stdin; /* --parse-stdin used */
515
516 static regexp *p_head; /* list of all regexps */
517 static bool need_filebuf; /* some regexes are multi-line */
518
/* Table of long options, in the `struct option' format of getopt_long.
   An entry whose third field is NULL makes getopt_long return the
   fourth field.  An entry whose third field is the address of a flag
   makes getopt_long store the fourth field into that flag instead.
   The flags are declared `bool', which this file defines as `int'.
   The set of options depends on whether CTAGS is true.  */
static struct option longopts[] =
{
  { "append", no_argument, NULL, 'a' },
  { "packages-only", no_argument, &packages_only, TRUE },
  { "c++", no_argument, NULL, 'C' },
  { "declarations", no_argument, &declarations, TRUE },
  { "no-line-directive", no_argument, &no_line_directive, TRUE },
  { "no-duplicates", no_argument, &no_duplicates, TRUE },
  { "help", no_argument, NULL, 'h' },
  /* NOTE(review): same option name as the entry just above, so a lookup
     of "--help" presumably never selects this one -- confirm.  */
  { "help", no_argument, NULL, 'H' },
  { "ignore-indentation", no_argument, NULL, 'I' },
  { "language", required_argument, NULL, 'l' },
  { "members", no_argument, &members, TRUE },
  { "no-members", no_argument, &members, FALSE },
  { "output", required_argument, NULL, 'o' },
  { "regex", required_argument, NULL, 'r' },
  { "no-regex", no_argument, NULL, 'R' },
  { "ignore-case-regex", required_argument, NULL, 'c' },
  /* STDIN is the value #defined for --parse-stdin, not a character.  */
  { "parse-stdin", required_argument, NULL, STDIN },
  { "version", no_argument, NULL, 'V' },

#if CTAGS /* Ctags options */
  { "backward-search", no_argument, NULL, 'B' },
  { "cxref", no_argument, NULL, 'x' },
  { "defines", no_argument, NULL, 'd' },
  { "globals", no_argument, &globals, TRUE },
  { "typedefs", no_argument, NULL, 't' },
  { "typedefs-and-c++", no_argument, NULL, 'T' },
  { "update", no_argument, NULL, 'u' },
  { "vgrind", no_argument, NULL, 'v' },
  { "no-warn", no_argument, NULL, 'w' },

#else /* Etags options */
  { "no-defines", no_argument, NULL, 'D' },
  { "no-globals", no_argument, &globals, FALSE },
  { "include", required_argument, NULL, 'i' },
#endif
  { NULL }			/* end of the table */
};
558
559 static compressor compressors[] =
560 {
561 { "z", "gzip -d -c"},
562 { "Z", "gzip -d -c"},
563 { "gz", "gzip -d -c"},
564 { "GZ", "gzip -d -c"},
565 { "bz2", "bzip2 -d -c" },
566 { NULL }
567 };
568
569 /*
570 * Language stuff.
571 */
572
573 /* Ada code */
574 static char *Ada_suffixes [] =
575 { "ads", "adb", "ada", NULL };
576 static char Ada_help [] =
577 "In Ada code, functions, procedures, packages, tasks and types are\n\
578 tags. Use the `--packages-only' option to create tags for\n\
579 packages only.\n\
580 Ada tag names have suffixes indicating the type of entity:\n\
581 Entity type: Qualifier:\n\
582 ------------ ----------\n\
583 function /f\n\
584 procedure /p\n\
585 package spec /s\n\
586 package body /b\n\
587 type /t\n\
588 task /k\n\
589 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
590 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
591 will just search for any tag `bidule'.";
592
593 /* Assembly code */
594 static char *Asm_suffixes [] =
595 { "a", /* Unix assembler */
596 "asm", /* Microcontroller assembly */
597 "def", /* BSO/Tasking definition includes */
598 "inc", /* Microcontroller include files */
599 "ins", /* Microcontroller include files */
600 "s", "sa", /* Unix assembler */
601 "S", /* cpp-processed Unix assembler */
602 "src", /* BSO/Tasking C compiler output */
603 NULL
604 };
605 static char Asm_help [] =
606 "In assembler code, labels appearing at the beginning of a line,\n\
607 followed by a colon, are tags.";
608
609
610 /* Note that .c and .h can be considered C++, if the --c++ flag was
611 given, or if the `class' or `template' keywords are met inside the file.
612 That is why default_C_entries is called for these. */
613 static char *default_C_suffixes [] =
614 { "c", "h", NULL };
615 static char default_C_help [] =
616 "In C code, any C function or typedef is a tag, and so are\n\
617 definitions of `struct', `union' and `enum'. `#define' macro\n\
618 definitions and `enum' constants are tags unless you specify\n\
619 `--no-defines'. Global variables are tags unless you specify\n\
620 `--no-globals' and so are struct members unless you specify\n\
621 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
622 `--no-members' can make the tags table file much smaller.\n\
623 You can tag function declarations and external variables by\n\
624 using `--declarations'.";
625
626 static char *Cplusplus_suffixes [] =
627 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
628 "M", /* Objective C++ */
629 "pdb", /* Postscript with C syntax */
630 NULL };
631 static char Cplusplus_help [] =
632 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
633 --help --lang=c --lang=c++ for full help.)\n\
634 In addition to C tags, member functions are also recognized. Member\n\
635 variables are recognized unless you use the `--no-members' option.\n\
636 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
637 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
638 `operator+'.";
639
640 static char *Cjava_suffixes [] =
641 { "java", NULL };
642 static char Cjava_help [] =
643 "In Java code, all the tags constructs of C and C++ code are\n\
644 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
645
646
647 static char *Cobol_suffixes [] =
648 { "COB", "cob", NULL };
649 static char Cobol_help [] =
650 "In Cobol code, tags are paragraph names; that is, any word\n\
651 starting in column 8 and followed by a period.";
652
653 static char *Cstar_suffixes [] =
654 { "cs", "hs", NULL };
655
656 static char *Erlang_suffixes [] =
657 { "erl", "hrl", NULL };
658 static char Erlang_help [] =
659 "In Erlang code, the tags are the functions, records and macros\n\
660 defined in the file.";
661
/* File name suffixes of Forth files; the list ends with NULL.  Given
   internal linkage like every other suffix table in this file.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
664 static char Forth_help [] =
665 "In Forth code, tags are words defined by `:',\n\
666 constant, code, create, defer, value, variable, buffer:, field.";
667
668 static char *Fortran_suffixes [] =
669 { "F", "f", "f90", "for", NULL };
670 static char Fortran_help [] =
671 "In Fortran code, functions, subroutines and block data are tags.";
672
673 static char *HTML_suffixes [] =
674 { "htm", "html", "shtml", NULL };
675 static char HTML_help [] =
676 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
677 `h3' headers. Also, tags are `name=' in anchors and all\n\
678 occurrences of `id='.";
679
680 static char *Lisp_suffixes [] =
681 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
682 static char Lisp_help [] =
683 "In Lisp code, any function defined with `defun', any variable\n\
684 defined with `defvar' or `defconst', and in general the first\n\
685 argument of any expression that starts with `(def' in column zero\n\
686 is a tag.";
687
688 static char *Lua_suffixes [] =
689 { "lua", "LUA", NULL };
690 static char Lua_help [] =
691 "In Lua scripts, all functions are tags.";
692
693 static char *Makefile_filenames [] =
694 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
695 static char Makefile_help [] =
696 "In makefiles, targets are tags; additionally, variables are tags\n\
697 unless you specify `--no-globals'.";
698
699 static char *Objc_suffixes [] =
700 { "lm", /* Objective lex file */
701 "m", /* Objective C file */
702 NULL };
703 static char Objc_help [] =
704 "In Objective C code, tags include Objective C definitions for classes,\n\
705 class categories, methods and protocols. Tags for variables and\n\
706 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
707 (Use --help --lang=c --lang=objc --lang=java for full help.)";
708
709 static char *Pascal_suffixes [] =
710 { "p", "pas", NULL };
711 static char Pascal_help [] =
712 "In Pascal code, the tags are the functions and procedures defined\n\
713 in the file.";
714 /* " // this is for working around an Emacs highlighting bug... */
715
716 static char *Perl_suffixes [] =
717 { "pl", "pm", NULL };
718 static char *Perl_interpreters [] =
719 { "perl", "@PERL@", NULL };
720 static char Perl_help [] =
721 "In Perl code, the tags are the packages, subroutines and variables\n\
722 defined by the `package', `sub', `my' and `local' keywords. Use\n\
723 `--globals' if you want to tag global variables. Tags for\n\
724 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
725 defined in the default package is `main::SUB'.";
726
727 static char *PHP_suffixes [] =
728 { "php", "php3", "php4", NULL };
729 static char PHP_help [] =
730 "In PHP code, tags are functions, classes and defines. Unless you use\n\
731 the `--no-members' option, vars are tags too.";
732
733 static char *plain_C_suffixes [] =
734 { "pc", /* Pro*C file */
735 NULL };
736
737 static char *PS_suffixes [] =
738 { "ps", "psw", NULL }; /* .psw is for PSWrap */
739 static char PS_help [] =
740 "In PostScript code, the tags are the functions.";
741
742 static char *Prolog_suffixes [] =
743 { "prolog", NULL };
744 static char Prolog_help [] =
745 "In Prolog code, tags are predicates and rules at the beginning of\n\
746 line.";
747
748 static char *Python_suffixes [] =
749 { "py", NULL };
750 static char Python_help [] =
751 "In Python code, `def' or `class' at the beginning of a line\n\
752 generate a tag.";
753
754 /* Can't do the `SCM' or `scm' prefix with a version number. */
755 static char *Scheme_suffixes [] =
756 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
757 static char Scheme_help [] =
758 "In Scheme code, tags include anything defined with `def' or with a\n\
759 construct whose name starts with `def'. They also include\n\
760 variables set with `set!' at top level in the file.";
761
762 static char *TeX_suffixes [] =
763 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
764 static char TeX_help [] =
765 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
766 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
767 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
768 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
769 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
770 \n\
771 Other commands can be specified by setting the environment variable\n\
772 `TEXTAGS' to a colon-separated list like, for example,\n\
773 TEXTAGS=\"mycommand:myothercommand\".";
774
775
776 static char *Texinfo_suffixes [] =
777 { "texi", "texinfo", "txi", NULL };
778 static char Texinfo_help [] =
779 "for texinfo files, lines starting with @node are tagged.";
780
781 static char *Yacc_suffixes [] =
782 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
783 static char Yacc_help [] =
784 "In Bison or Yacc input files, each rule defines as a tag the\n\
785 nonterminal it constructs. The portions of the file that contain\n\
786 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
787 for full help).";
788
/* Detailed help text for the `auto' pseudo-language.  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";
792
793 static char none_help [] =
794 "`none' is not a real language, it indicates to only do\n\
795 regexp processing on files.";
796
797 static char no_lang_help [] =
798 "No detailed help available for this language.";
799
800
/*
 * Table of languages.
 *
 * Each entry initializes the fields of a `language' in the order they
 * are declared: name, help, function, suffixes, filenames,
 * interpreters, metasource.  Fields left out at the end of an entry
 * are zero-initialized.  The entry whose name is NULL ends the table.
 *
 * It is ok for a given function to be listed under more than one
 * name. I just didn't.
 */

static language lang_names [] =
{
  { "ada", Ada_help, Ada_funcs, Ada_suffixes },
  { "asm", Asm_help, Asm_labels, Asm_suffixes },
  { "c", default_C_help, default_C_entries, default_C_suffixes },
  { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
  { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
  { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
  { "forth", Forth_help, Forth_words, Forth_suffixes },
  { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
  { "html", HTML_help, HTML_labels, HTML_suffixes },
  { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
  { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
  { "lua", Lua_help, Lua_functions, Lua_suffixes },
  /* found by file name rather than by suffix */
  { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc", Objc_help, plain_C_entries, Objc_suffixes },
  { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
  /* the only entry that lists interpreters */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php", PHP_help, PHP_functions, PHP_suffixes },
  { "postscript",PS_help, PS_functions, PS_suffixes },
  { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
  { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
  { "python", Python_help, Python_functions, Python_suffixes },
  { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
  { "tex", TeX_help, TeX_commands, TeX_suffixes },
  { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
  /* the only entry with metasource set */
  { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto", auto_help }, /* default guessing scheme */
  { "none", none_help, just_read_file }, /* regexp matching only */
  { NULL } /* end of list */
};
840
841 \f
842 static void
843 print_language_names ()
844 {
845 language *lang;
846 char **name, **ext;
847
848 puts ("\nThese are the currently supported languages, along with the\n\
849 default file names and dot suffixes:");
850 for (lang = lang_names; lang->name != NULL; lang++)
851 {
852 printf (" %-*s", 10, lang->name);
853 if (lang->filenames != NULL)
854 for (name = lang->filenames; *name != NULL; name++)
855 printf (" %s", *name);
856 if (lang->suffixes != NULL)
857 for (ext = lang->suffixes; *ext != NULL; ext++)
858 printf (" .%s", *ext);
859 puts ("");
860 }
861 puts ("where `auto' means use default language for files based on file\n\
862 name suffix, and `none' means only do regexp processing on files.\n\
863 If no language is specified and no matching suffix is found,\n\
864 the first line of the file is read for a sharp-bang (#!) sequence\n\
865 followed by the name of an interpreter. If no such sequence is found,\n\
866 Fortran is tried first; if no tags are found, C is tried next.\n\
867 When parsing any C file, a \"class\" or \"template\" keyword\n\
868 switches to C++.");
869 puts ("Compressed files are supported using gzip and bzip2.\n\
870 \n\
871 For detailed help on a given language use, for example,\n\
872 etags --help --lang=ada.");
873 }
874
875 #ifndef EMACS_NAME
876 # define EMACS_NAME "standalone"
877 #endif
878 #ifndef VERSION
879 # define VERSION "version"
880 #endif
881 static void
882 print_version ()
883 {
884 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
885 puts ("Copyright (C) 2007 Free Software Foundation, Inc.");
886 puts ("This program is distributed under the terms in ETAGS.README");
887
888 exit (EXIT_SUCCESS);
889 }
890
891 static void
892 print_help (argbuffer)
893 argument *argbuffer;
894 {
895 bool help_for_lang = FALSE;
896
897 for (; argbuffer->arg_type != at_end; argbuffer++)
898 if (argbuffer->arg_type == at_language)
899 {
900 if (help_for_lang)
901 puts ("");
902 puts (argbuffer->lang->help);
903 help_for_lang = TRUE;
904 }
905
906 if (help_for_lang)
907 exit (EXIT_SUCCESS);
908
909 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
910 \n\
911 These are the options accepted by %s.\n", progname, progname);
912 if (NO_LONG_OPTIONS)
913 puts ("WARNING: long option names do not work with this executable,\n\
914 as it is not linked with GNU getopt.");
915 else
916 puts ("You may use unambiguous abbreviations for the long option names.");
917 puts (" A - as file name means read names from stdin (one per line).\n\
918 Absolute names are stored in the output file as they are.\n\
919 Relative ones are stored relative to the output file's directory.\n");
920
921 puts ("-a, --append\n\
922 Append tag entries to existing tags file.");
923
924 puts ("--packages-only\n\
925 For Ada files, only generate tags for packages.");
926
927 if (CTAGS)
928 puts ("-B, --backward-search\n\
929 Write the search commands for the tag entries using '?', the\n\
930 backward-search command instead of '/', the forward-search command.");
931
932 /* This option is mostly obsolete, because etags can now automatically
933 detect C++. Retained for backward compatibility and for debugging and
934 experimentation. In principle, we could want to tag as C++ even
935 before any "class" or "template" keyword.
936 puts ("-C, --c++\n\
937 Treat files whose name suffix defaults to C language as C++ files.");
938 */
939
940 puts ("--declarations\n\
941 In C and derived languages, create tags for function declarations,");
942 if (CTAGS)
943 puts ("\tand create tags for extern variables if --globals is used.");
944 else
945 puts
946 ("\tand create tags for extern variables unless --no-globals is used.");
947
948 if (CTAGS)
949 puts ("-d, --defines\n\
950 Create tag entries for C #define constants and enum constants, too.");
951 else
952 puts ("-D, --no-defines\n\
953 Don't create tag entries for C #define constants and enum constants.\n\
954 This makes the tags file smaller.");
955
956 if (!CTAGS)
957 puts ("-i FILE, --include=FILE\n\
958 Include a note in tag file indicating that, when searching for\n\
959 a tag, one should also consult the tags file FILE after\n\
960 checking the current file.");
961
962 puts ("-l LANG, --language=LANG\n\
963 Force the following files to be considered as written in the\n\
964 named language up to the next --language=LANG option.");
965
966 if (CTAGS)
967 puts ("--globals\n\
968 Create tag entries for global variables in some languages.");
969 else
970 puts ("--no-globals\n\
971 Do not create tag entries for global variables in some\n\
972 languages. This makes the tags file smaller.");
973 puts ("--no-members\n\
974 Do not create tag entries for members of structures\n\
975 in some languages.");
976
977 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
978 Make a tag for each line matching a regular expression pattern\n\
979 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
980 files only. REGEXFILE is a file containing one REGEXP per line.\n\
981 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
982 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
983 puts (" If TAGNAME/ is present, the tags created are named.\n\
984 For example Tcl named tags can be created with:\n\
985 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
986 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
987 `m' means to allow multi-line matches, `s' implies `m' and\n\
988 causes dot to match any character, including newline.");
989 puts ("-R, --no-regex\n\
990 Don't create tags from regexps for the following files.");
991 puts ("-I, --ignore-indentation\n\
992 In C and C++ do not assume that a closing brace in the first\n\
993 column is the final brace of a function or structure definition.");
994 puts ("-o FILE, --output=FILE\n\
995 Write the tags to FILE.");
996 puts ("--parse-stdin=NAME\n\
997 Read from standard input and record tags as belonging to file NAME.");
998
999 if (CTAGS)
1000 {
1001 puts ("-t, --typedefs\n\
1002 Generate tag entries for C and Ada typedefs.");
1003 puts ("-T, --typedefs-and-c++\n\
1004 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1005 and C++ member functions.");
1006 }
1007
1008 if (CTAGS)
1009 puts ("-u, --update\n\
1010 Update the tag entries for the given files, leaving tag\n\
1011 entries for other files in place. Currently, this is\n\
1012 implemented by deleting the existing entries for the given\n\
1013 files and then rewriting the new entries at the end of the\n\
1014 tags file. It is often faster to simply rebuild the entire\n\
1015 tag file than to use this.");
1016
1017 if (CTAGS)
1018 {
1019 puts ("-v, --vgrind\n\
1020 Print on the standard output an index of items intended for\n\
1021 human consumption, similar to the output of vgrind. The index\n\
1022 is sorted, and gives the page number of each item.");
1023 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
1024 puts ("-w, --no-duplicates\n\
1025 Do not create duplicate tag entries, for compatibility with\n\
1026 traditional ctags.");
1027 puts ("-w, --no-warn\n\
1028 Suppress warning messages about duplicate tag entries.");
1029 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
1030 puts ("-x, --cxref\n\
1031 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1032 The output uses line numbers instead of page numbers, but\n\
1033 beyond that the differences are cosmetic; try both to see\n\
1034 which you like.");
1035 }
1036
1037 puts ("-V, --version\n\
1038 Print the version of the program.\n\
1039 -h, --help\n\
1040 Print this help message.\n\
1041 Followed by one or more `--language' options prints detailed\n\
1042 help about tag generation for the specified languages.");
1043
1044 print_language_names ();
1045
1046 puts ("");
1047 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1048
1049 exit (EXIT_SUCCESS);
1050 }
1051
1052 \f
1053 #ifdef VMS /* VMS specific functions */
1054
/* End-of-string character.  */
#define EOS     '\0'

/* Longest file specification that fits in a vspec.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A string buffer preceded by its current length; body has room for
   MAX_FILE_SPEC_LEN characters plus a terminating EOS.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1064
1065 /*
1066 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1067 returning in each successive call the next file name matching the input
1068 spec. The function expects that each in_spec passed
1069 to it will be processed to completion; in particular, up to and
1070 including the call following that in which the last matching name
1071 is returned, the function ignores the value of in_spec, and will
1072 only start processing a new spec with the following call.
1073 If an error occurs, on return out_spec contains the value
1074 of in_spec when the error occurred.
1075
1076 With each successive file name returned in out_spec, the
1077 function's return value is one. When there are no more matching
1078 names the function returns zero. If on the first call no file
1079 matches in_spec, or there is any other error, -1 is returned.
1080 */
1081
1082 #include <rmsdef.h>
1083 #include <descrip.h>
1084 #define OUTSIZE MAX_FILE_SPEC_LEN
1085 static short
1086 fn_exp (out, in)
1087 vspec *out;
1088 char *in;
1089 {
1090 static long context = 0;
1091 static struct dsc$descriptor_s o;
1092 static struct dsc$descriptor_s i;
1093 static bool pass1 = TRUE;
1094 long status;
1095 short retval;
1096
1097 if (pass1)
1098 {
1099 pass1 = FALSE;
1100 o.dsc$a_pointer = (char *) out;
1101 o.dsc$w_length = (short)OUTSIZE;
1102 i.dsc$a_pointer = in;
1103 i.dsc$w_length = (short)strlen(in);
1104 i.dsc$b_dtype = DSC$K_DTYPE_T;
1105 i.dsc$b_class = DSC$K_CLASS_S;
1106 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1107 o.dsc$b_class = DSC$K_CLASS_VS;
1108 }
1109 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1110 {
1111 out->body[out->curlen] = EOS;
1112 return 1;
1113 }
1114 else if (status == RMS$_NMF)
1115 retval = 0;
1116 else
1117 {
1118 strcpy(out->body, in);
1119 retval = -1;
1120 }
1121 lib$find_file_end(&context);
1122 pass1 = TRUE;
1123 return retval;
1124 }
1125
1126 /*
1127 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1128 name of each file specified by the provided arg expanding wildcards.
1129 */
1130 static char *
1131 gfnames (arg, p_error)
1132 char *arg;
1133 bool *p_error;
1134 {
1135 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1136
1137 switch (fn_exp (&filename, arg))
1138 {
1139 case 1:
1140 *p_error = FALSE;
1141 return filename.body;
1142 case 0:
1143 *p_error = FALSE;
1144 return NULL;
1145 default:
1146 *p_error = TRUE;
1147 return filename.body;
1148 }
1149 }
1150
#ifndef OLD     /* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for the `system' library function: running CMD is not
 * supported, so report that and return -1.  The value is returned
 * explicitly because the callers in this file compare it with
 * EXIT_SUCCESS or pass it to exit.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1158
1159 #define VERSION_DELIM ';'
1160 char *massage_name (s)
1161 char *s;
1162 {
1163 char *start = s;
1164
1165 for ( ; *s; s++)
1166 if (*s == VERSION_DELIM)
1167 {
1168 *s = EOS;
1169 break;
1170 }
1171 else
1172 *s = lowcase (*s);
1173 return start;
1174 }
1175 #endif /* VMS */
1176
1177 \f
1178 int
1179 main (argc, argv)
1180 int argc;
1181 char *argv[];
1182 {
1183 int i;
1184 unsigned int nincluded_files;
1185 char **included_files;
1186 argument *argbuffer;
1187 int current_arg, file_count;
1188 linebuffer filename_lb;
1189 bool help_asked = FALSE;
1190 #ifdef VMS
1191 bool got_err;
1192 #endif
1193 char *optstring;
1194 int opt;
1195
1196
1197 #ifdef DOS_NT
1198 _fmode = O_BINARY; /* all of files are treated as binary files */
1199 #endif /* DOS_NT */
1200
1201 progname = argv[0];
1202 nincluded_files = 0;
1203 included_files = xnew (argc, char *);
1204 current_arg = 0;
1205 file_count = 0;
1206
1207 /* Allocate enough no matter what happens. Overkill, but each one
1208 is small. */
1209 argbuffer = xnew (argc, argument);
1210
1211 /*
1212 * If etags, always find typedefs and structure tags. Why not?
1213 * Also default to find macro constants, enum constants, struct
1214 * members and global variables.
1215 */
1216 if (!CTAGS)
1217 {
1218 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1219 globals = TRUE;
1220 }
1221
1222 /* When the optstring begins with a '-' getopt_long does not rearrange the
1223 non-options arguments to be at the end, but leaves them alone. */
1224 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1225 "ac:Cf:Il:o:r:RSVhH",
1226 (CTAGS) ? "BxdtTuvw" : "Di:");
1227
1228 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1229 switch (opt)
1230 {
1231 case 0:
1232 /* If getopt returns 0, then it has already processed a
1233 long-named option. We should do nothing. */
1234 break;
1235
1236 case 1:
1237 /* This means that a file name has been seen. Record it. */
1238 argbuffer[current_arg].arg_type = at_filename;
1239 argbuffer[current_arg].what = optarg;
1240 ++current_arg;
1241 ++file_count;
1242 break;
1243
1244 case STDIN:
1245 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1246 argbuffer[current_arg].arg_type = at_stdin;
1247 argbuffer[current_arg].what = optarg;
1248 ++current_arg;
1249 ++file_count;
1250 if (parsing_stdin)
1251 fatal ("cannot parse standard input more than once", (char *)NULL);
1252 parsing_stdin = TRUE;
1253 break;
1254
1255 /* Common options. */
1256 case 'a': append_to_tagfile = TRUE; break;
1257 case 'C': cplusplus = TRUE; break;
1258 case 'f': /* for compatibility with old makefiles */
1259 case 'o':
1260 if (tagfile)
1261 {
1262 error ("-o option may only be given once.", (char *)NULL);
1263 suggest_asking_for_help ();
1264 /* NOTREACHED */
1265 }
1266 tagfile = optarg;
1267 break;
1268 case 'I':
1269 case 'S': /* for backward compatibility */
1270 ignoreindent = TRUE;
1271 break;
1272 case 'l':
1273 {
1274 language *lang = get_language_from_langname (optarg);
1275 if (lang != NULL)
1276 {
1277 argbuffer[current_arg].lang = lang;
1278 argbuffer[current_arg].arg_type = at_language;
1279 ++current_arg;
1280 }
1281 }
1282 break;
1283 case 'c':
1284 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1285 optarg = concat (optarg, "i", ""); /* memory leak here */
1286 /* FALLTHRU */
1287 case 'r':
1288 argbuffer[current_arg].arg_type = at_regexp;
1289 argbuffer[current_arg].what = optarg;
1290 ++current_arg;
1291 break;
1292 case 'R':
1293 argbuffer[current_arg].arg_type = at_regexp;
1294 argbuffer[current_arg].what = NULL;
1295 ++current_arg;
1296 break;
1297 case 'V':
1298 print_version ();
1299 break;
1300 case 'h':
1301 case 'H':
1302 help_asked = TRUE;
1303 break;
1304
1305 /* Etags options */
1306 case 'D': constantypedefs = FALSE; break;
1307 case 'i': included_files[nincluded_files++] = optarg; break;
1308
1309 /* Ctags options. */
1310 case 'B': searchar = '?'; break;
1311 case 'd': constantypedefs = TRUE; break;
1312 case 't': typedefs = TRUE; break;
1313 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1314 case 'u': update = TRUE; break;
1315 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1316 case 'x': cxref_style = TRUE; break;
1317 case 'w': no_warnings = TRUE; break;
1318 default:
1319 suggest_asking_for_help ();
1320 /* NOTREACHED */
1321 }
1322
1323 /* No more options. Store the rest of arguments. */
1324 for (; optind < argc; optind++)
1325 {
1326 argbuffer[current_arg].arg_type = at_filename;
1327 argbuffer[current_arg].what = argv[optind];
1328 ++current_arg;
1329 ++file_count;
1330 }
1331
1332 argbuffer[current_arg].arg_type = at_end;
1333
1334 if (help_asked)
1335 print_help (argbuffer);
1336 /* NOTREACHED */
1337
1338 if (nincluded_files == 0 && file_count == 0)
1339 {
1340 error ("no input files specified.", (char *)NULL);
1341 suggest_asking_for_help ();
1342 /* NOTREACHED */
1343 }
1344
1345 if (tagfile == NULL)
1346 tagfile = CTAGS ? "tags" : "TAGS";
1347 cwd = etags_getcwd (); /* the current working directory */
1348 if (cwd[strlen (cwd) - 1] != '/')
1349 {
1350 char *oldcwd = cwd;
1351 cwd = concat (oldcwd, "/", "");
1352 free (oldcwd);
1353 }
1354 /* Relative file names are made relative to the current directory. */
1355 if (streq (tagfile, "-")
1356 || strneq (tagfile, "/dev/", 5))
1357 tagfiledir = cwd;
1358 else
1359 tagfiledir = absolute_dirname (tagfile, cwd);
1360
1361 init (); /* set up boolean "functions" */
1362
1363 linebuffer_init (&lb);
1364 linebuffer_init (&filename_lb);
1365 linebuffer_init (&filebuf);
1366 linebuffer_init (&token_name);
1367
1368 if (!CTAGS)
1369 {
1370 if (streq (tagfile, "-"))
1371 {
1372 tagf = stdout;
1373 #ifdef DOS_NT
1374 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1375 doesn't take effect until after `stdout' is already open). */
1376 if (!isatty (fileno (stdout)))
1377 setmode (fileno (stdout), O_BINARY);
1378 #endif /* DOS_NT */
1379 }
1380 else
1381 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1382 if (tagf == NULL)
1383 pfatal (tagfile);
1384 }
1385
1386 /*
1387 * Loop through files finding functions.
1388 */
1389 for (i = 0; i < current_arg; i++)
1390 {
1391 static language *lang; /* non-NULL if language is forced */
1392 char *this_file;
1393
1394 switch (argbuffer[i].arg_type)
1395 {
1396 case at_language:
1397 lang = argbuffer[i].lang;
1398 break;
1399 case at_regexp:
1400 analyse_regex (argbuffer[i].what);
1401 break;
1402 case at_filename:
1403 #ifdef VMS
1404 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1405 {
1406 if (got_err)
1407 {
1408 error ("can't find file %s\n", this_file);
1409 argc--, argv++;
1410 }
1411 else
1412 {
1413 this_file = massage_name (this_file);
1414 }
1415 #else
1416 this_file = argbuffer[i].what;
1417 #endif
1418 /* Input file named "-" means read file names from stdin
1419 (one per line) and use them. */
1420 if (streq (this_file, "-"))
1421 {
1422 if (parsing_stdin)
1423 fatal ("cannot parse standard input AND read file names from it",
1424 (char *)NULL);
1425 while (readline_internal (&filename_lb, stdin) > 0)
1426 process_file_name (filename_lb.buffer, lang);
1427 }
1428 else
1429 process_file_name (this_file, lang);
1430 #ifdef VMS
1431 }
1432 #endif
1433 break;
1434 case at_stdin:
1435 this_file = argbuffer[i].what;
1436 process_file (stdin, this_file, lang);
1437 break;
1438 }
1439 }
1440
1441 free_regexps ();
1442 free (lb.buffer);
1443 free (filebuf.buffer);
1444 free (token_name.buffer);
1445
1446 if (!CTAGS || cxref_style)
1447 {
1448 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1449 put_entries (nodehead);
1450 free_tree (nodehead);
1451 nodehead = NULL;
1452 if (!CTAGS)
1453 {
1454 fdesc *fdp;
1455
1456 /* Output file entries that have no tags. */
1457 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1458 if (!fdp->written)
1459 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1460
1461 while (nincluded_files-- > 0)
1462 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1463
1464 if (fclose (tagf) == EOF)
1465 pfatal (tagfile);
1466 }
1467
1468 exit (EXIT_SUCCESS);
1469 }
1470
1471 if (update)
1472 {
1473 char cmd[BUFSIZ];
1474 for (i = 0; i < current_arg; ++i)
1475 {
1476 switch (argbuffer[i].arg_type)
1477 {
1478 case at_filename:
1479 case at_stdin:
1480 break;
1481 default:
1482 continue; /* the for loop */
1483 }
1484 sprintf (cmd,
1485 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1486 tagfile, argbuffer[i].what, tagfile);
1487 if (system (cmd) != EXIT_SUCCESS)
1488 fatal ("failed to execute shell command", (char *)NULL);
1489 }
1490 append_to_tagfile = TRUE;
1491 }
1492
1493 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1494 if (tagf == NULL)
1495 pfatal (tagfile);
1496 put_entries (nodehead); /* write all the tags (CTAGS) */
1497 free_tree (nodehead);
1498 nodehead = NULL;
1499 if (fclose (tagf) == EOF)
1500 pfatal (tagfile);
1501
1502 if (CTAGS)
1503 if (append_to_tagfile || update)
1504 {
1505 char cmd[2*BUFSIZ+20];
1506 /* Maybe these should be used:
1507 setenv ("LC_COLLATE", "C", 1);
1508 setenv ("LC_ALL", "C", 1); */
1509 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1510 exit (system (cmd));
1511 }
1512 return EXIT_SUCCESS;
1513 }
1514
1515
1516 /*
1517 * Return a compressor given the file name. If EXTPTR is non-zero,
1518 * return a pointer into FILE where the compressor-specific
1519 * extension begins. If no compressor is found, NULL is returned
1520 * and EXTPTR is not significant.
1521 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1522 */
1523 static compressor *
1524 get_compressor_from_suffix (file, extptr)
1525 char *file;
1526 char **extptr;
1527 {
1528 compressor *compr;
1529 char *slash, *suffix;
1530
1531 /* This relies on FN to be after canonicalize_filename,
1532 so we don't need to consider backslashes on DOS_NT. */
1533 slash = etags_strrchr (file, '/');
1534 suffix = etags_strrchr (file, '.');
1535 if (suffix == NULL || suffix < slash)
1536 return NULL;
1537 if (extptr != NULL)
1538 *extptr = suffix;
1539 suffix += 1;
1540 /* Let those poor souls who live with DOS 8+3 file name limits get
1541 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1542 Only the first do loop is run if not MSDOS */
1543 do
1544 {
1545 for (compr = compressors; compr->suffix != NULL; compr++)
1546 if (streq (compr->suffix, suffix))
1547 return compr;
1548 if (!MSDOS)
1549 break; /* do it only once: not really a loop */
1550 if (extptr != NULL)
1551 *extptr = ++suffix;
1552 } while (*suffix != '\0');
1553 return NULL;
1554 }
1555
1556
1557
1558 /*
1559 * Return a language given the name.
1560 */
1561 static language *
1562 get_language_from_langname (name)
1563 const char *name;
1564 {
1565 language *lang;
1566
1567 if (name == NULL)
1568 error ("empty language name", (char *)NULL);
1569 else
1570 {
1571 for (lang = lang_names; lang->name != NULL; lang++)
1572 if (streq (name, lang->name))
1573 return lang;
1574 error ("unknown language \"%s\"", name);
1575 }
1576
1577 return NULL;
1578 }
1579
1580
1581 /*
1582 * Return a language given the interpreter name.
1583 */
1584 static language *
1585 get_language_from_interpreter (interpreter)
1586 char *interpreter;
1587 {
1588 language *lang;
1589 char **iname;
1590
1591 if (interpreter == NULL)
1592 return NULL;
1593 for (lang = lang_names; lang->name != NULL; lang++)
1594 if (lang->interpreters != NULL)
1595 for (iname = lang->interpreters; *iname != NULL; iname++)
1596 if (streq (*iname, interpreter))
1597 return lang;
1598
1599 return NULL;
1600 }
1601
1602
1603
1604 /*
1605 * Return a language given the file name.
1606 */
1607 static language *
1608 get_language_from_filename (file, case_sensitive)
1609 char *file;
1610 bool case_sensitive;
1611 {
1612 language *lang;
1613 char **name, **ext, *suffix;
1614
1615 /* Try whole file name first. */
1616 for (lang = lang_names; lang->name != NULL; lang++)
1617 if (lang->filenames != NULL)
1618 for (name = lang->filenames; *name != NULL; name++)
1619 if ((case_sensitive)
1620 ? streq (*name, file)
1621 : strcaseeq (*name, file))
1622 return lang;
1623
1624 /* If not found, try suffix after last dot. */
1625 suffix = etags_strrchr (file, '.');
1626 if (suffix == NULL)
1627 return NULL;
1628 suffix += 1;
1629 for (lang = lang_names; lang->name != NULL; lang++)
1630 if (lang->suffixes != NULL)
1631 for (ext = lang->suffixes; *ext != NULL; ext++)
1632 if ((case_sensitive)
1633 ? streq (*ext, suffix)
1634 : strcaseeq (*ext, suffix))
1635 return lang;
1636 return NULL;
1637 }
1638
1639 \f
1640 /*
1641 * This routine is called on each file argument.
1642 */
1643 static void
1644 process_file_name (file, lang)
1645 char *file;
1646 language *lang;
1647 {
1648 struct stat stat_buf;
1649 FILE *inf;
1650 fdesc *fdp;
1651 compressor *compr;
1652 char *compressed_name, *uncompressed_name;
1653 char *ext, *real_name;
1654 int retval;
1655
1656 canonicalize_filename (file);
1657 if (streq (file, tagfile) && !streq (tagfile, "-"))
1658 {
1659 error ("skipping inclusion of %s in self.", file);
1660 return;
1661 }
1662 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1663 {
1664 compressed_name = NULL;
1665 real_name = uncompressed_name = savestr (file);
1666 }
1667 else
1668 {
1669 real_name = compressed_name = savestr (file);
1670 uncompressed_name = savenstr (file, ext - file);
1671 }
1672
1673 /* If the canonicalized uncompressed name
1674 has already been dealt with, skip it silently. */
1675 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1676 {
1677 assert (fdp->infname != NULL);
1678 if (streq (uncompressed_name, fdp->infname))
1679 goto cleanup;
1680 }
1681
1682 if (stat (real_name, &stat_buf) != 0)
1683 {
1684 /* Reset real_name and try with a different name. */
1685 real_name = NULL;
1686 if (compressed_name != NULL) /* try with the given suffix */
1687 {
1688 if (stat (uncompressed_name, &stat_buf) == 0)
1689 real_name = uncompressed_name;
1690 }
1691 else /* try all possible suffixes */
1692 {
1693 for (compr = compressors; compr->suffix != NULL; compr++)
1694 {
1695 compressed_name = concat (file, ".", compr->suffix);
1696 if (stat (compressed_name, &stat_buf) != 0)
1697 {
1698 if (MSDOS)
1699 {
1700 char *suf = compressed_name + strlen (file);
1701 size_t suflen = strlen (compr->suffix) + 1;
1702 for ( ; suf[1]; suf++, suflen--)
1703 {
1704 memmove (suf, suf + 1, suflen);
1705 if (stat (compressed_name, &stat_buf) == 0)
1706 {
1707 real_name = compressed_name;
1708 break;
1709 }
1710 }
1711 if (real_name != NULL)
1712 break;
1713 } /* MSDOS */
1714 free (compressed_name);
1715 compressed_name = NULL;
1716 }
1717 else
1718 {
1719 real_name = compressed_name;
1720 break;
1721 }
1722 }
1723 }
1724 if (real_name == NULL)
1725 {
1726 perror (file);
1727 goto cleanup;
1728 }
1729 } /* try with a different name */
1730
1731 if (!S_ISREG (stat_buf.st_mode))
1732 {
1733 error ("skipping %s: it is not a regular file.", real_name);
1734 goto cleanup;
1735 }
1736 if (real_name == compressed_name)
1737 {
1738 char *cmd = concat (compr->command, " ", real_name);
1739 inf = (FILE *) popen (cmd, "r");
1740 free (cmd);
1741 }
1742 else
1743 inf = fopen (real_name, "r");
1744 if (inf == NULL)
1745 {
1746 perror (real_name);
1747 goto cleanup;
1748 }
1749
1750 process_file (inf, uncompressed_name, lang);
1751
1752 if (real_name == compressed_name)
1753 retval = pclose (inf);
1754 else
1755 retval = fclose (inf);
1756 if (retval < 0)
1757 pfatal (file);
1758
1759 cleanup:
1760 if (compressed_name) free (compressed_name);
1761 if (uncompressed_name) free (uncompressed_name);
1762 last_node = NULL;
1763 curfdp = NULL;
1764 return;
1765 }
1766
1767 static void
1768 process_file (fh, fn, lang)
1769 FILE *fh;
1770 char *fn;
1771 language *lang;
1772 {
1773 static const fdesc emptyfdesc;
1774 fdesc *fdp;
1775
1776 /* Create a new input file description entry. */
1777 fdp = xnew (1, fdesc);
1778 *fdp = emptyfdesc;
1779 fdp->next = fdhead;
1780 fdp->infname = savestr (fn);
1781 fdp->lang = lang;
1782 fdp->infabsname = absolute_filename (fn, cwd);
1783 fdp->infabsdir = absolute_dirname (fn, cwd);
1784 if (filename_is_absolute (fn))
1785 {
1786 /* An absolute file name. Canonicalize it. */
1787 fdp->taggedfname = absolute_filename (fn, NULL);
1788 }
1789 else
1790 {
1791 /* A file name relative to cwd. Make it relative
1792 to the directory of the tags file. */
1793 fdp->taggedfname = relative_filename (fn, tagfiledir);
1794 }
1795 fdp->usecharno = TRUE; /* use char position when making tags */
1796 fdp->prop = NULL;
1797 fdp->written = FALSE; /* not written on tags file yet */
1798
1799 fdhead = fdp;
1800 curfdp = fdhead; /* the current file description */
1801
1802 find_entries (fh);
1803
1804 /* If not Ctags, and if this is not metasource and if it contained no #line
1805 directives, we can write the tags and free all nodes pointing to
1806 curfdp. */
1807 if (!CTAGS
1808 && curfdp->usecharno /* no #line directives in this file */
1809 && !curfdp->lang->metasource)
1810 {
1811 node *np, *prev;
1812
1813 /* Look for the head of the sublist relative to this file. See add_node
1814 for the structure of the node tree. */
1815 prev = NULL;
1816 for (np = nodehead; np != NULL; prev = np, np = np->left)
1817 if (np->fdp == curfdp)
1818 break;
1819
1820 /* If we generated tags for this file, write and delete them. */
1821 if (np != NULL)
1822 {
1823 /* This is the head of the last sublist, if any. The following
1824 instructions depend on this being true. */
1825 assert (np->left == NULL);
1826
1827 assert (fdhead == curfdp);
1828 assert (last_node->fdp == curfdp);
1829 put_entries (np); /* write tags for file curfdp->taggedfname */
1830 free_tree (np); /* remove the written nodes */
1831 if (prev == NULL)
1832 nodehead = NULL; /* no nodes left */
1833 else
1834 prev->left = NULL; /* delete the pointer to the sublist */
1835 }
1836 }
1837 }
1838
1839 /*
1840 * This routine sets up the boolean pseudo-functions which work
1841 * by setting boolean flags dependent upon the corresponding character.
1842 * Every char which is NOT in that string is not a white char. Therefore,
1843 * all of the array "_wht" is set to FALSE, and then the elements
1844 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1845 * of a char is TRUE if it is the string "white", else FALSE.
1846 */
1847 static void
1848 init ()
1849 {
1850 register char *sp;
1851 register int i;
1852
1853 for (i = 0; i < CHARS; i++)
1854 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1855 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1856 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1857 notinname('\0') = notinname('\n');
1858 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1859 begtoken('\0') = begtoken('\n');
1860 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1861 intoken('\0') = intoken('\n');
1862 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1863 endtoken('\0') = endtoken('\n');
1864 }
1865
1866 /*
1867 * This routine opens the specified file and calls the function
1868 * which finds the function and type definitions.
1869 */
1870 static void
1871 find_entries (inf)
1872 FILE *inf;
1873 {
1874 char *cp;
1875 language *lang = curfdp->lang;
1876 Lang_function *parser = NULL;
1877
1878 /* If user specified a language, use it. */
1879 if (lang != NULL && lang->function != NULL)
1880 {
1881 parser = lang->function;
1882 }
1883
1884 /* Else try to guess the language given the file name. */
1885 if (parser == NULL)
1886 {
1887 lang = get_language_from_filename (curfdp->infname, TRUE);
1888 if (lang != NULL && lang->function != NULL)
1889 {
1890 curfdp->lang = lang;
1891 parser = lang->function;
1892 }
1893 }
1894
1895 /* Else look for sharp-bang as the first two characters. */
1896 if (parser == NULL
1897 && readline_internal (&lb, inf) > 0
1898 && lb.len >= 2
1899 && lb.buffer[0] == '#'
1900 && lb.buffer[1] == '!')
1901 {
1902 char *lp;
1903
1904 /* Set lp to point at the first char after the last slash in the
1905 line or, if no slashes, at the first nonblank. Then set cp to
1906 the first successive blank and terminate the string. */
1907 lp = etags_strrchr (lb.buffer+2, '/');
1908 if (lp != NULL)
1909 lp += 1;
1910 else
1911 lp = skip_spaces (lb.buffer + 2);
1912 cp = skip_non_spaces (lp);
1913 *cp = '\0';
1914
1915 if (strlen (lp) > 0)
1916 {
1917 lang = get_language_from_interpreter (lp);
1918 if (lang != NULL && lang->function != NULL)
1919 {
1920 curfdp->lang = lang;
1921 parser = lang->function;
1922 }
1923 }
1924 }
1925
1926 /* We rewind here, even if inf may be a pipe. We fail if the
1927 length of the first line is longer than the pipe block size,
1928 which is unlikely. */
1929 rewind (inf);
1930
1931 /* Else try to guess the language given the case insensitive file name. */
1932 if (parser == NULL)
1933 {
1934 lang = get_language_from_filename (curfdp->infname, FALSE);
1935 if (lang != NULL && lang->function != NULL)
1936 {
1937 curfdp->lang = lang;
1938 parser = lang->function;
1939 }
1940 }
1941
1942 /* Else try Fortran or C. */
1943 if (parser == NULL)
1944 {
1945 node *old_last_node = last_node;
1946
1947 curfdp->lang = get_language_from_langname ("fortran");
1948 find_entries (inf);
1949
1950 if (old_last_node == last_node)
1951 /* No Fortran entries found. Try C. */
1952 {
1953 /* We do not tag if rewind fails.
1954 Only the file name will be recorded in the tags file. */
1955 rewind (inf);
1956 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1957 find_entries (inf);
1958 }
1959 return;
1960 }
1961
1962 if (!no_line_directive
1963 && curfdp->lang != NULL && curfdp->lang->metasource)
1964 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1965 file, or anyway we parsed a file that is automatically generated from
1966 this one. If this is the case, the bingo.c file contained #line
1967 directives that generated tags pointing to this file. Let's delete
1968 them all before parsing this file, which is the real source. */
1969 {
1970 fdesc **fdpp = &fdhead;
1971 while (*fdpp != NULL)
1972 if (*fdpp != curfdp
1973 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1974 /* We found one of those! We must delete both the file description
1975 and all tags referring to it. */
1976 {
1977 fdesc *badfdp = *fdpp;
1978
1979 /* Delete the tags referring to badfdp->taggedfname
1980 that were obtained from badfdp->infname. */
1981 invalidate_nodes (badfdp, &nodehead);
1982
1983 *fdpp = badfdp->next; /* remove the bad description from the list */
1984 free_fdesc (badfdp);
1985 }
1986 else
1987 fdpp = &(*fdpp)->next; /* advance the list pointer */
1988 }
1989
1990 assert (parser != NULL);
1991
1992 /* Generic initialisations before reading from file. */
1993 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1994
1995 /* Generic initialisations before parsing file with readline. */
1996 lineno = 0; /* reset global line number */
1997 charno = 0; /* reset global char number */
1998 linecharno = 0; /* reset global char number of line start */
1999
2000 parser (inf);
2001
2002 regex_tag_multiline ();
2003 }
2004
2005 \f
2006 /*
2007 * Check whether an implicitly named tag should be created,
2008 * then call `pfnote'.
2009 * NAME is a string that is internally copied by this function.
2010 *
2011 * TAGS format specification
2012 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2013 * The following is explained in some more detail in etc/ETAGS.EBNF.
2014 *
2015 * make_tag creates tags with "implicit tag names" (unnamed tags)
2016 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2017 * 1. NAME does not contain any of the characters in NONAM;
2018 * 2. LINESTART contains name as either a rightmost, or rightmost but
2019 * one character, substring;
2020 * 3. the character, if any, immediately before NAME in LINESTART must
2021 * be a character in NONAM;
2022 * 4. the character, if any, immediately after NAME in LINESTART must
2023 * also be a character in NONAM.
2024 *
2025 * The implementation uses the notinname() macro, which recognises the
2026 * characters stored in the string `nonam'.
2027 * etags.el needs to use the same characters that are in NONAM.
2028 */
2029 static void
2030 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2031 char *name; /* tag name, or NULL if unnamed */
2032 int namelen; /* tag length */
2033 bool is_func; /* tag is a function */
2034 char *linestart; /* start of the line where tag is */
2035 int linelen; /* length of the line where tag is */
2036 int lno; /* line number */
2037 long cno; /* character number */
2038 {
2039 bool named = (name != NULL && namelen > 0);
2040
2041 if (!CTAGS && named) /* maybe set named to false */
2042 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2043 such that etags.el can guess a name from it. */
2044 {
2045 int i;
2046 register char *cp = name;
2047
2048 for (i = 0; i < namelen; i++)
2049 if (notinname (*cp++))
2050 break;
2051 if (i == namelen) /* rule #1 */
2052 {
2053 cp = linestart + linelen - namelen;
2054 if (notinname (linestart[linelen-1]))
2055 cp -= 1; /* rule #4 */
2056 if (cp >= linestart /* rule #2 */
2057 && (cp == linestart
2058 || notinname (cp[-1])) /* rule #3 */
2059 && strneq (name, cp, namelen)) /* rule #2 */
2060 named = FALSE; /* use implicit tag name */
2061 }
2062 }
2063
2064 if (named)
2065 name = savenstr (name, namelen);
2066 else
2067 name = NULL;
2068 pfnote (name, is_func, linestart, linelen, lno, cno);
2069 }
2070
2071 /* Record a tag. */
2072 static void
2073 pfnote (name, is_func, linestart, linelen, lno, cno)
2074 char *name; /* tag name, or NULL if unnamed */
2075 bool is_func; /* tag is a function */
2076 char *linestart; /* start of the line where tag is */
2077 int linelen; /* length of the line where tag is */
2078 int lno; /* line number */
2079 long cno; /* character number */
2080 {
2081 register node *np;
2082
2083 assert (name == NULL || name[0] != '\0');
2084 if (CTAGS && name == NULL)
2085 return;
2086
2087 np = xnew (1, node);
2088
2089 /* If ctags mode, change name "main" to M<thisfilename>. */
2090 if (CTAGS && !cxref_style && streq (name, "main"))
2091 {
2092 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2093 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2094 fp = etags_strrchr (np->name, '.');
2095 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2096 fp[0] = '\0';
2097 }
2098 else
2099 np->name = name;
2100 np->valid = TRUE;
2101 np->been_warned = FALSE;
2102 np->fdp = curfdp;
2103 np->is_func = is_func;
2104 np->lno = lno;
2105 if (np->fdp->usecharno)
2106 /* Our char numbers are 0-base, because of C language tradition?
2107 ctags compatibility? old versions compatibility? I don't know.
2108 Anyway, since emacs's are 1-base we expect etags.el to take care
2109 of the difference. If we wanted to have 1-based numbers, we would
2110 uncomment the +1 below. */
2111 np->cno = cno /* + 1 */ ;
2112 else
2113 np->cno = invalidcharno;
2114 np->left = np->right = NULL;
2115 if (CTAGS && !cxref_style)
2116 {
2117 if (strlen (linestart) < 50)
2118 np->regex = concat (linestart, "$", "");
2119 else
2120 np->regex = savenstr (linestart, 50);
2121 }
2122 else
2123 np->regex = savenstr (linestart, linelen);
2124
2125 add_node (np, &nodehead);
2126 }
2127
2128 /*
2129 * free_tree ()
2130 * recurse on left children, iterate on right children.
2131 */
2132 static void
2133 free_tree (np)
2134 register node *np;
2135 {
2136 while (np)
2137 {
2138 register node *node_right = np->right;
2139 free_tree (np->left);
2140 if (np->name != NULL)
2141 free (np->name);
2142 free (np->regex);
2143 free (np);
2144 np = node_right;
2145 }
2146 }
2147
2148 /*
2149 * free_fdesc ()
2150 * delete a file description
2151 */
2152 static void
2153 free_fdesc (fdp)
2154 register fdesc *fdp;
2155 {
2156 if (fdp->infname != NULL) free (fdp->infname);
2157 if (fdp->infabsname != NULL) free (fdp->infabsname);
2158 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2159 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2160 if (fdp->prop != NULL) free (fdp->prop);
2161 free (fdp);
2162 }
2163
2164 /*
2165 * add_node ()
2166 * Adds a node to the tree of nodes. In etags mode, sort by file
2167 * name. In ctags mode, sort by tag name. Make no attempt at
2168 * balancing.
2169 *
2170 * add_node is the only function allowed to add nodes, so it can
2171 * maintain state.
2172 */
2173 static void
2174 add_node (np, cur_node_p)
2175 node *np, **cur_node_p;
2176 {
2177 register int dif;
2178 register node *cur_node = *cur_node_p;
2179
2180 if (cur_node == NULL)
2181 {
2182 *cur_node_p = np;
2183 last_node = np;
2184 return;
2185 }
2186
2187 if (!CTAGS)
2188 /* Etags Mode */
2189 {
2190 /* For each file name, tags are in a linked sublist on the right
2191 pointer. The first tags of different files are a linked list
2192 on the left pointer. last_node points to the end of the last
2193 used sublist. */
2194 if (last_node != NULL && last_node->fdp == np->fdp)
2195 {
2196 /* Let's use the same sublist as the last added node. */
2197 assert (last_node->right == NULL);
2198 last_node->right = np;
2199 last_node = np;
2200 }
2201 else if (cur_node->fdp == np->fdp)
2202 {
2203 /* Scanning the list we found the head of a sublist which is
2204 good for us. Let's scan this sublist. */
2205 add_node (np, &cur_node->right);
2206 }
2207 else
2208 /* The head of this sublist is not good for us. Let's try the
2209 next one. */
2210 add_node (np, &cur_node->left);
2211 } /* if ETAGS mode */
2212
2213 else
2214 {
2215 /* Ctags Mode */
2216 dif = strcmp (np->name, cur_node->name);
2217
2218 /*
2219 * If this tag name matches an existing one, then
2220 * do not add the node, but maybe print a warning.
2221 */
2222 if (no_duplicates && !dif)
2223 {
2224 if (np->fdp == cur_node->fdp)
2225 {
2226 if (!no_warnings)
2227 {
2228 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2229 np->fdp->infname, lineno, np->name);
2230 fprintf (stderr, "Second entry ignored\n");
2231 }
2232 }
2233 else if (!cur_node->been_warned && !no_warnings)
2234 {
2235 fprintf
2236 (stderr,
2237 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2238 np->fdp->infname, cur_node->fdp->infname, np->name);
2239 cur_node->been_warned = TRUE;
2240 }
2241 return;
2242 }
2243
2244 /* Actually add the node */
2245 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2246 } /* if CTAGS mode */
2247 }
2248
2249 /*
2250 * invalidate_nodes ()
2251 * Scan the node tree and invalidate all nodes pointing to the
2252 * given file description (CTAGS case) or free them (ETAGS case).
2253 */
2254 static void
2255 invalidate_nodes (badfdp, npp)
2256 fdesc *badfdp;
2257 node **npp;
2258 {
2259 node *np = *npp;
2260
2261 if (np == NULL)
2262 return;
2263
2264 if (CTAGS)
2265 {
2266 if (np->left != NULL)
2267 invalidate_nodes (badfdp, &np->left);
2268 if (np->fdp == badfdp)
2269 np->valid = FALSE;
2270 if (np->right != NULL)
2271 invalidate_nodes (badfdp, &np->right);
2272 }
2273 else
2274 {
2275 assert (np->fdp != NULL);
2276 if (np->fdp == badfdp)
2277 {
2278 *npp = np->left; /* detach the sublist from the list */
2279 np->left = NULL; /* isolate it */
2280 free_tree (np); /* free it */
2281 invalidate_nodes (badfdp, npp);
2282 }
2283 else
2284 invalidate_nodes (badfdp, &np->left);
2285 }
2286 }
2287
2288 \f
2289 static int total_size_of_entries __P((node *));
2290 static int number_len __P((long));
2291
/* Return how many decimal digits are needed to print NUM, which is
   expected not to be negative.  The result is never less than 1.  */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to begin with, one more for each division by ten that
     leaves something.  */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;

  return digits;
}
2302
2303 /*
2304 * Return total number of characters that put_entries will output for
2305 * the nodes in the linked list at the right of the specified node.
2306 * This count is irrelevant with etags.el since emacs 19.34 at least,
2307 * but is still supplied for backward compatibility.
2308 */
2309 static int
2310 total_size_of_entries (np)
2311 register node *np;
2312 {
2313 register int total = 0;
2314
2315 for (; np != NULL; np = np->right)
2316 if (np->valid)
2317 {
2318 total += strlen (np->regex) + 1; /* pat\177 */
2319 if (np->name != NULL)
2320 total += strlen (np->name) + 1; /* name\001 */
2321 total += number_len ((long) np->lno) + 1; /* lno, */
2322 if (np->cno != invalidcharno) /* cno */
2323 total += number_len (np->cno);
2324 total += 1; /* newline */
2325 }
2326
2327 return total;
2328 }
2329
2330 static void
2331 put_entries (np)
2332 register node *np;
2333 {
2334 register char *sp;
2335 static fdesc *fdp = NULL;
2336
2337 if (np == NULL)
2338 return;
2339
2340 /* Output subentries that precede this one */
2341 if (CTAGS)
2342 put_entries (np->left);
2343
2344 /* Output this entry */
2345 if (np->valid)
2346 {
2347 if (!CTAGS)
2348 {
2349 /* Etags mode */
2350 if (fdp != np->fdp)
2351 {
2352 fdp = np->fdp;
2353 fprintf (tagf, "\f\n%s,%d\n",
2354 fdp->taggedfname, total_size_of_entries (np));
2355 fdp->written = TRUE;
2356 }
2357 fputs (np->regex, tagf);
2358 fputc ('\177', tagf);
2359 if (np->name != NULL)
2360 {
2361 fputs (np->name, tagf);
2362 fputc ('\001', tagf);
2363 }
2364 fprintf (tagf, "%d,", np->lno);
2365 if (np->cno != invalidcharno)
2366 fprintf (tagf, "%ld", np->cno);
2367 fputs ("\n", tagf);
2368 }
2369 else
2370 {
2371 /* Ctags mode */
2372 if (np->name == NULL)
2373 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2374
2375 if (cxref_style)
2376 {
2377 if (vgrind_style)
2378 fprintf (stdout, "%s %s %d\n",
2379 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2380 else
2381 fprintf (stdout, "%-16s %3d %-16s %s\n",
2382 np->name, np->lno, np->fdp->taggedfname, np->regex);
2383 }
2384 else
2385 {
2386 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2387
2388 if (np->is_func)
2389 { /* function or #define macro with args */
2390 putc (searchar, tagf);
2391 putc ('^', tagf);
2392
2393 for (sp = np->regex; *sp; sp++)
2394 {
2395 if (*sp == '\\' || *sp == searchar)
2396 putc ('\\', tagf);
2397 putc (*sp, tagf);
2398 }
2399 putc (searchar, tagf);
2400 }
2401 else
2402 { /* anything else; text pattern inadequate */
2403 fprintf (tagf, "%d", np->lno);
2404 }
2405 putc ('\n', tagf);
2406 }
2407 }
2408 } /* if this node contains a valid tag */
2409
2410 /* Output subentries that follow this one */
2411 put_entries (np->right);
2412 if (!CTAGS)
2413 put_entries (np->left);
2414 }
2415
2416 \f
/* Bit masks describing which member of the C family is being parsed.  */
#define C_EXT	0x00fff		/* bits that hold the C extension */
#define C_PLAIN	0x00000		/* plain C, no extension */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* Java */
#define C_AUTO	0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
2425
/*
 * The C symbol tables: kinds of keyword that the C-family parser
 * tells apart.  st_none is used for a token that is not a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2440
2441 static unsigned int hash __P((const char *, unsigned int));
2442 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2443 static enum sym_type C_symtype __P((char *, int, int));
2444
2445 /* Feed stuff between (but not including) %[ and %] lines to:
2446 gperf -m 5
2447 %[
2448 %compare-strncmp
2449 %enum
2450 %struct-type
2451 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2452 %%
2453 if, 0, st_C_ignore
2454 for, 0, st_C_ignore
2455 while, 0, st_C_ignore
2456 switch, 0, st_C_ignore
2457 return, 0, st_C_ignore
2458 __attribute__, 0, st_C_attribute
2459 @interface, 0, st_C_objprot
2460 @protocol, 0, st_C_objprot
2461 @implementation,0, st_C_objimpl
2462 @end, 0, st_C_objend
2463 import, (C_JAVA & ~C_PLPL), st_C_ignore
2464 package, (C_JAVA & ~C_PLPL), st_C_ignore
2465 friend, C_PLPL, st_C_ignore
2466 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2467 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2468 interface, (C_JAVA & ~C_PLPL), st_C_struct
2469 class, 0, st_C_class
2470 namespace, C_PLPL, st_C_struct
2471 domain, C_STAR, st_C_struct
2472 union, 0, st_C_struct
2473 struct, 0, st_C_struct
2474 extern, 0, st_C_extern
2475 enum, 0, st_C_enum
2476 typedef, 0, st_C_typedef
2477 define, 0, st_C_define
2478 undef, 0, st_C_define
2479 operator, C_PLPL, st_C_operator
2480 template, 0, st_C_template
2481 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2482 DEFUN, 0, st_C_gnumacro
2483 SYSCALL, 0, st_C_gnumacro
2484 ENTRY, 0, st_C_gnumacro
2485 PSEUDO, 0, st_C_gnumacro
2486 # These are defined inside C functions, so currently they are not met.
2487 # EXFUN used in glibc, DEFVAR_* in emacs.
2488 #EXFUN, 0, st_C_gnumacro
2489 #DEFVAR_, 0, st_C_gnumacro
2490 %]
2491 and replace lines between %< and %> with its output, then:
2492 - remove the #if characterset check
2493 - make in_word_set static and not inline. */
2494 /*%<*/
2495 /* C code produced by gperf version 3.0.1 */
2496 /* Command-line: gperf -m 5 */
2497 /* Computed positions: -k'2-3' */
2498
2499 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2500 /* maximum key range = 33, duplicates = 0 */
2501
/* Hash function of the keyword table; derived from gperf output.
   The value is LEN plus the association value of the second
   character and, unless LEN is 2, of the third one as well.  STR
   must therefore hold at least two characters.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Association values, indexed by character code.  */
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* The third character takes part for every length except 2.  */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2556
2557 static struct C_stab_entry *
2558 in_word_set (str, len)
2559 register const char *str;
2560 register unsigned int len;
2561 {
2562 enum
2563 {
2564 TOTAL_KEYWORDS = 32,
2565 MIN_WORD_LENGTH = 2,
2566 MAX_WORD_LENGTH = 15,
2567 MIN_HASH_VALUE = 2,
2568 MAX_HASH_VALUE = 34
2569 };
2570
2571 static struct C_stab_entry wordlist[] =
2572 {
2573 {""}, {""},
2574 {"if", 0, st_C_ignore},
2575 {""},
2576 {"@end", 0, st_C_objend},
2577 {"union", 0, st_C_struct},
2578 {"define", 0, st_C_define},
2579 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2580 {"template", 0, st_C_template},
2581 {"operator", C_PLPL, st_C_operator},
2582 {"@interface", 0, st_C_objprot},
2583 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2584 {"friend", C_PLPL, st_C_ignore},
2585 {"typedef", 0, st_C_typedef},
2586 {"return", 0, st_C_ignore},
2587 {"@implementation",0, st_C_objimpl},
2588 {"@protocol", 0, st_C_objprot},
2589 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2590 {"extern", 0, st_C_extern},
2591 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2592 {"struct", 0, st_C_struct},
2593 {"domain", C_STAR, st_C_struct},
2594 {"switch", 0, st_C_ignore},
2595 {"enum", 0, st_C_enum},
2596 {"for", 0, st_C_ignore},
2597 {"namespace", C_PLPL, st_C_struct},
2598 {"class", 0, st_C_class},
2599 {"while", 0, st_C_ignore},
2600 {"undef", 0, st_C_define},
2601 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2602 {"__attribute__", 0, st_C_attribute},
2603 {"SYSCALL", 0, st_C_gnumacro},
2604 {"ENTRY", 0, st_C_gnumacro},
2605 {"PSEUDO", 0, st_C_gnumacro},
2606 {"DEFUN", 0, st_C_gnumacro}
2607 };
2608
2609 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2610 {
2611 register int key = hash (str, len);
2612
2613 if (key <= MAX_HASH_VALUE && key >= 0)
2614 {
2615 register const char *s = wordlist[key].name;
2616
2617 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2618 return &wordlist[key];
2619 }
2620 }
2621 return 0;
2622 }
2623 /*%>*/
2624
2625 static enum sym_type
2626 C_symtype (str, len, c_ext)
2627 char *str;
2628 int len;
2629 int c_ext;
2630 {
2631 register struct C_stab_entry *se = in_word_set (str, len);
2632
2633 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2634 return st_none;
2635 return se->type;
2636 }
2637
2638 \f
/*
 * Support for ignoring __attribute__ ((list)).
 */

/* Set when an __attribute__ keyword is met: an __attribute__
   construct is being looked at.  */
static bool inattribute;
2643
/*
 * A simple finite automaton recognizes C functions and variables;
 * fvdef is the state that automaton is in.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

/* func or var: the extern keyword has been seen.  */
static bool fvextern;
2663
/*
 * A simple finite automaton recognizes typedefs;
 * typdef is the state that automaton is in.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
2677
/*
 * Another simple finite automaton recognizes struct-like structures
 * (enum, struct and union); structdef is the state that automaton
 * is in.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
2691
/*
 * Name of the Objective C class being dealt with.  It is meaningful
 * only while objdef is different from onone; until a class name is
 * stored, it holds a placeholder.
 */
static char *objtag = "<uninited>";
2696
/*
 * One more little state machine, which deals with preprocessor
 * lines; definedef is the state it is in.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
2707
/*
 * State machine for Objective C protocols and implementations;
 * objdef is the state it is in.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2727
2728
/*
 * Information about the token read last, and about the way it should
 * be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  /* These three members can be used to pass strings around for
     generic purposes.  */
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */

  /* The members below specifically refer to creating tags.  In that
     case the token described above is the pattern that will be used
     to create a tag.  */
  bool valid;			/* the token may be used for a tag; it
				   should be invalidated whenever a
				   state machine is reset prematurely.
				   NOTE(review): presumably no tag is
				   created while this is false --
				   confirm in make_C_tag */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2751
2752 /*
2753 * Variables and functions for dealing with nested structures.
2754 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2755 */
2756 static void pushclass_above __P((int, char *, int));
2757 static void popclass_above __P((int));
2758 static void write_classname __P((linebuffer *, char *qualifier));
2759
/* Stack for nested declaration tags.  Its two arrays run in parallel:
   entry I holds the name of the I-th enclosing class and the brace
   level that goes with it.  */
static struct
{
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)

/* After struct keyword or in struct body, not inside a nested function. */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2771
2772 static void
2773 pushclass_above (bracelev, str, len)
2774 int bracelev;
2775 char *str;
2776 int len;
2777 {
2778 int nl;
2779
2780 popclass_above (bracelev);
2781 nl = cstack.nl;
2782 if (nl >= cstack.size)
2783 {
2784 int size = cstack.size *= 2;
2785 xrnew (cstack.cname, size, char *);
2786 xrnew (cstack.bracelev, size, int);
2787 }
2788 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2789 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2790 cstack.bracelev[nl] = bracelev;
2791 cstack.nl = nl + 1;
2792 }
2793
2794 static void
2795 popclass_above (bracelev)
2796 int bracelev;
2797 {
2798 int nl;
2799
2800 for (nl = cstack.nl - 1;
2801 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2802 nl--)
2803 {
2804 if (cstack.cname[nl] != NULL)
2805 free (cstack.cname[nl]);
2806 cstack.nl = nl;
2807 }
2808 }
2809
2810 static void
2811 write_classname (cn, qualifier)
2812 linebuffer *cn;
2813 char *qualifier;
2814 {
2815 int i, len;
2816 int qlen = strlen (qualifier);
2817
2818 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2819 {
2820 len = 0;
2821 cn->len = 0;
2822 cn->buffer[0] = '\0';
2823 }
2824 else
2825 {
2826 len = strlen (cstack.cname[0]);
2827 linebuffer_setlen (cn, len);
2828 strcpy (cn->buffer, cstack.cname[0]);
2829 }
2830 for (i = 1; i < cstack.nl; i++)
2831 {
2832 char *s;
2833 int slen;
2834
2835 s = cstack.cname[i];
2836 if (s == NULL)
2837 continue;
2838 slen = strlen (s);
2839 len += slen + qlen;
2840 linebuffer_setlen (cn, len);
2841 strncat (cn->buffer, qualifier, qlen);
2842 strncat (cn->buffer, s, slen);
2843 }
2844 }
2845
2846 \f
2847 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2848 static void make_C_tag __P((bool));
2849
2850 /*
2851 * consider_token ()
2852 * checks to see if the current token is at the start of a
2853 * function or variable, or corresponds to a typedef, or
2854 * is a struct/union/enum tag, or #define, or an enum constant.
2855 *
2856 * *IS_FUNC_OR_VAR gets TRUE iff the token is a function or #define macro
2857 * with args. C_EXTP points to the mask of C extensions in use.
2858 *
2859 * Globals
2860 * fvdef IN OUT
2861 * structdef IN OUT
2862 * definedef IN OUT
2863 * typdef IN OUT
2864 * objdef IN OUT
2865 */
2866
2867 static bool
2868 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2869 register char *str; /* IN: token pointer */
2870 register int len; /* IN: token length */
2871 register int c; /* IN: first char after the token */
2872 int *c_extp; /* IN, OUT: C extensions mask */
2873 int bracelev; /* IN: brace level */
2874 int parlev; /* IN: parenthesis level */
2875 bool *is_func_or_var; /* OUT: function or variable found */
2876 {
2877 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2878 structtype is the type of the preceding struct-like keyword, and
2879 structbracelev is the brace level where it has been seen. */
2880 static enum sym_type structtype;
2881 static int structbracelev;
2882 static enum sym_type toktype;
2883
2884
2885 toktype = C_symtype (str, len, *c_extp);
2886
2887 /*
2888 * Skip __attribute__
2889 */
2890 if (toktype == st_C_attribute)
2891 {
2892 inattribute = TRUE;
2893 return FALSE;
2894 }
2895
2896 /*
2897 * Advance the definedef state machine.
2898 */
2899 switch (definedef)
2900 {
2901 case dnone:
2902 /* We're not on a preprocessor line. */
2903 if (toktype == st_C_gnumacro)
2904 {
2905 fvdef = fdefunkey;
2906 return FALSE;
2907 }
2908 break;
2909 case dsharpseen:
2910 if (toktype == st_C_define)
2911 {
2912 definedef = ddefineseen;
2913 }
2914 else
2915 {
2916 definedef = dignorerest;
2917 }
2918 return FALSE;
2919 case ddefineseen:
2920 /*
2921 * Make a tag for any macro, unless it is a constant
2922 * and constantypedefs is FALSE.
2923 */
2924 definedef = dignorerest;
2925 *is_func_or_var = (c == '(');
2926 if (!*is_func_or_var && !constantypedefs)
2927 return FALSE;
2928 else
2929 return TRUE;
2930 case dignorerest:
2931 return FALSE;
2932 default:
2933 error ("internal error: definedef value.", (char *)NULL);
2934 }
2935
2936 /*
2937 * Now typedefs
2938 */
2939 switch (typdef)
2940 {
2941 case tnone:
2942 if (toktype == st_C_typedef)
2943 {
2944 if (typedefs)
2945 typdef = tkeyseen;
2946 fvextern = FALSE;
2947 fvdef = fvnone;
2948 return FALSE;
2949 }
2950 break;
2951 case tkeyseen:
2952 switch (toktype)
2953 {
2954 case st_none:
2955 case st_C_class:
2956 case st_C_struct:
2957 case st_C_enum:
2958 typdef = ttypeseen;
2959 }
2960 break;
2961 case ttypeseen:
2962 if (structdef == snone && fvdef == fvnone)
2963 {
2964 fvdef = fvnameseen;
2965 return TRUE;
2966 }
2967 break;
2968 case tend:
2969 switch (toktype)
2970 {
2971 case st_C_class:
2972 case st_C_struct:
2973 case st_C_enum:
2974 return FALSE;
2975 }
2976 return TRUE;
2977 }
2978
2979 /*
2980 * This structdef business is NOT invoked when we are ctags and the
2981 * file is plain C. This is because a struct tag may have the same
2982 * name as another tag, and this loses with ctags.
2983 */
2984 switch (toktype)
2985 {
2986 case st_C_javastruct:
2987 if (structdef == stagseen)
2988 structdef = scolonseen;
2989 return FALSE;
2990 case st_C_template:
2991 case st_C_class:
2992 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2993 && bracelev == 0
2994 && definedef == dnone && structdef == snone
2995 && typdef == tnone && fvdef == fvnone)
2996 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2997 if (toktype == st_C_template)
2998 break;
2999 /* FALLTHRU */
3000 case st_C_struct:
3001 case st_C_enum:
3002 if (parlev == 0
3003 && fvdef != vignore
3004 && (typdef == tkeyseen
3005 || (typedefs_or_cplusplus && structdef == snone)))
3006 {
3007 structdef = skeyseen;
3008 structtype = toktype;
3009 structbracelev = bracelev;
3010 if (fvdef == fvnameseen)
3011 fvdef = fvnone;
3012 }
3013 return FALSE;
3014 }
3015
3016 if (structdef == skeyseen)
3017 {
3018 structdef = stagseen;
3019 return TRUE;
3020 }
3021
3022 if (typdef != tnone)
3023 definedef = dnone;
3024
3025 /* Detect Objective C constructs. */
3026 switch (objdef)
3027 {
3028 case onone:
3029 switch (toktype)
3030 {
3031 case st_C_objprot:
3032 objdef = oprotocol;
3033 return FALSE;
3034 case st_C_objimpl:
3035 objdef = oimplementation;
3036 return FALSE;
3037 }
3038 break;
3039 case oimplementation:
3040 /* Save the class tag for functions or variables defined inside. */
3041 objtag = savenstr (str, len);
3042 objdef = oinbody;
3043 return FALSE;
3044 case oprotocol:
3045 /* Save the class tag for categories. */
3046 objtag = savenstr (str, len);
3047 objdef = otagseen;
3048 *is_func_or_var = TRUE;
3049 return TRUE;
3050 case oparenseen:
3051 objdef = ocatseen;
3052 *is_func_or_var = TRUE;
3053 return TRUE;
3054 case oinbody:
3055 break;
3056 case omethodsign:
3057 if (parlev == 0)
3058 {
3059 fvdef = fvnone;
3060 objdef = omethodtag;
3061 linebuffer_setlen (&token_name, len);
3062 strncpy (token_name.buffer, str, len);
3063 token_name.buffer[len] = '\0';
3064 return TRUE;
3065 }
3066 return FALSE;
3067 case omethodcolon:
3068 if (parlev == 0)
3069 objdef = omethodparm;
3070 return FALSE;
3071 case omethodparm:
3072 if (parlev == 0)
3073 {
3074 fvdef = fvnone;
3075 objdef = omethodtag;
3076 linebuffer_setlen (&token_name, token_name.len + len);
3077 strncat (token_name.buffer, str, len);
3078 return TRUE;
3079 }
3080 return FALSE;
3081 case oignore:
3082 if (toktype == st_C_objend)
3083 {
3084 /* Memory leakage here: the string pointed by objtag is
3085 never released, because many tests would be needed to
3086 avoid breaking on incorrect input code. The amount of
3087 memory leaked here is the sum of the lengths of the
3088 class tags.
3089 free (objtag); */
3090 objdef = onone;
3091 }
3092 return FALSE;
3093 }
3094
3095 /* A function, variable or enum constant? */
3096 switch (toktype)
3097 {
3098 case st_C_extern:
3099 fvextern = TRUE;
3100 switch (fvdef)
3101 {
3102 case finlist:
3103 case flistseen:
3104 case fignore:
3105 case vignore:
3106 break;
3107 default:
3108 fvdef = fvnone;
3109 }
3110 return FALSE;
3111 case st_C_ignore:
3112 fvextern = FALSE;
3113 fvdef = vignore;
3114 return FALSE;
3115 case st_C_operator:
3116 fvdef = foperator;
3117 *is_func_or_var = TRUE;
3118 return TRUE;
3119 case st_none:
3120 if (constantypedefs
3121 && structdef == snone
3122 && structtype == st_C_enum && bracelev > structbracelev)
3123 return TRUE; /* enum constant */
3124 switch (fvdef)
3125 {
3126 case fdefunkey:
3127 if (bracelev > 0)
3128 break;
3129 fvdef = fdefunname; /* GNU macro */
3130 *is_func_or_var = TRUE;
3131 return TRUE;
3132 case fvnone:
3133 switch (typdef)
3134 {
3135 case ttypeseen:
3136 return FALSE;
3137 case tnone:
3138 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3139 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3140 {
3141 fvdef = vignore;
3142 return FALSE;
3143 }
3144 break;
3145 }
3146 /* FALLTHRU */
3147 case fvnameseen:
3148 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3149 {
3150 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3151 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3152 fvdef = foperator;
3153 *is_func_or_var = TRUE;
3154 return TRUE;
3155 }
3156 if (bracelev > 0 && !instruct)
3157 break;
3158 fvdef = fvnameseen; /* function or variable */
3159 *is_func_or_var = TRUE;
3160 return TRUE;
3161 }
3162 break;
3163 }
3164
3165 return FALSE;
3166 }
3167
3168 \f
3169 /*
3170 * C_entries often keeps pointers to tokens or lines which are older than
3171 * the line currently read. By keeping two line buffers, and switching
3172 * them at end of line, it is possible to use those pointers.
3173 */
3174 static struct
3175 {
3176 long linepos;
3177 linebuffer lb;
3178 } lbs[2];
3179
3180 #define current_lb_is_new (newndx == curndx)
3181 #define switch_line_buffers() (curndx = 1 - curndx)
3182
3183 #define curlb (lbs[curndx].lb)
3184 #define newlb (lbs[newndx].lb)
3185 #define curlinepos (lbs[curndx].linepos)
3186 #define newlinepos (lbs[newndx].linepos)
3187
3188 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3189 #define cplpl (c_ext & C_PLPL)
3190 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3191
3192 #define CNL_SAVE_DEFINEDEF() \
3193 do { \
3194 curlinepos = charno; \
3195 readline (&curlb, inf); \
3196 lp = curlb.buffer; \
3197 quotednl = FALSE; \
3198 newndx = curndx; \
3199 } while (0)
3200
3201 #define CNL() \
3202 do { \
3203 CNL_SAVE_DEFINEDEF(); \
3204 if (savetoken.valid) \
3205 { \
3206 token = savetoken; \
3207 savetoken.valid = FALSE; \
3208 } \
3209 definedef = dnone; \
3210 } while (0)
3211
3212
3213 static void
3214 make_C_tag (isfun)
3215 bool isfun;
3216 {
3217 /* This function is never called when token.valid is FALSE, but
3218 we must protect against invalid input or internal errors. */
3219 if (!DEBUG && !token.valid)
3220 return;
3221
3222 if (token.valid)
3223 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3224 token.offset+token.length+1, token.lineno, token.linepos);
3225 else /* this case is optimised away if !DEBUG */
3226 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3227 token_name.len + 17, isfun, token.line,
3228 token.offset+token.length+1, token.lineno, token.linepos);
3229
3230 token.valid = FALSE;
3231 }
3232
3233
3234 /*
3235 * C_entries ()
3236 * This routine finds functions, variables, typedefs,
3237 * #define's, enum constants and struct/union/enum definitions in
3238 * C syntax and adds them to the list.
3239 */
3240 static void
3241 C_entries (c_ext, inf)
3242 int c_ext; /* extension of C */
3243 FILE *inf; /* input file */
3244 {
3245 register char c; /* latest char read; '\0' for end of line */
3246 register char *lp; /* pointer one beyond the character `c' */
3247 int curndx, newndx; /* indices for current and new lb */
3248 register int tokoff; /* offset in line of start of current token */
3249 register int toklen; /* length of current token */
3250 char *qualifier; /* string used to qualify names */
3251 int qlen; /* length of qualifier */
3252 int bracelev; /* current brace level */
3253 int bracketlev; /* current bracket level */
3254 int parlev; /* current parenthesis level */
3255 int attrparlev; /* __attribute__ parenthesis level */
3256 int templatelev; /* current template level */
3257 int typdefbracelev; /* bracelev where a typedef struct body begun */
3258 bool incomm, inquote, inchar, quotednl, midtoken;
3259 bool yacc_rules; /* in the rules part of a yacc file */
3260 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3261
3262
3263 linebuffer_init (&lbs[0].lb);
3264 linebuffer_init (&lbs[1].lb);
3265 if (cstack.size == 0)
3266 {
3267 cstack.size = (DEBUG) ? 1 : 4;
3268 cstack.nl = 0;
3269 cstack.cname = xnew (cstack.size, char *);
3270 cstack.bracelev = xnew (cstack.size, int);
3271 }
3272
3273 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3274 curndx = newndx = 0;
3275 lp = curlb.buffer;
3276 *lp = 0;
3277
3278 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3279 structdef = snone; definedef = dnone; objdef = onone;
3280 yacc_rules = FALSE;
3281 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3282 token.valid = savetoken.valid = FALSE;
3283 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3284 if (cjava)
3285 { qualifier = "."; qlen = 1; }
3286 else
3287 { qualifier = "::"; qlen = 2; }
3288
3289
3290 while (!feof (inf))
3291 {
3292 c = *lp++;
3293 if (c == '\\')
3294 {
3295 /* If we are at the end of the line, the next character is a
3296 '\0'; do not skip it, because it is what tells us
3297 to read the next line. */
3298 if (*lp == '\0')
3299 {
3300 quotednl = TRUE;
3301 continue;
3302 }
3303 lp++;
3304 c = ' ';
3305 }
3306 else if (incomm)
3307 {
3308 switch (c)
3309 {
3310 case '*':
3311 if (*lp == '/')
3312 {
3313 c = *lp++;
3314 incomm = FALSE;
3315 }
3316 break;
3317 case '\0':
3318 /* Newlines inside comments do not end macro definitions in
3319 traditional cpp. */
3320 CNL_SAVE_DEFINEDEF ();
3321 break;
3322 }
3323 continue;
3324 }
3325 else if (inquote)
3326 {
3327 switch (c)
3328 {
3329 case '"':
3330 inquote = FALSE;
3331 break;
3332 case '\0':
3333 /* Newlines inside strings do not end macro definitions
3334 in traditional cpp, even though compilers don't
3335 usually accept them. */
3336 CNL_SAVE_DEFINEDEF ();
3337 break;
3338 }
3339 continue;
3340 }
3341 else if (inchar)
3342 {
3343 switch (c)
3344 {
3345 case '\0':
3346 /* Hmmm, something went wrong. */
3347 CNL ();
3348 /* FALLTHRU */
3349 case '\'':
3350 inchar = FALSE;
3351 break;
3352 }
3353 continue;
3354 }
3355 else if (bracketlev > 0)
3356 {
3357 switch (c)
3358 {
3359 case ']':
3360 if (--bracketlev > 0)
3361 continue;
3362 break;
3363 case '\0':
3364 CNL_SAVE_DEFINEDEF ();
3365 break;
3366 }
3367 continue;
3368 }
3369 else switch (c)
3370 {
3371 case '"':
3372 inquote = TRUE;
3373 if (inattribute)
3374 break;
3375 switch (fvdef)
3376 {
3377 case fdefunkey:
3378 case fstartlist:
3379 case finlist:
3380 case fignore:
3381 case vignore:
3382 break;
3383 default:
3384 fvextern = FALSE;
3385 fvdef = fvnone;
3386 }
3387 continue;
3388 case '\'':
3389 inchar = TRUE;
3390 if (inattribute)
3391 break;
3392 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3393 {
3394 fvextern = FALSE;
3395 fvdef = fvnone;
3396 }
3397 continue;
3398 case '/':
3399 if (*lp == '*')
3400 {
3401 lp++;
3402 incomm = TRUE;
3403 continue;
3404 }
3405 else if (/* cplpl && */ *lp == '/')
3406 {
3407 c = '\0';
3408 break;
3409 }
3410 else
3411 break;
3412 case '%':
3413 if ((c_ext & YACC) && *lp == '%')
3414 {
3415 /* Entering or exiting rules section in yacc file. */
3416 lp++;
3417 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3418 typdef = tnone; structdef = snone;
3419 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3420 bracelev = 0;
3421 yacc_rules = !yacc_rules;
3422 continue;
3423 }
3424 else
3425 break;
3426 case '#':
3427 if (definedef == dnone)
3428 {
3429 char *cp;
3430 bool cpptoken = TRUE;
3431
3432 /* Look back on this line. If all blanks, or nonblanks
3433 followed by an end of comment, this is a preprocessor
3434 token. */
3435 for (cp = newlb.buffer; cp < lp-1; cp++)
3436 if (!iswhite (*cp))
3437 {
3438 if (*cp == '*' && *(cp+1) == '/')
3439 {
3440 cp++;
3441 cpptoken = TRUE;
3442 }
3443 else
3444 cpptoken = FALSE;
3445 }
3446 if (cpptoken)
3447 definedef = dsharpseen;
3448 } /* if (definedef == dnone) */
3449 continue;
3450 case '[':
3451 bracketlev++;
3452 continue;
3453 } /* switch (c) */
3454
3455
3456 /* Consider token only if some involved conditions are satisfied. */
3457 if (typdef != tignore
3458 && definedef != dignorerest
3459 && fvdef != finlist
3460 && templatelev == 0
3461 && (definedef != dnone
3462 || structdef != scolonseen)
3463 && !inattribute)
3464 {
3465 if (midtoken)
3466 {
3467 if (endtoken (c))
3468 {
3469 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3470 /* This handles :: in the middle,
3471 but not at the beginning of an identifier.
3472 Also, space-separated :: is not recognised. */
3473 {
3474 if (c_ext & C_AUTO) /* automatic detection of C++ */
3475 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3476 lp += 2;
3477 toklen += 2;
3478 c = lp[-1];
3479 goto still_in_token;
3480 }
3481 else
3482 {
3483 bool funorvar = FALSE;
3484
3485 if (yacc_rules
3486 || consider_token (newlb.buffer + tokoff, toklen, c,
3487 &c_ext, bracelev, parlev,
3488 &funorvar))
3489 {
3490 if (fvdef == foperator)
3491 {
3492 char *oldlp = lp;
3493 lp = skip_spaces (lp-1);
3494 if (*lp != '\0')
3495 lp += 1;
3496 while (*lp != '\0'
3497 && !iswhite (*lp) && *lp != '(')
3498 lp += 1;
3499 c = *lp++;
3500 toklen += lp - oldlp;
3501 }
3502 token.named = FALSE;
3503 if (!plainc
3504 && nestlev > 0 && definedef == dnone)
3505 /* in struct body */
3506 {
3507 write_classname (&token_name, qualifier);
3508 linebuffer_setlen (&token_name,
3509 token_name.len+qlen+toklen);
3510 strcat (token_name.buffer, qualifier);
3511 strncat (token_name.buffer,
3512 newlb.buffer + tokoff, toklen);
3513 token.named = TRUE;
3514 }
3515 else if (objdef == ocatseen)
3516 /* Objective C category */
3517 {
3518 int len = strlen (objtag) + 2 + toklen;
3519 linebuffer_setlen (&token_name, len);
3520 strcpy (token_name.buffer, objtag);
3521 strcat (token_name.buffer, "(");
3522 strncat (token_name.buffer,
3523 newlb.buffer + tokoff, toklen);
3524 strcat (token_name.buffer, ")");
3525 token.named = TRUE;
3526 }
3527 else if (objdef == omethodtag
3528 || objdef == omethodparm)
3529 /* Objective C method */
3530 {
3531 token.named = TRUE;
3532 }
3533 else if (fvdef == fdefunname)
3534 /* GNU DEFUN and similar macros */
3535 {
3536 bool defun = (newlb.buffer[tokoff] == 'F');
3537 int off = tokoff;
3538 int len = toklen;
3539
3540 /* Rewrite the tag so that emacs lisp DEFUNs
3541 can be found by their elisp name */
3542 if (defun)
3543 {
3544 off += 1;
3545 len -= 1;
3546 }
3547 linebuffer_setlen (&token_name, len);
3548 strncpy (token_name.buffer,
3549 newlb.buffer + off, len);
3550 token_name.buffer[len] = '\0';
3551 if (defun)
3552 while (--len >= 0)
3553 if (token_name.buffer[len] == '_')
3554 token_name.buffer[len] = '-';
3555 token.named = defun;
3556 }
3557 else
3558 {
3559 linebuffer_setlen (&token_name, toklen);
3560 strncpy (token_name.buffer,
3561 newlb.buffer + tokoff, toklen);
3562 token_name.buffer[toklen] = '\0';
3563 /* Name macros and members. */
3564 token.named = (structdef == stagseen
3565 || typdef == ttypeseen
3566 || typdef == tend
3567 || (funorvar
3568 && definedef == dignorerest)
3569 || (funorvar
3570 && definedef == dnone
3571 && structdef == snone
3572 && bracelev > 0));
3573 }
3574 token.lineno = lineno;
3575 token.offset = tokoff;
3576 token.length = toklen;
3577 token.line = newlb.buffer;
3578 token.linepos = newlinepos;
3579 token.valid = TRUE;
3580
3581 if (definedef == dnone
3582 && (fvdef == fvnameseen
3583 || fvdef == foperator
3584 || structdef == stagseen
3585 || typdef == tend
3586 || typdef == ttypeseen
3587 || objdef != onone))
3588 {
3589 if (current_lb_is_new)
3590 switch_line_buffers ();
3591 }
3592 else if (definedef != dnone
3593 || fvdef == fdefunname
3594 || instruct)
3595 make_C_tag (funorvar);
3596 }
3597 else /* not yacc and consider_token failed */
3598 {
3599 if (inattribute && fvdef == fignore)
3600 {
3601 /* We have just met __attribute__ after a
3602 function parameter list: do not tag the
3603 function again. */
3604 fvdef = fvnone;
3605 }
3606 }
3607 midtoken = FALSE;
3608 }
3609 } /* if (endtoken (c)) */
3610 else if (intoken (c))
3611 still_in_token:
3612 {
3613 toklen++;
3614 continue;
3615 }
3616 } /* if (midtoken) */
3617 else if (begtoken (c))
3618 {
3619 switch (definedef)
3620 {
3621 case dnone:
3622 switch (fvdef)
3623 {
3624 case fstartlist:
3625 /* This prevents tagging fb in
3626 void (__attribute__((noreturn)) *fb) (void);
3627 Fixing this is not easy and not very important. */
3628 fvdef = finlist;
3629 continue;
3630 case flistseen:
3631 if (plainc || declarations)
3632 {
3633 make_C_tag (TRUE); /* a function */
3634 fvdef = fignore;
3635 }
3636 break;
3637 }
3638 if (structdef == stagseen && !cjava)
3639 {
3640 popclass_above (bracelev);
3641 structdef = snone;
3642 }
3643 break;
3644 case dsharpseen:
3645 savetoken = token;
3646 break;
3647 }
3648 if (!yacc_rules || lp == newlb.buffer + 1)
3649 {
3650 tokoff = lp - 1 - newlb.buffer;
3651 toklen = 1;
3652 midtoken = TRUE;
3653 }
3654 continue;
3655 } /* if (begtoken) */
3656 } /* if must look at token */
3657
3658
3659 /* Detect end of line, colon, comma, semicolon and various braces
3660 after having handled a token.*/
3661 switch (c)
3662 {
3663 case ':':
3664 if (inattribute)
3665 break;
3666 if (yacc_rules && token.offset == 0 && token.valid)
3667 {
3668 make_C_tag (FALSE); /* a yacc function */
3669 break;
3670 }
3671 if (definedef != dnone)
3672 break;
3673 switch (objdef)
3674 {
3675 case otagseen:
3676 objdef = oignore;
3677 make_C_tag (TRUE); /* an Objective C class */
3678 break;
3679 case omethodtag:
3680 case omethodparm:
3681 objdef = omethodcolon;
3682 linebuffer_setlen (&token_name, token_name.len + 1);
3683 strcat (token_name.buffer, ":");
3684 break;
3685 }
3686 if (structdef == stagseen)
3687 {
3688 structdef = scolonseen;
3689 break;
3690 }
3691 /* Should be useless, but may be work as a safety net. */
3692 if (cplpl && fvdef == flistseen)
3693 {
3694 make_C_tag (TRUE); /* a function */
3695 fvdef = fignore;
3696 break;
3697 }
3698 break;
3699 case ';':
3700 if (definedef != dnone || inattribute)
3701 break;
3702 switch (typdef)
3703 {
3704 case tend:
3705 case ttypeseen:
3706 make_C_tag (FALSE); /* a typedef */
3707 typdef = tnone;
3708 fvdef = fvnone;
3709 break;
3710 case tnone:
3711 case tinbody:
3712 case tignore:
3713 switch (fvdef)
3714 {
3715 case fignore:
3716 if (typdef == tignore || cplpl)
3717 fvdef = fvnone;
3718 break;
3719 case fvnameseen:
3720 if ((globals && bracelev == 0 && (!fvextern || declarations))
3721 || (members && instruct))
3722 make_C_tag (FALSE); /* a variable */
3723 fvextern = FALSE;
3724 fvdef = fvnone;
3725 token.valid = FALSE;
3726 break;
3727 case flistseen:
3728 if ((declarations
3729 && (cplpl || !instruct)
3730 && (typdef == tnone || (typdef != tignore && instruct)))
3731 || (members
3732 && plainc && instruct))
3733 make_C_tag (TRUE); /* a function */
3734 /* FALLTHRU */
3735 default:
3736 fvextern = FALSE;
3737 fvdef = fvnone;
3738 if (declarations
3739 && cplpl && structdef == stagseen)
3740 make_C_tag (FALSE); /* forward declaration */
3741 else
3742 token.valid = FALSE;
3743 } /* switch (fvdef) */
3744 /* FALLTHRU */
3745 default:
3746 if (!instruct)
3747 typdef = tnone;
3748 }
3749 if (structdef == stagseen)
3750 structdef = snone;
3751 break;
3752 case ',':
3753 if (definedef != dnone || inattribute)
3754 break;
3755 switch (objdef)
3756 {
3757 case omethodtag:
3758 case omethodparm:
3759 make_C_tag (TRUE); /* an Objective C method */
3760 objdef = oinbody;
3761 break;
3762 }
3763 switch (fvdef)
3764 {
3765 case fdefunkey:
3766 case foperator:
3767 case fstartlist:
3768 case finlist:
3769 case fignore:
3770 case vignore:
3771 break;
3772 case fdefunname:
3773 fvdef = fignore;
3774 break;
3775 case fvnameseen:
3776 if (parlev == 0
3777 && ((globals
3778 && bracelev == 0
3779 && templatelev == 0
3780 && (!fvextern || declarations))
3781 || (members && instruct)))
3782 make_C_tag (FALSE); /* a variable */
3783 break;
3784 case flistseen:
3785 if ((declarations && typdef == tnone && !instruct)
3786 || (members && typdef != tignore && instruct))
3787 {
3788 make_C_tag (TRUE); /* a function */
3789 fvdef = fvnameseen;
3790 }
3791 else if (!declarations)
3792 fvdef = fvnone;
3793 token.valid = FALSE;
3794 break;
3795 default:
3796 fvdef = fvnone;
3797 }
3798 if (structdef == stagseen)
3799 structdef = snone;
3800 break;
3801 case ']':
3802 if (definedef != dnone || inattribute)
3803 break;
3804 if (structdef == stagseen)
3805 structdef = snone;
3806 switch (typdef)
3807 {
3808 case ttypeseen:
3809 case tend:
3810 typdef = tignore;
3811 make_C_tag (FALSE); /* a typedef */
3812 break;
3813 case tnone:
3814 case tinbody:
3815 switch (fvdef)
3816 {
3817 case foperator:
3818 case finlist:
3819 case fignore:
3820 case vignore:
3821 break;
3822 case fvnameseen:
3823 if ((members && bracelev == 1)
3824 || (globals && bracelev == 0
3825 && (!fvextern || declarations)))
3826 make_C_tag (FALSE); /* a variable */
3827 /* FALLTHRU */
3828 default:
3829 fvdef = fvnone;
3830 }
3831 break;
3832 }
3833 break;
3834 case '(':
3835 if (inattribute)
3836 {
3837 attrparlev++;
3838 break;
3839 }
3840 if (definedef != dnone)
3841 break;
3842 if (objdef == otagseen && parlev == 0)
3843 objdef = oparenseen;
3844 switch (fvdef)
3845 {
3846 case fvnameseen:
3847 if (typdef == ttypeseen
3848 && *lp != '*'
3849 && !instruct)
3850 {
3851 /* This handles constructs like:
3852 typedef void OperatorFun (int fun); */
3853 make_C_tag (FALSE);
3854 typdef = tignore;
3855 fvdef = fignore;
3856 break;
3857 }
3858 /* FALLTHRU */
3859 case foperator:
3860 fvdef = fstartlist;
3861 break;
3862 case flistseen:
3863 fvdef = finlist;
3864 break;
3865 }
3866 parlev++;
3867 break;
3868 case ')':
3869 if (inattribute)
3870 {
3871 if (--attrparlev == 0)
3872 inattribute = FALSE;
3873 break;
3874 }
3875 if (definedef != dnone)
3876 break;
3877 if (objdef == ocatseen && parlev == 1)
3878 {
3879 make_C_tag (TRUE); /* an Objective C category */
3880 objdef = oignore;
3881 }
3882 if (--parlev == 0)
3883 {
3884 switch (fvdef)
3885 {
3886 case fstartlist:
3887 case finlist:
3888 fvdef = flistseen;
3889 break;
3890 }
3891 if (!instruct
3892 && (typdef == tend
3893 || typdef == ttypeseen))
3894 {
3895 typdef = tignore;
3896 make_C_tag (FALSE); /* a typedef */
3897 }
3898 }
3899 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3900 parlev = 0;
3901 break;
3902 case '{':
3903 if (definedef != dnone)
3904 break;
3905 if (typdef == ttypeseen)
3906 {
3907 /* Whenever typdef is set to tinbody (currently only
3908 here), typdefbracelev should be set to bracelev. */
3909 typdef = tinbody;
3910 typdefbracelev = bracelev;
3911 }
3912 switch (fvdef)
3913 {
3914 case flistseen:
3915 make_C_tag (TRUE); /* a function */
3916 /* FALLTHRU */
3917 case fignore:
3918 fvdef = fvnone;
3919 break;
3920 case fvnone:
3921 switch (objdef)
3922 {
3923 case otagseen:
3924 make_C_tag (TRUE); /* an Objective C class */
3925 objdef = oignore;
3926 break;
3927 case omethodtag:
3928 case omethodparm:
3929 make_C_tag (TRUE); /* an Objective C method */
3930 objdef = oinbody;
3931 break;
3932 default:
3933 /* Neutralize `extern "C" {' grot. */
3934 if (bracelev == 0 && structdef == snone && nestlev == 0
3935 && typdef == tnone)
3936 bracelev = -1;
3937 }
3938 break;
3939 }
3940 switch (structdef)
3941 {
3942 case skeyseen: /* unnamed struct */
3943 pushclass_above (bracelev, NULL, 0);
3944 structdef = snone;
3945 break;
3946 case stagseen: /* named struct or enum */
3947 case scolonseen: /* a class */
3948 pushclass_above (bracelev,token.line+token.offset, token.length);
3949 structdef = snone;
3950 make_C_tag (FALSE); /* a struct or enum */
3951 break;
3952 }
3953 bracelev++;
3954 break;
3955 case '*':
3956 if (definedef != dnone)
3957 break;
3958 if (fvdef == fstartlist)
3959 {
3960 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3961 token.valid = FALSE;
3962 }
3963 break;
3964 case '}':
3965 if (definedef != dnone)
3966 break;
3967 if (!ignoreindent && lp == newlb.buffer + 1)
3968 {
3969 if (bracelev != 0)
3970 token.valid = FALSE;
3971 bracelev = 0; /* reset brace level if first column */
3972 parlev = 0; /* also reset paren level, just in case... */
3973 }
3974 else if (bracelev > 0)
3975 bracelev--;
3976 else
3977 token.valid = FALSE; /* something gone amiss, token unreliable */
3978 popclass_above (bracelev);
3979 structdef = snone;
3980 /* Only if typdef == tinbody is typdefbracelev significant. */
3981 if (typdef == tinbody && bracelev <= typdefbracelev)
3982 {
3983 assert (bracelev == typdefbracelev);
3984 typdef = tend;
3985 }
3986 break;
3987 case '=':
3988 if (definedef != dnone)
3989 break;
3990 switch (fvdef)
3991 {
3992 case foperator:
3993 case finlist:
3994 case fignore:
3995 case vignore:
3996 break;
3997 case fvnameseen:
3998 if ((members && bracelev == 1)
3999 || (globals && bracelev == 0 && (!fvextern || declarations)))
4000 make_C_tag (FALSE); /* a variable */
4001 /* FALLTHRU */
4002 default:
4003 fvdef = vignore;
4004 }
4005 break;
4006 case '<':
4007 if (cplpl
4008 && (structdef == stagseen || fvdef == fvnameseen))
4009 {
4010 templatelev++;
4011 break;
4012 }
4013 goto resetfvdef;
4014 case '>':
4015 if (templatelev > 0)
4016 {
4017 templatelev--;
4018 break;
4019 }
4020 goto resetfvdef;
4021 case '+':
4022 case '-':
4023 if (objdef == oinbody && bracelev == 0)
4024 {
4025 objdef = omethodsign;
4026 break;
4027 }
4028 /* FALLTHRU */
4029 resetfvdef:
4030 case '#': case '~': case '&': case '%': case '/':
4031 case '|': case '^': case '!': case '.': case '?':
4032 if (definedef != dnone)
4033 break;
4034 /* These surely cannot follow a function tag in C. */
4035 switch (fvdef)
4036 {
4037 case foperator:
4038 case finlist:
4039 case fignore:
4040 case vignore:
4041 break;
4042 default:
4043 fvdef = fvnone;
4044 }
4045 break;
4046 case '\0':
4047 if (objdef == otagseen)
4048 {
4049 make_C_tag (TRUE); /* an Objective C class */
4050 objdef = oignore;
4051 }
4052 /* If a macro spans multiple lines don't reset its state. */
4053 if (quotednl)
4054 CNL_SAVE_DEFINEDEF ();
4055 else
4056 CNL ();
4057 break;
4058 } /* switch (c) */
4059
4060 } /* while not eof */
4061
4062 free (lbs[0].lb.buffer);
4063 free (lbs[1].lb.buffer);
4064 }
4065
4066 /*
4067 * Process either a C++ file or a C file depending on the setting
4068 * of a global flag.
4069 */
4070 static void
4071 default_C_entries (inf)
4072 FILE *inf;
4073 {
4074 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4075 }
4076
/* Parse INF as plain C: no language-extension bit is passed on.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extension = 0;

  C_entries (no_extension, inf);
}
4084
4085 /* Always do C++. */
4086 static void
4087 Cplusplus_entries (inf)
4088 FILE *inf;
4089 {
4090 C_entries (C_PLPL, inf);
4091 }
4092
4093 /* Always do Java. */
4094 static void
4095 Cjava_entries (inf)
4096 FILE *inf;
4097 {
4098 C_entries (C_JAVA, inf);
4099 }
4100
4101 /* Always do C*. */
4102 static void
4103 Cstar_entries (inf)
4104 FILE *inf;
4105 {
4106 C_entries (C_STAR, inf);
4107 }
4108
4109 /* Always do Yacc. */
4110 static void
4111 Yacc_entries (inf)
4112 FILE *inf;
4113 {
4114 C_entries (YACC, inf);
4115 }
4116
4117 \f
/* Helper macros shared by the line-oriented parsers. */

/* Loop header: while FILE_POINTER is not at end of file, read one line
   into LINE_BUFFER with readline and point CHAR_POINTER at the start of
   its text.  The statement following the macro is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
  while (!feof (file_pointer)						\
	 && (readline (&line_buffer, file_pointer),			\
	     char_pointer = line_buffer.buffer,				\
	     TRUE))

/* True if CP points at keyword KW followed by a character for which
   notinname holds; CP is then moved past KW and the spaces after it.
   KW must be a string literal: the `"" kw' concatenation is a syntax
   error otherwise.  */
#define LOOKING_AT(cp, kw)						\
  (assert ("" kw),							\
   (strneq ((cp), kw, sizeof (kw) - 1)					\
    && notinname ((cp)[sizeof (kw) - 1])				\
    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1))))

/* Case-insensitive variant of LOOKING_AT.  It does not use notinname
   and does not skip spaces: on a match CP is simply advanced to the
   character right after KW.  KW must be a string literal.  */
#define LOOKING_AT_NOCASE(cp, kw)					\
  (assert ("" kw),							\
   (strncaseeq ((cp), kw, sizeof (kw) - 1)				\
    && ((cp) += sizeof (kw) - 1)))
4139
4140 /*
4141 * Read a file, but do no processing. This is used to do regexp
4142 * matching on files that have no language defined.
4143 */
4144 static void
4145 just_read_file (inf)
4146 FILE *inf;
4147 {
4148 register char *dummy;
4149
4150 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4151 continue;
4152 }
4153
4154 \f
4155 /* Fortran parsing */
4156
4157 static void F_takeprec __P((void));
4158 static void F_getit __P((FILE *));
4159
4160 static void
4161 F_takeprec ()
4162 {
4163 dbp = skip_spaces (dbp);
4164 if (*dbp != '*')
4165 return;
4166 dbp++;
4167 dbp = skip_spaces (dbp);
4168 if (strneq (dbp, "(*)", 3))
4169 {
4170 dbp += 3;
4171 return;
4172 }
4173 if (!ISDIGIT (*dbp))
4174 {
4175 --dbp; /* force failure */
4176 return;
4177 }
4178 do
4179 dbp++;
4180 while (ISDIGIT (*dbp));
4181 }
4182
4183 static void
4184 F_getit (inf)
4185 FILE *inf;
4186 {
4187 register char *cp;
4188
4189 dbp = skip_spaces (dbp);
4190 if (*dbp == '\0')
4191 {
4192 readline (&lb, inf);
4193 dbp = lb.buffer;
4194 if (dbp[5] != '&')
4195 return;
4196 dbp += 6;
4197 dbp = skip_spaces (dbp);
4198 }
4199 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4200 return;
4201 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4202 continue;
4203 make_tag (dbp, cp-dbp, TRUE,
4204 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4205 }
4206
4207
4208 static void
4209 Fortran_functions (inf)
4210 FILE *inf;
4211 {
4212 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4213 {
4214 if (*dbp == '%')
4215 dbp++; /* Ratfor escape to fortran */
4216 dbp = skip_spaces (dbp);
4217 if (*dbp == '\0')
4218 continue;
4219 switch (lowcase (*dbp))
4220 {
4221 case 'i':
4222 if (nocase_tail ("integer"))
4223 F_takeprec ();
4224 break;
4225 case 'r':
4226 if (nocase_tail ("real"))
4227 F_takeprec ();
4228 break;
4229 case 'l':
4230 if (nocase_tail ("logical"))
4231 F_takeprec ();
4232 break;
4233 case 'c':
4234 if (nocase_tail ("complex") || nocase_tail ("character"))
4235 F_takeprec ();
4236 break;
4237 case 'd':
4238 if (nocase_tail ("double"))
4239 {
4240 dbp = skip_spaces (dbp);
4241 if (*dbp == '\0')
4242 continue;
4243 if (nocase_tail ("precision"))
4244 break;
4245 continue;
4246 }
4247 break;
4248 }
4249 dbp = skip_spaces (dbp);
4250 if (*dbp == '\0')
4251 continue;
4252 switch (lowcase (*dbp))
4253 {
4254 case 'f':
4255 if (nocase_tail ("function"))
4256 F_getit (inf);
4257 continue;
4258 case 's':
4259 if (nocase_tail ("subroutine"))
4260 F_getit (inf);
4261 continue;
4262 case 'e':
4263 if (nocase_tail ("entry"))
4264 F_getit (inf);
4265 continue;
4266 case 'b':
4267 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4268 {
4269 dbp = skip_spaces (dbp);
4270 if (*dbp == '\0') /* assume un-named */
4271 make_tag ("blockdata", 9, TRUE,
4272 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4273 else
4274 F_getit (inf); /* look for name */
4275 }
4276 continue;
4277 }
4278 }
4279 }
4280
4281 \f
4282 /*
4283 * Ada parsing
4284 * Original code by
4285 * Philippe Waroquiers (1998)
4286 */
4287
4288 static void Ada_getit __P((FILE *, char *));
4289
4290 /* Once we are positioned after an "interesting" keyword, let's get
4291 the real tag value necessary. */
4292 static void
4293 Ada_getit (inf, name_qualifier)
4294 FILE *inf;
4295 char *name_qualifier;
4296 {
4297 register char *cp;
4298 char *name;
4299 char c;
4300
4301 while (!feof (inf))
4302 {
4303 dbp = skip_spaces (dbp);
4304 if (*dbp == '\0'
4305 || (dbp[0] == '-' && dbp[1] == '-'))
4306 {
4307 readline (&lb, inf);
4308 dbp = lb.buffer;
4309 }
4310 switch (lowcase(*dbp))
4311 {
4312 case 'b':
4313 if (nocase_tail ("body"))
4314 {
4315 /* Skipping body of procedure body or package body or ....
4316 resetting qualifier to body instead of spec. */
4317 name_qualifier = "/b";
4318 continue;
4319 }
4320 break;
4321 case 't':
4322 /* Skipping type of task type or protected type ... */
4323 if (nocase_tail ("type"))
4324 continue;
4325 break;
4326 }
4327 if (*dbp == '"')
4328 {
4329 dbp += 1;
4330 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4331 continue;
4332 }
4333 else
4334 {
4335 dbp = skip_spaces (dbp);
4336 for (cp = dbp;
4337 (*cp != '\0'
4338 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4339 cp++)
4340 continue;
4341 if (cp == dbp)
4342 return;
4343 }
4344 c = *cp;
4345 *cp = '\0';
4346 name = concat (dbp, name_qualifier, "");
4347 *cp = c;
4348 make_tag (name, strlen (name), TRUE,
4349 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4350 free (name);
4351 if (c == '"')
4352 dbp = cp + 1;
4353 return;
4354 }
4355 }
4356
4357 static void
4358 Ada_funcs (inf)
4359 FILE *inf;
4360 {
4361 bool inquote = FALSE;
4362 bool skip_till_semicolumn = FALSE;
4363
4364 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4365 {
4366 while (*dbp != '\0')
4367 {
4368 /* Skip a string i.e. "abcd". */
4369 if (inquote || (*dbp == '"'))
4370 {
4371 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4372 if (dbp != NULL)
4373 {
4374 inquote = FALSE;
4375 dbp += 1;
4376 continue; /* advance char */
4377 }
4378 else
4379 {
4380 inquote = TRUE;
4381 break; /* advance line */
4382 }
4383 }
4384
4385 /* Skip comments. */
4386 if (dbp[0] == '-' && dbp[1] == '-')
4387 break; /* advance line */
4388
4389 /* Skip character enclosed in single quote i.e. 'a'
4390 and skip single quote starting an attribute i.e. 'Image. */
4391 if (*dbp == '\'')
4392 {
4393 dbp++ ;
4394 if (*dbp != '\0')
4395 dbp++;
4396 continue;
4397 }
4398
4399 if (skip_till_semicolumn)
4400 {
4401 if (*dbp == ';')
4402 skip_till_semicolumn = FALSE;
4403 dbp++;
4404 continue; /* advance char */
4405 }
4406
4407 /* Search for beginning of a token. */
4408 if (!begtoken (*dbp))
4409 {
4410 dbp++;
4411 continue; /* advance char */
4412 }
4413
4414 /* We are at the beginning of a token. */
4415 switch (lowcase(*dbp))
4416 {
4417 case 'f':
4418 if (!packages_only && nocase_tail ("function"))
4419 Ada_getit (inf, "/f");
4420 else
4421 break; /* from switch */
4422 continue; /* advance char */
4423 case 'p':
4424 if (!packages_only && nocase_tail ("procedure"))
4425 Ada_getit (inf, "/p");
4426 else if (nocase_tail ("package"))
4427 Ada_getit (inf, "/s");
4428 else if (nocase_tail ("protected")) /* protected type */
4429 Ada_getit (inf, "/t");
4430 else
4431 break; /* from switch */
4432 continue; /* advance char */
4433
4434 case 'u':
4435 if (typedefs && !packages_only && nocase_tail ("use"))
4436 {
4437 /* when tagging types, avoid tagging use type Pack.Typename;
4438 for this, we will skip everything till a ; */
4439 skip_till_semicolumn = TRUE;
4440 continue; /* advance char */
4441 }
4442
4443 case 't':
4444 if (!packages_only && nocase_tail ("task"))
4445 Ada_getit (inf, "/k");
4446 else if (typedefs && !packages_only && nocase_tail ("type"))
4447 {
4448 Ada_getit (inf, "/t");
4449 while (*dbp != '\0')
4450 dbp += 1;
4451 }
4452 else
4453 break; /* from switch */
4454 continue; /* advance char */
4455 }
4456
4457 /* Look for the end of the token. */
4458 while (!endtoken (*dbp))
4459 dbp++;
4460
4461 } /* advance char */
4462 } /* advance line */
4463 }
4464
4465 \f
4466 /*
4467 * Unix and microcontroller assembly tag handling
4468 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4469 * Idea by Bob Weiner, Motorola Inc. (1994)
4470 */
4471 static void
4472 Asm_labels (inf)
4473 FILE *inf;
4474 {
4475 register char *cp;
4476
4477 LOOP_ON_INPUT_LINES (inf, lb, cp)
4478 {
4479 /* If first char is alphabetic or one of [_.$], test for colon
4480 following identifier. */
4481 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4482 {
4483 /* Read past label. */
4484 cp++;
4485 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4486 cp++;
4487 if (*cp == ':' || iswhite (*cp))
4488 /* Found end of label, so copy it and add it to the table. */
4489 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4490 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4491 }
4492 }
4493 }
4494
4495 \f
4496 /*
4497 * Perl support
4498 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4499 * Perl variable names: /^(my|local).../
4500 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4501 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4502 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4503 */
4504 static void
4505 Perl_functions (inf)
4506 FILE *inf;
4507 {
4508 char *package = savestr ("main"); /* current package name */
4509 register char *cp;
4510
4511 LOOP_ON_INPUT_LINES (inf, lb, cp)
4512 {
4513 skip_spaces(cp);
4514
4515 if (LOOKING_AT (cp, "package"))
4516 {
4517 free (package);
4518 get_tag (cp, &package);
4519 }
4520 else if (LOOKING_AT (cp, "sub"))
4521 {
4522 char *pos;
4523 char *sp = cp;
4524
4525 while (!notinname (*cp))
4526 cp++;
4527 if (cp == sp)
4528 continue; /* nothing found */
4529 if ((pos = etags_strchr (sp, ':')) != NULL
4530 && pos < cp && pos[1] == ':')
4531 /* The name is already qualified. */
4532 make_tag (sp, cp - sp, TRUE,
4533 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4534 else
4535 /* Qualify it. */
4536 {
4537 char savechar, *name;
4538
4539 savechar = *cp;
4540 *cp = '\0';
4541 name = concat (package, "::", sp);
4542 *cp = savechar;
4543 make_tag (name, strlen(name), TRUE,
4544 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4545 free (name);
4546 }
4547 }
4548 else if (globals) /* only if we are tagging global vars */
4549 {
4550 /* Skip a qualifier, if any. */
4551 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4552 /* After "my" or "local", but before any following paren or space. */
4553 char *varstart = cp;
4554
4555 if (qual /* should this be removed? If yes, how? */
4556 && (*cp == '$' || *cp == '@' || *cp == '%'))
4557 {
4558 varstart += 1;
4559 do
4560 cp++;
4561 while (ISALNUM (*cp) || *cp == '_');
4562 }
4563 else if (qual)
4564 {
4565 /* Should be examining a variable list at this point;
4566 could insist on seeing an open parenthesis. */
4567 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4568 cp++;
4569 }
4570 else
4571 continue;
4572
4573 make_tag (varstart, cp - varstart, FALSE,
4574 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4575 }
4576 }
4577 free (package);
4578 }
4579
4580
4581 /*
4582 * Python support
4583 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4584 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4585 * More ideas by seb bacon <seb@jamkit.com> (2002)
4586 */
4587 static void
4588 Python_functions (inf)
4589 FILE *inf;
4590 {
4591 register char *cp;
4592
4593 LOOP_ON_INPUT_LINES (inf, lb, cp)
4594 {
4595 cp = skip_spaces (cp);
4596 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4597 {
4598 char *name = cp;
4599 while (!notinname (*cp) && *cp != ':')
4600 cp++;
4601 make_tag (name, cp - name, TRUE,
4602 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4603 }
4604 }
4605 }
4606
4607 \f
4608 /*
4609 * PHP support
4610 * Look for:
4611 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4612 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4613 * - /^[ \t]*define\(\"[^\"]+/
4614 * Only with --members:
4615 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4616 * Idea by Diez B. Roggisch (2001)
4617 */
4618 static void
4619 PHP_functions (inf)
4620 FILE *inf;
4621 {
4622 register char *cp, *name;
4623 bool search_identifier = FALSE;
4624
4625 LOOP_ON_INPUT_LINES (inf, lb, cp)
4626 {
4627 cp = skip_spaces (cp);
4628 name = cp;
4629 if (search_identifier
4630 && *cp != '\0')
4631 {
4632 while (!notinname (*cp))
4633 cp++;
4634 make_tag (name, cp - name, TRUE,
4635 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4636 search_identifier = FALSE;
4637 }
4638 else if (LOOKING_AT (cp, "function"))
4639 {
4640 if(*cp == '&')
4641 cp = skip_spaces (cp+1);
4642 if(*cp != '\0')
4643 {
4644 name = cp;
4645 while (!notinname (*cp))
4646 cp++;
4647 make_tag (name, cp - name, TRUE,
4648 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4649 }
4650 else
4651 search_identifier = TRUE;
4652 }
4653 else if (LOOKING_AT (cp, "class"))
4654 {
4655 if (*cp != '\0')
4656 {
4657 name = cp;
4658 while (*cp != '\0' && !iswhite (*cp))
4659 cp++;
4660 make_tag (name, cp - name, FALSE,
4661 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4662 }
4663 else
4664 search_identifier = TRUE;
4665 }
4666 else if (strneq (cp, "define", 6)
4667 && (cp = skip_spaces (cp+6))
4668 && *cp++ == '('
4669 && (*cp == '"' || *cp == '\''))
4670 {
4671 char quote = *cp++;
4672 name = cp;
4673 while (*cp != quote && *cp != '\0')
4674 cp++;
4675 make_tag (name, cp - name, FALSE,
4676 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4677 }
4678 else if (members
4679 && LOOKING_AT (cp, "var")
4680 && *cp == '$')
4681 {
4682 name = cp;
4683 while (!notinname(*cp))
4684 cp++;
4685 make_tag (name, cp - name, FALSE,
4686 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4687 }
4688 }
4689 }
4690
4691 \f
4692 /*
4693 * Cobol tag functions
4694 * We could look for anything that could be a paragraph name.
4695 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4696 * Idea by Corny de Souza (1993)
4697 */
4698 static void
4699 Cobol_paragraphs (inf)
4700 FILE *inf;
4701 {
4702 register char *bp, *ep;
4703
4704 LOOP_ON_INPUT_LINES (inf, lb, bp)
4705 {
4706 if (lb.len < 9)
4707 continue;
4708 bp += 8;
4709
4710 /* If eoln, compiler option or comment ignore whole line. */
4711 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4712 continue;
4713
4714 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4715 continue;
4716 if (*ep++ == '.')
4717 make_tag (bp, ep - bp, TRUE,
4718 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4719 }
4720 }
4721
4722 \f
4723 /*
4724 * Makefile support
4725 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4726 */
4727 static void
4728 Makefile_targets (inf)
4729 FILE *inf;
4730 {
4731 register char *bp;
4732
4733 LOOP_ON_INPUT_LINES (inf, lb, bp)
4734 {
4735 if (*bp == '\t' || *bp == '#')
4736 continue;
4737 while (*bp != '\0' && *bp != '=' && *bp != ':')
4738 bp++;
4739 if (*bp == ':' || (globals && *bp == '='))
4740 {
4741 /* We should detect if there is more than one tag, but we do not.
4742 We just skip initial and final spaces. */
4743 char * namestart = skip_spaces (lb.buffer);
4744 while (--bp > namestart)
4745 if (!notinname (*bp))
4746 break;
4747 make_tag (namestart, bp - namestart + 1, TRUE,
4748 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4749 }
4750 }
4751 }
4752
4753 \f
4754 /*
4755 * Pascal parsing
4756 * Original code by Mosur K. Mohan (1989)
4757 *
4758 * Locates tags for procedures & functions. Doesn't do any type- or
4759 * var-definitions. It does look for the keyword "extern" or
4760 * "forward" immediately following the procedure statement; if found,
4761 * the tag is skipped.
4762 */
4763 static void
4764 Pascal_functions (inf)
4765 FILE *inf;
4766 {
4767 linebuffer tline; /* mostly copied from C_entries */
4768 long save_lcno;
4769 int save_lineno, namelen, taglen;
4770 char c, *name;
4771
4772 bool /* each of these flags is TRUE iff: */
4773 incomment, /* point is inside a comment */
4774 inquote, /* point is inside '..' string */
4775 get_tagname, /* point is after PROCEDURE/FUNCTION
4776 keyword, so next item = potential tag */
4777 found_tag, /* point is after a potential tag */
4778 inparms, /* point is within parameter-list */
4779 verify_tag; /* point has passed the parm-list, so the
4780 next token will determine whether this
4781 is a FORWARD/EXTERN to be ignored, or
4782 whether it is a real tag */
4783
4784 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4785 name = NULL; /* keep compiler quiet */
4786 dbp = lb.buffer;
4787 *dbp = '\0';
4788 linebuffer_init (&tline);
4789
4790 incomment = inquote = FALSE;
4791 found_tag = FALSE; /* have a proc name; check if extern */
4792 get_tagname = FALSE; /* found "procedure" keyword */
4793 inparms = FALSE; /* found '(' after "proc" */
4794 verify_tag = FALSE; /* check if "extern" is ahead */
4795
4796
4797 while (!feof (inf)) /* long main loop to get next char */
4798 {
4799 c = *dbp++;
4800 if (c == '\0') /* if end of line */
4801 {
4802 readline (&lb, inf);
4803 dbp = lb.buffer;
4804 if (*dbp == '\0')
4805 continue;
4806 if (!((found_tag && verify_tag)
4807 || get_tagname))
4808 c = *dbp++; /* only if don't need *dbp pointing
4809 to the beginning of the name of
4810 the procedure or function */
4811 }
4812 if (incomment)
4813 {
4814 if (c == '}') /* within { } comments */
4815 incomment = FALSE;
4816 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4817 {
4818 dbp++;
4819 incomment = FALSE;
4820 }
4821 continue;
4822 }
4823 else if (inquote)
4824 {
4825 if (c == '\'')
4826 inquote = FALSE;
4827 continue;
4828 }
4829 else
4830 switch (c)
4831 {
4832 case '\'':
4833 inquote = TRUE; /* found first quote */
4834 continue;
4835 case '{': /* found open { comment */
4836 incomment = TRUE;
4837 continue;
4838 case '(':
4839 if (*dbp == '*') /* found open (* comment */
4840 {
4841 incomment = TRUE;
4842 dbp++;
4843 }
4844 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4845 inparms = TRUE;
4846 continue;
4847 case ')': /* end of parms list */
4848 if (inparms)
4849 inparms = FALSE;
4850 continue;
4851 case ';':
4852 if (found_tag && !inparms) /* end of proc or fn stmt */
4853 {
4854 verify_tag = TRUE;
4855 break;
4856 }
4857 continue;
4858 }
4859 if (found_tag && verify_tag && (*dbp != ' '))
4860 {
4861 /* Check if this is an "extern" declaration. */
4862 if (*dbp == '\0')
4863 continue;
4864 if (lowcase (*dbp == 'e'))
4865 {
4866 if (nocase_tail ("extern")) /* superfluous, really! */
4867 {
4868 found_tag = FALSE;
4869 verify_tag = FALSE;
4870 }
4871 }
4872 else if (lowcase (*dbp) == 'f')
4873 {
4874 if (nocase_tail ("forward")) /* check for forward reference */
4875 {
4876 found_tag = FALSE;
4877 verify_tag = FALSE;
4878 }
4879 }
4880 if (found_tag && verify_tag) /* not external proc, so make tag */
4881 {
4882 found_tag = FALSE;
4883 verify_tag = FALSE;
4884 make_tag (name, namelen, TRUE,
4885 tline.buffer, taglen, save_lineno, save_lcno);
4886 continue;
4887 }
4888 }
4889 if (get_tagname) /* grab name of proc or fn */
4890 {
4891 char *cp;
4892
4893 if (*dbp == '\0')
4894 continue;
4895
4896 /* Find block name. */
4897 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4898 continue;
4899
4900 /* Save all values for later tagging. */
4901 linebuffer_setlen (&tline, lb.len);
4902 strcpy (tline.buffer, lb.buffer);
4903 save_lineno = lineno;
4904 save_lcno = linecharno;
4905 name = tline.buffer + (dbp - lb.buffer);
4906 namelen = cp - dbp;
4907 taglen = cp - lb.buffer + 1;
4908
4909 dbp = cp; /* set dbp to e-o-token */
4910 get_tagname = FALSE;
4911 found_tag = TRUE;
4912 continue;
4913
4914 /* And proceed to check for "extern". */
4915 }
4916 else if (!incomment && !inquote && !found_tag)
4917 {
4918 /* Check for proc/fn keywords. */
4919 switch (lowcase (c))
4920 {
4921 case 'p':
4922 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4923 get_tagname = TRUE;
4924 continue;
4925 case 'f':
4926 if (nocase_tail ("unction"))
4927 get_tagname = TRUE;
4928 continue;
4929 }
4930 }
4931 } /* while not eof */
4932
4933 free (tline.buffer);
4934 }
4935
4936 \f
4937 /*
4938 * Lisp tag functions
4939 * look for (def or (DEF, quote or QUOTE
4940 */
4941
4942 static void L_getit __P((void));
4943
4944 static void
4945 L_getit ()
4946 {
4947 if (*dbp == '\'') /* Skip prefix quote */
4948 dbp++;
4949 else if (*dbp == '(')
4950 {
4951 dbp++;
4952 /* Try to skip "(quote " */
4953 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4954 /* Ok, then skip "(" before name in (defstruct (foo)) */
4955 dbp = skip_spaces (dbp);
4956 }
4957 get_tag (dbp, NULL);
4958 }
4959
4960 static void
4961 Lisp_functions (inf)
4962 FILE *inf;
4963 {
4964 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4965 {
4966 if (dbp[0] != '(')
4967 continue;
4968
4969 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4970 {
4971 dbp = skip_non_spaces (dbp);
4972 dbp = skip_spaces (dbp);
4973 L_getit ();
4974 }
4975 else
4976 {
4977 /* Check for (foo::defmumble name-defined ... */
4978 do
4979 dbp++;
4980 while (!notinname (*dbp) && *dbp != ':');
4981 if (*dbp == ':')
4982 {
4983 do
4984 dbp++;
4985 while (*dbp == ':');
4986
4987 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4988 {
4989 dbp = skip_non_spaces (dbp);
4990 dbp = skip_spaces (dbp);
4991 L_getit ();
4992 }
4993 }
4994 }
4995 }
4996 }
4997
4998 \f
4999 /*
5000 * Lua script language parsing
5001 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5002 *
5003 * "function" and "local function" are tags if they start at column 1.
5004 */
5005 static void
5006 Lua_functions (inf)
5007 FILE *inf;
5008 {
5009 register char *bp;
5010
5011 LOOP_ON_INPUT_LINES (inf, lb, bp)
5012 {
5013 if (bp[0] != 'f' && bp[0] != 'l')
5014 continue;
5015
5016 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5017
5018 if (LOOKING_AT (bp, "function"))
5019 get_tag (bp, NULL);
5020 }
5021 }
5022
5023 \f
5024 /*
5025 * Postscript tags
5026 * Just look for lines where the first character is '/'
5027 * Also look at "defineps" for PSWrap
5028 * Ideas by:
5029 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5030 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5031 */
5032 static void
5033 PS_functions (inf)
5034 FILE *inf;
5035 {
5036 register char *bp, *ep;
5037
5038 LOOP_ON_INPUT_LINES (inf, lb, bp)
5039 {
5040 if (bp[0] == '/')
5041 {
5042 for (ep = bp+1;
5043 *ep != '\0' && *ep != ' ' && *ep != '{';
5044 ep++)
5045 continue;
5046 make_tag (bp, ep - bp, TRUE,
5047 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5048 }
5049 else if (LOOKING_AT (bp, "defineps"))
5050 get_tag (bp, NULL);
5051 }
5052 }
5053
5054 \f
5055 /*
5056 * Forth tags
5057 * Ignore anything after \ followed by space or in ( )
5058 * Look for words defined by :
5059 * Look for constant, code, create, defer, value, and variable
5060 * OBP extensions: Look for buffer:, field,
5061 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5062 */
5063 static void
5064 Forth_words (inf)
5065 FILE *inf;
5066 {
5067 register char *bp;
5068
5069 LOOP_ON_INPUT_LINES (inf, lb, bp)
5070 while ((bp = skip_spaces (bp))[0] != '\0')
5071 if (bp[0] == '\\' && iswhite(bp[1]))
5072 break; /* read next line */
5073 else if (bp[0] == '(' && iswhite(bp[1]))
5074 do /* skip to ) or eol */
5075 bp++;
5076 while (*bp != ')' && *bp != '\0');
5077 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5078 || LOOKING_AT_NOCASE (bp, "constant")
5079 || LOOKING_AT_NOCASE (bp, "code")
5080 || LOOKING_AT_NOCASE (bp, "create")
5081 || LOOKING_AT_NOCASE (bp, "defer")
5082 || LOOKING_AT_NOCASE (bp, "value")
5083 || LOOKING_AT_NOCASE (bp, "variable")
5084 || LOOKING_AT_NOCASE (bp, "buffer:")
5085 || LOOKING_AT_NOCASE (bp, "field"))
5086 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5087 else
5088 bp = skip_non_spaces (bp);
5089 }
5090
5091 \f
5092 /*
5093 * Scheme tag functions
5094 * look for (def... xyzzy
5095 * (def... (xyzzy
5096 * (def ... ((...(xyzzy ....
5097 * (set! xyzzy
5098 * Original code by Ken Haase (1985?)
5099 */
5100 static void
5101 Scheme_functions (inf)
5102 FILE *inf;
5103 {
5104 register char *bp;
5105
5106 LOOP_ON_INPUT_LINES (inf, lb, bp)
5107 {
5108 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5109 {
5110 bp = skip_non_spaces (bp+4);
5111 /* Skip over open parens and white space */
5112 while (notinname (*bp))
5113 bp++;
5114 get_tag (bp, NULL);
5115 }
5116 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5117 get_tag (bp, NULL);
5118 }
5119 }
5120
5121 \f
5122 /* Find tags in TeX and LaTeX input files. */
5123
5124 /* TEX_toktab is a table of TeX control sequences that define tags.
5125 * Each entry records one such control sequence.
5126 *
5127 * Original code from who knows whom.
5128 * Ideas by:
5129 * Stefan Monnier (2002)
5130 */
5131
5132 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5133
5134 /* Default set of control sequences to put into TEX_toktab.
5135 The value of environment var TEXTAGS is prepended to this. */
5136 static char *TEX_defenv = "\
5137 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5138 :part:appendix:entry:index:def\
5139 :newcommand:renewcommand:newenvironment:renewenvironment";
5140
5141 static void TEX_mode __P((FILE *));
5142 static void TEX_decode_env __P((char *, char *));
5143
5144 static char TEX_esc = '\\';
5145 static char TEX_opgrp = '{';
5146 static char TEX_clgrp = '}';
5147
5148 /*
5149 * TeX/LaTeX scanning loop.
5150 */
5151 static void
5152 TeX_commands (inf)
5153 FILE *inf;
5154 {
5155 char *cp;
5156 linebuffer *key;
5157
5158 /* Select either \ or ! as escape character. */
5159 TEX_mode (inf);
5160
5161 /* Initialize token table once from environment. */
5162 if (TEX_toktab == NULL)
5163 TEX_decode_env ("TEXTAGS", TEX_defenv);
5164
5165 LOOP_ON_INPUT_LINES (inf, lb, cp)
5166 {
5167 /* Look at each TEX keyword in line. */
5168 for (;;)
5169 {
5170 /* Look for a TEX escape. */
5171 while (*cp++ != TEX_esc)
5172 if (cp[-1] == '\0' || cp[-1] == '%')
5173 goto tex_next_line;
5174
5175 for (key = TEX_toktab; key->buffer != NULL; key++)
5176 if (strneq (cp, key->buffer, key->len))
5177 {
5178 register char *p;
5179 int namelen, linelen;
5180 bool opgrp = FALSE;
5181
5182 cp = skip_spaces (cp + key->len);
5183 if (*cp == TEX_opgrp)
5184 {
5185 opgrp = TRUE;
5186 cp++;
5187 }
5188 for (p = cp;
5189 (!iswhite (*p) && *p != '#' &&
5190 *p != TEX_opgrp && *p != TEX_clgrp);
5191 p++)
5192 continue;
5193 namelen = p - cp;
5194 linelen = lb.len;
5195 if (!opgrp || *p == TEX_clgrp)
5196 {
5197 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5198 p++;
5199 linelen = p - lb.buffer + 1;
5200 }
5201 make_tag (cp, namelen, TRUE,
5202 lb.buffer, linelen, lineno, linecharno);
5203 goto tex_next_line; /* We only tag a line once */
5204 }
5205 }
5206 tex_next_line:
5207 ;
5208 }
5209 }
5210
5211 #define TEX_LESC '\\'
5212 #define TEX_SESC '!'
5213
5214 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5215 chars accordingly. */
5216 static void
5217 TEX_mode (inf)
5218 FILE *inf;
5219 {
5220 int c;
5221
5222 while ((c = getc (inf)) != EOF)
5223 {
5224 /* Skip to next line if we hit the TeX comment char. */
5225 if (c == '%')
5226 while (c != '\n' && c != EOF)
5227 c = getc (inf);
5228 else if (c == TEX_LESC || c == TEX_SESC )
5229 break;
5230 }
5231
5232 if (c == TEX_LESC)
5233 {
5234 TEX_esc = TEX_LESC;
5235 TEX_opgrp = '{';
5236 TEX_clgrp = '}';
5237 }
5238 else
5239 {
5240 TEX_esc = TEX_SESC;
5241 TEX_opgrp = '<';
5242 TEX_clgrp = '>';
5243 }
5244 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5245 No attempt is made to correct the situation. */
5246 rewind (inf);
5247 }
5248
5249 /* Read environment and prepend it to the default string.
5250 Build token table. */
5251 static void
5252 TEX_decode_env (evarname, defenv)
5253 char *evarname;
5254 char *defenv;
5255 {
5256 register char *env, *p;
5257 int i, len;
5258
5259 /* Append default string to environment. */
5260 env = getenv (evarname);
5261 if (!env)
5262 env = defenv;
5263 else
5264 {
5265 char *oldenv = env;
5266 env = concat (oldenv, defenv, "");
5267 }
5268
5269 /* Allocate a token table */
5270 for (len = 1, p = env; p;)
5271 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5272 len++;
5273 TEX_toktab = xnew (len, linebuffer);
5274
5275 /* Unpack environment string into token table. Be careful about */
5276 /* zero-length strings (leading ':', "::" and trailing ':') */
5277 for (i = 0; *env != '\0';)
5278 {
5279 p = etags_strchr (env, ':');
5280 if (!p) /* End of environment string. */
5281 p = env + strlen (env);
5282 if (p - env > 0)
5283 { /* Only non-zero strings. */
5284 TEX_toktab[i].buffer = savenstr (env, p - env);
5285 TEX_toktab[i].len = p - env;
5286 i++;
5287 }
5288 if (*p)
5289 env = p + 1;
5290 else
5291 {
5292 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5293 TEX_toktab[i].len = 0;
5294 break;
5295 }
5296 }
5297 }
5298
5299 \f
5300 /* Texinfo support. Dave Love, Mar. 2000. */
5301 static void
5302 Texinfo_nodes (inf)
5303 FILE * inf;
5304 {
5305 char *cp, *start;
5306 LOOP_ON_INPUT_LINES (inf, lb, cp)
5307 if (LOOKING_AT (cp, "@node"))
5308 {
5309 start = cp;
5310 while (*cp != '\0' && *cp != ',')
5311 cp++;
5312 make_tag (start, cp - start, TRUE,
5313 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5314 }
5315 }
5316
5317 \f
5318 /*
5319 * HTML support.
5320 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5321 * Contents of <a name=xxx> are tags with name xxx.
5322 *
5323 * Francesco Potortì, 2002.
5324 */
5325 static void
5326 HTML_labels (inf)
5327 FILE * inf;
5328 {
5329 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5330 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5331 bool intag = FALSE; /* inside an html tag, looking for ID= */
5332 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5333 char *end;
5334
5335
5336 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5337
5338 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5339 for (;;) /* loop on the same line */
5340 {
5341 if (skiptag) /* skip HTML tag */
5342 {
5343 while (*dbp != '\0' && *dbp != '>')
5344 dbp++;
5345 if (*dbp == '>')
5346 {
5347 dbp += 1;
5348 skiptag = FALSE;
5349 continue; /* look on the same line */
5350 }
5351 break; /* go to next line */
5352 }
5353
5354 else if (intag) /* look for "name=" or "id=" */
5355 {
5356 while (*dbp != '\0' && *dbp != '>'
5357 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5358 dbp++;
5359 if (*dbp == '\0')
5360 break; /* go to next line */
5361 if (*dbp == '>')
5362 {
5363 dbp += 1;
5364 intag = FALSE;
5365 continue; /* look on the same line */
5366 }
5367 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5368 || LOOKING_AT_NOCASE (dbp, "id="))
5369 {
5370 bool quoted = (dbp[0] == '"');
5371
5372 if (quoted)
5373 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5374 continue;
5375 else
5376 for (end = dbp; *end != '\0' && intoken (*end); end++)
5377 continue;
5378 linebuffer_setlen (&token_name, end - dbp);
5379 strncpy (token_name.buffer, dbp, end - dbp);
5380 token_name.buffer[end - dbp] = '\0';
5381
5382 dbp = end;
5383 intag = FALSE; /* we found what we looked for */
5384 skiptag = TRUE; /* skip to the end of the tag */
5385 getnext = TRUE; /* then grab the text */
5386 continue; /* look on the same line */
5387 }
5388 dbp += 1;
5389 }
5390
5391 else if (getnext) /* grab next tokens and tag them */
5392 {
5393 dbp = skip_spaces (dbp);
5394 if (*dbp == '\0')
5395 break; /* go to next line */
5396 if (*dbp == '<')
5397 {
5398 intag = TRUE;
5399 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5400 continue; /* look on the same line */
5401 }
5402
5403 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5404 continue;
5405 make_tag (token_name.buffer, token_name.len, TRUE,
5406 dbp, end - dbp, lineno, linecharno);
5407 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5408 getnext = FALSE;
5409 break; /* go to next line */
5410 }
5411
5412 else /* look for an interesting HTML tag */
5413 {
5414 while (*dbp != '\0' && *dbp != '<')
5415 dbp++;
5416 if (*dbp == '\0')
5417 break; /* go to next line */
5418 intag = TRUE;
5419 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5420 {
5421 inanchor = TRUE;
5422 continue; /* look on the same line */
5423 }
5424 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5425 || LOOKING_AT_NOCASE (dbp, "<h1>")
5426 || LOOKING_AT_NOCASE (dbp, "<h2>")
5427 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5428 {
5429 intag = FALSE;
5430 getnext = TRUE;
5431 continue; /* look on the same line */
5432 }
5433 dbp += 1;
5434 }
5435 }
5436 }
5437
5438 \f
5439 /*
5440 * Prolog support
5441 *
5442 * Assumes that the predicate or rule starts at column 0.
5443 * Only the first clause of a predicate or rule is added.
5444 * Original code by Sunichirou Sugou (1989)
5445 * Rewritten by Anders Lindgren (1996)
5446 */
5447 static int prolog_pr __P((char *, char *));
5448 static void prolog_skip_comment __P((linebuffer *, FILE *));
5449 static int prolog_atom __P((char *, int));
5450
5451 static void
5452 Prolog_functions (inf)
5453 FILE *inf;
5454 {
5455 char *cp, *last;
5456 int len;
5457 int allocated;
5458
5459 allocated = 0;
5460 len = 0;
5461 last = NULL;
5462
5463 LOOP_ON_INPUT_LINES (inf, lb, cp)
5464 {
5465 if (cp[0] == '\0') /* Empty line */
5466 continue;
5467 else if (iswhite (cp[0])) /* Not a predicate */
5468 continue;
5469 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5470 prolog_skip_comment (&lb, inf);
5471 else if ((len = prolog_pr (cp, last)) > 0)
5472 {
5473 /* Predicate or rule. Store the function name so that we
5474 only generate a tag for the first clause. */
5475 if (last == NULL)
5476 last = xnew(len + 1, char);
5477 else if (len + 1 > allocated)
5478 xrnew (last, len + 1, char);
5479 allocated = len + 1;
5480 strncpy (last, cp, len);
5481 last[len] = '\0';
5482 }
5483 }
5484 if (last != NULL)
5485 free (last);
5486 }
5487
5488
5489 static void
5490 prolog_skip_comment (plb, inf)
5491 linebuffer *plb;
5492 FILE *inf;
5493 {
5494 char *cp;
5495
5496 do
5497 {
5498 for (cp = plb->buffer; *cp != '\0'; cp++)
5499 if (cp[0] == '*' && cp[1] == '/')
5500 return;
5501 readline (plb, inf);
5502 }
5503 while (!feof(inf));
5504 }
5505
5506 /*
5507 * A predicate or rule definition is added if it matches:
5508 * <beginning of line><Prolog Atom><whitespace>(
5509 * or <beginning of line><Prolog Atom><whitespace>:-
5510 *
5511 * It is added to the tags database if it doesn't match the
5512 * name of the previous clause header.
5513 *
5514 * Return the size of the name of the predicate or rule, or 0 if no
5515 * header was found.
5516 */
5517 static int
5518 prolog_pr (s, last)
5519 char *s;
5520 char *last; /* Name of last clause. */
5521 {
5522 int pos;
5523 int len;
5524
5525 pos = prolog_atom (s, 0);
5526 if (pos < 1)
5527 return 0;
5528
5529 len = pos;
5530 pos = skip_spaces (s + pos) - s;
5531
5532 if ((s[pos] == '.'
5533 || (s[pos] == '(' && (pos += 1))
5534 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5535 && (last == NULL /* save only the first clause */
5536 || len != (int)strlen (last)
5537 || !strneq (s, last, len)))
5538 {
5539 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5540 return len;
5541 }
5542 else
5543 return 0;
5544 }
5545
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and underscores that starts with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  A quoted atom that is not closed
 *   on the same line is an error.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: look for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* A backslash takes the next character with it. */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        /* Two quotes in a row stand for one quote inside the atom;
           a single one closes it. */
        if (p[1] != '\'')
          return (int) (p + 1 - start);
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5604
5605 \f
5606 /*
5607 * Support for Erlang
5608 *
5609 * Generates tags for functions, defines, and records.
5610 * Assumes that Erlang functions start at column 0.
5611 * Original code by Anders Lindgren (1996)
5612 */
5613 static int erlang_func __P((char *, char *));
5614 static void erlang_attribute __P((char *));
5615 static int erlang_atom __P((char *));
5616
5617 static void
5618 Erlang_functions (inf)
5619 FILE *inf;
5620 {
5621 char *cp, *last;
5622 int len;
5623 int allocated;
5624
5625 allocated = 0;
5626 len = 0;
5627 last = NULL;
5628
5629 LOOP_ON_INPUT_LINES (inf, lb, cp)
5630 {
5631 if (cp[0] == '\0') /* Empty line */
5632 continue;
5633 else if (iswhite (cp[0])) /* Not function nor attribute */
5634 continue;
5635 else if (cp[0] == '%') /* comment */
5636 continue;
5637 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5638 continue;
5639 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5640 {
5641 erlang_attribute (cp);
5642 if (last != NULL)
5643 {
5644 free (last);
5645 last = NULL;
5646 }
5647 }
5648 else if ((len = erlang_func (cp, last)) > 0)
5649 {
5650 /*
5651 * Function. Store the function name so that we only
5652 * generates a tag for the first clause.
5653 */
5654 if (last == NULL)
5655 last = xnew (len + 1, char);
5656 else if (len + 1 > allocated)
5657 xrnew (last, len + 1, char);
5658 allocated = len + 1;
5659 strncpy (last, cp, len);
5660 last[len] = '\0';
5661 }
5662 }
5663 if (last != NULL)
5664 free (last);
5665 }
5666
5667
5668 /*
5669 * A function definition is added if it matches:
5670 * <beginning of line><Erlang Atom><whitespace>(
5671 *
5672 * It is added to the tags database if it doesn't match the
5673 * name of the previous clause header.
5674 *
5675 * Return the size of the name of the function, or 0 if no function
5676 * was found.
5677 */
5678 static int
5679 erlang_func (s, last)
5680 char *s;
5681 char *last; /* Name of last clause. */
5682 {
5683 int pos;
5684 int len;
5685
5686 pos = erlang_atom (s);
5687 if (pos < 1)
5688 return 0;
5689
5690 len = pos;
5691 pos = skip_spaces (s + pos) - s;
5692
5693 /* Save only the first clause. */
5694 if (s[pos++] == '('
5695 && (last == NULL
5696 || len != (int)strlen (last)
5697 || !strneq (s, last, len)))
5698 {
5699 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5700 return len;
5701 }
5702
5703 return 0;
5704 }
5705
5706
5707 /*
5708 * Handle attributes. Currently, tags are generated for defines
5709 * and records.
5710 *
5711 * They are on the form:
5712 * -define(foo, bar).
5713 * -define(Foo(M, N), M+N).
5714 * -record(graph, {vtab = notable, cyclic = true}).
5715 */
5716 static void
5717 erlang_attribute (s)
5718 char *s;
5719 {
5720 char *cp = s;
5721
5722 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5723 && *cp++ == '(')
5724 {
5725 int len = erlang_atom (skip_spaces (cp));
5726 if (len > 0)
5727 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5728 }
5729 return;
5730 }
5731
5732
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted atom starts with a letter or underscore and continues
 * with letters, digits and underscores.  A quoted atom is delimited by
 * single quotes; a backslash inside it quotes the next character.
 *
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      pos = 1;
      while (s[pos] != '\'')
	{
	  if (s[pos] == '\0')	/* multiline quoted atoms are ignored */
	    return 0;
	  if (s[pos] == '\\')
	    {
	      pos++;		/* step onto the quoted character */
	      if (s[pos] == '\0')
		return 0;
	    }
	  pos++;
	}
      return pos + 1;		/* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
	pos++;
    }

  return pos;
}
5761
5762 \f
5763 static char *scan_separators __P((char *));
5764 static void add_regex __P((char *, language *));
5765 static char *substitute __P((char *, char *, struct re_registers *));
5766
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * unquoted occurrence of it.  A backslash quotes the next character:
 * \a, \b, \d, \e, \f, \n, \r, \t and \v are turned into the
 * corresponding control characters, a quoted separator becomes a plain
 * separator, and any other quoted character is kept together with its
 * backslash.  Works in place and null terminates the resulting string.
 *
 * Return a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
	{
	  /* Step onto the quoted character.  A backslash at the very
	     end quotes nothing and leaves the regexp unterminated.  */
	  ++name;
	  if (*name == '\0')
	    break;

	  switch (*name)
	    {
	    case 'a': *copyto++ = '\007'; break; /* BEL (bell)		 */
	    case 'b': *copyto++ = '\b'; break;	 /* BS (back space)	 */
	    case 'd': *copyto++ = 0177; break;	 /* DEL (delete)	 */
	    case 'e': *copyto++ = 033; break;	 /* ESC (escape)	 */
	    case 'f': *copyto++ = '\f'; break;	 /* FF (form feed)	 */
	    case 'n': *copyto++ = '\n'; break;	 /* NL (new line)	 */
	    case 'r': *copyto++ = '\r'; break;	 /* CR (carriage return) */
	    case 't': *copyto++ = '\t'; break;	 /* TAB (horizontal tab) */
	    case 'v': *copyto++ = '\v'; break;	 /* VT (vertical tab)	 */
	    default:
	      if (*name == sep)
		*copyto++ = sep;
	      else
		{
		  /* Something else is quoted, so preserve the quote.  */
		  *copyto++ = '\\';
		  *copyto++ = *name;
		}
	      break;
	    }
	}
      else if (*name == sep)
	break;			/* unquoted separator: end of string */
      else
	*copyto++ = *name;
    }

  if (*name != sep)
    name = NULL;		/* signal unterminated regexp */

  /* Terminate copied string.  */
  *copyto = '\0';
  return name;
}
5826
5827 /* Look at the argument of --regex or --no-regex and do the right
5828 thing. Same for each line of a regexp file. */
5829 static void
5830 analyse_regex (regex_arg)
5831 char *regex_arg;
5832 {
5833 if (regex_arg == NULL)
5834 {
5835 free_regexps (); /* --no-regex: remove existing regexps */
5836 return;
5837 }
5838
5839 /* A real --regexp option or a line in a regexp file. */
5840 switch (regex_arg[0])
5841 {
5842 /* Comments in regexp file or null arg to --regex. */
5843 case '\0':
5844 case ' ':
5845 case '\t':
5846 break;
5847
5848 /* Read a regex file. This is recursive and may result in a
5849 loop, which will stop when the file descriptors are exhausted. */
5850 case '@':
5851 {
5852 FILE *regexfp;
5853 linebuffer regexbuf;
5854 char *regexfile = regex_arg + 1;
5855
5856 /* regexfile is a file containing regexps, one per line. */
5857 regexfp = fopen (regexfile, "r");
5858 if (regexfp == NULL)
5859 {
5860 pfatal (regexfile);
5861 return;
5862 }
5863 linebuffer_init (&regexbuf);
5864 while (readline_internal (&regexbuf, regexfp) > 0)
5865 analyse_regex (regexbuf.buffer);
5866 free (regexbuf.buffer);
5867 fclose (regexfp);
5868 }
5869 break;
5870
5871 /* Regexp to be used for a specific language only. */
5872 case '{':
5873 {
5874 language *lang;
5875 char *lang_name = regex_arg + 1;
5876 char *cp;
5877
5878 for (cp = lang_name; *cp != '}'; cp++)
5879 if (*cp == '\0')
5880 {
5881 error ("unterminated language name in regex: %s", regex_arg);
5882 return;
5883 }
5884 *cp++ = '\0';
5885 lang = get_language_from_langname (lang_name);
5886 if (lang == NULL)
5887 return;
5888 add_regex (cp, lang);
5889 }
5890 break;
5891
5892 /* Regexp to be used for any language. */
5893 default:
5894 add_regex (regex_arg, NULL);
5895 break;
5896 }
5897 }
5898
5899 /* Separate the regexp pattern, compile it,
5900 and care for optional name and modifiers. */
5901 static void
5902 add_regex (regexp_pattern, lang)
5903 char *regexp_pattern;
5904 language *lang;
5905 {
5906 static struct re_pattern_buffer zeropattern;
5907 char sep, *pat, *name, *modifiers;
5908 const char *err;
5909 struct re_pattern_buffer *patbuf;
5910 regexp *rp;
5911 bool
5912 force_explicit_name = TRUE, /* do not use implicit tag names */
5913 ignore_case = FALSE, /* case is significant */
5914 multi_line = FALSE, /* matches are done one line at a time */
5915 single_line = FALSE; /* dot does not match newline */
5916
5917
5918 if (strlen(regexp_pattern) < 3)
5919 {
5920 error ("null regexp", (char *)NULL);
5921 return;
5922 }
5923 sep = regexp_pattern[0];
5924 name = scan_separators (regexp_pattern);
5925 if (name == NULL)
5926 {
5927 error ("%s: unterminated regexp", regexp_pattern);
5928 return;
5929 }
5930 if (name[1] == sep)
5931 {
5932 error ("null name for regexp \"%s\"", regexp_pattern);
5933 return;
5934 }
5935 modifiers = scan_separators (name);
5936 if (modifiers == NULL) /* no terminating separator --> no name */
5937 {
5938 modifiers = name;
5939 name = "";
5940 }
5941 else
5942 modifiers += 1; /* skip separator */
5943
5944 /* Parse regex modifiers. */
5945 for (; modifiers[0] != '\0'; modifiers++)
5946 switch (modifiers[0])
5947 {
5948 case 'N':
5949 if (modifiers == name)
5950 error ("forcing explicit tag name but no name, ignoring", NULL);
5951 force_explicit_name = TRUE;
5952 break;
5953 case 'i':
5954 ignore_case = TRUE;
5955 break;
5956 case 's':
5957 single_line = TRUE;
5958 /* FALLTHRU */
5959 case 'm':
5960 multi_line = TRUE;
5961 need_filebuf = TRUE;
5962 break;
5963 default:
5964 {
5965 char wrongmod [2];
5966 wrongmod[0] = modifiers[0];
5967 wrongmod[1] = '\0';
5968 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5969 }
5970 break;
5971 }
5972
5973 patbuf = xnew (1, struct re_pattern_buffer);
5974 *patbuf = zeropattern;
5975 if (ignore_case)
5976 {
5977 static char lc_trans[CHARS];
5978 int i;
5979 for (i = 0; i < CHARS; i++)
5980 lc_trans[i] = lowcase (i);
5981 patbuf->translate = lc_trans; /* translation table to fold case */
5982 }
5983
5984 if (multi_line)
5985 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5986 else
5987 pat = regexp_pattern;
5988
5989 if (single_line)
5990 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5991 else
5992 re_set_syntax (RE_SYNTAX_EMACS);
5993
5994 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5995 if (multi_line)
5996 free (pat);
5997 if (err != NULL)
5998 {
5999 error ("%s while compiling pattern", err);
6000 return;
6001 }
6002
6003 rp = p_head;
6004 p_head = xnew (1, regexp);
6005 p_head->pattern = savestr (regexp_pattern);
6006 p_head->p_next = rp;
6007 p_head->lang = lang;
6008 p_head->pat = patbuf;
6009 p_head->name = savestr (name);
6010 p_head->error_signaled = FALSE;
6011 p_head->force_explicit_name = force_explicit_name;
6012 p_head->ignore_case = ignore_case;
6013 p_head->multi_line = multi_line;
6014 }
6015
6016 /*
6017 * Do the substitutions indicated by the regular expression and
6018 * arguments.
6019 */
6020 static char *
6021 substitute (in, out, regs)
6022 char *in, *out;
6023 struct re_registers *regs;
6024 {
6025 char *result, *t;
6026 int size, dig, diglen;
6027
6028 result = NULL;
6029 size = strlen (out);
6030
6031 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6032 if (out[size - 1] == '\\')
6033 fatal ("pattern error in \"%s\"", out);
6034 for (t = etags_strchr (out, '\\');
6035 t != NULL;
6036 t = etags_strchr (t + 2, '\\'))
6037 if (ISDIGIT (t[1]))
6038 {
6039 dig = t[1] - '0';
6040 diglen = regs->end[dig] - regs->start[dig];
6041 size += diglen - 2;
6042 }
6043 else
6044 size -= 1;
6045
6046 /* Allocate space and do the substitutions. */
6047 assert (size >= 0);
6048 result = xnew (size + 1, char);
6049
6050 for (t = result; *out != '\0'; out++)
6051 if (*out == '\\' && ISDIGIT (*++out))
6052 {
6053 dig = *out - '0';
6054 diglen = regs->end[dig] - regs->start[dig];
6055 strncpy (t, in + regs->start[dig], diglen);
6056 t += diglen;
6057 }
6058 else
6059 *t++ = *out;
6060 *t = '\0';
6061
6062 assert (t <= result + size);
6063 assert (t - result == (int)strlen (result));
6064
6065 return result;
6066 }
6067
6068 /* Deallocate all regexps. */
6069 static void
6070 free_regexps ()
6071 {
6072 regexp *rp;
6073 while (p_head != NULL)
6074 {
6075 rp = p_head->p_next;
6076 free (p_head->pattern);
6077 free (p_head->name);
6078 free (p_head);
6079 p_head = rp;
6080 }
6081 return;
6082 }
6083
6084 /*
6085 * Reads the whole file as a single string from `filebuf' and looks for
6086 * multi-line regular expressions, creating tags on matches.
6087 * readline already dealt with normal regexps.
6088 *
6089 * Idea by Ben Wing <ben@666.com> (2002).
6090 */
6091 static void
6092 regex_tag_multiline ()
6093 {
6094 char *buffer = filebuf.buffer;
6095 regexp *rp;
6096 char *name;
6097
6098 for (rp = p_head; rp != NULL; rp = rp->p_next)
6099 {
6100 int match = 0;
6101
6102 if (!rp->multi_line)
6103 continue; /* skip normal regexps */
6104
6105 /* Generic initialisations before parsing file from memory. */
6106 lineno = 1; /* reset global line number */
6107 charno = 0; /* reset global char number */
6108 linecharno = 0; /* reset global char number of line start */
6109
6110 /* Only use generic regexps or those for the current language. */
6111 if (rp->lang != NULL && rp->lang != curfdp->lang)
6112 continue;
6113
6114 while (match >= 0 && match < filebuf.len)
6115 {
6116 match = re_search (rp->pat, buffer, filebuf.len, charno,
6117 filebuf.len - match, &rp->regs);
6118 switch (match)
6119 {
6120 case -2:
6121 /* Some error. */
6122 if (!rp->error_signaled)
6123 {
6124 error ("regexp stack overflow while matching \"%s\"",
6125 rp->pattern);
6126 rp->error_signaled = TRUE;
6127 }
6128 break;
6129 case -1:
6130 /* No match. */
6131 break;
6132 default:
6133 if (match == rp->regs.end[0])
6134 {
6135 if (!rp->error_signaled)
6136 {
6137 error ("regexp matches the empty string: \"%s\"",
6138 rp->pattern);
6139 rp->error_signaled = TRUE;
6140 }
6141 match = -3; /* exit from while loop */
6142 break;
6143 }
6144
6145 /* Match occurred. Construct a tag. */
6146 while (charno < rp->regs.end[0])
6147 if (buffer[charno++] == '\n')
6148 lineno++, linecharno = charno;
6149 name = rp->name;
6150 if (name[0] == '\0')
6151 name = NULL;
6152 else /* make a named tag */
6153 name = substitute (buffer, rp->name, &rp->regs);
6154 if (rp->force_explicit_name)
6155 /* Force explicit tag name, if a name is there. */
6156 pfnote (name, TRUE, buffer + linecharno,
6157 charno - linecharno + 1, lineno, linecharno);
6158 else
6159 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6160 charno - linecharno + 1, lineno, linecharno);
6161 break;
6162 }
6163 }
6164 }
6165 }
6166
6167 \f
6168 static bool
6169 nocase_tail (cp)
6170 char *cp;
6171 {
6172 register int len = 0;
6173
6174 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6175 cp++, len++;
6176 if (*cp == '\0' && !intoken (dbp[len]))
6177 {
6178 dbp += len;
6179 return TRUE;
6180 }
6181 return FALSE;
6182 }
6183
6184 static void
6185 get_tag (bp, namepp)
6186 register char *bp;
6187 char **namepp;
6188 {
6189 register char *cp = bp;
6190
6191 if (*bp != '\0')
6192 {
6193 /* Go till you get to white space or a syntactic break */
6194 for (cp = bp + 1; !notinname (*cp); cp++)
6195 continue;
6196 make_tag (bp, cp - bp, TRUE,
6197 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6198 }
6199
6200 if (namepp != NULL)
6201 *namepp = savenstr (bp, cp - bp);
6202 }
6203
6204 /*
6205 * Read a line of text from `stream' into `lbp', excluding the
6206 * newline or CR-NL, if any. Return the number of characters read from
6207 * `stream', which is the length of the line including the newline.
6208 *
6209 * On DOS or Windows we do not count the CR character, if any before the
6210 * NL, in the returned length; this mirrors the behavior of Emacs on those
6211 * platforms (for text files, it translates CR-NL to NL as it reads in the
6212 * file).
6213 *
6214 * If multi-line regular expressions are requested, each line read is
6215 * appended to `filebuf'.
6216 */
6217 static long
6218 readline_internal (lbp, stream)
6219 linebuffer *lbp;
6220 register FILE *stream;
6221 {
6222 char *buffer = lbp->buffer;
6223 register char *p = lbp->buffer;
6224 register char *pend;
6225 int chars_deleted;
6226
6227 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6228
6229 for (;;)
6230 {
6231 register int c = getc (stream);
6232 if (p == pend)
6233 {
6234 /* We're at the end of linebuffer: expand it. */
6235 lbp->size *= 2;
6236 xrnew (buffer, lbp->size, char);
6237 p += buffer - lbp->buffer;
6238 pend = buffer + lbp->size;
6239 lbp->buffer = buffer;
6240 }
6241 if (c == EOF)
6242 {
6243 *p = '\0';
6244 chars_deleted = 0;
6245 break;
6246 }
6247 if (c == '\n')
6248 {
6249 if (p > buffer && p[-1] == '\r')
6250 {
6251 p -= 1;
6252 #ifdef DOS_NT
6253 /* Assume CRLF->LF translation will be performed by Emacs
6254 when loading this file, so CRs won't appear in the buffer.
6255 It would be cleaner to compensate within Emacs;
6256 however, Emacs does not know how many CRs were deleted
6257 before any given point in the file. */
6258 chars_deleted = 1;
6259 #else
6260 chars_deleted = 2;
6261 #endif
6262 }
6263 else
6264 {
6265 chars_deleted = 1;
6266 }
6267 *p = '\0';
6268 break;
6269 }
6270 *p++ = c;
6271 }
6272 lbp->len = p - buffer;
6273
6274 if (need_filebuf /* we need filebuf for multi-line regexps */
6275 && chars_deleted > 0) /* not at EOF */
6276 {
6277 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6278 {
6279 /* Expand filebuf. */
6280 filebuf.size *= 2;
6281 xrnew (filebuf.buffer, filebuf.size, char);
6282 }
6283 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6284 filebuf.len += lbp->len;
6285 filebuf.buffer[filebuf.len++] = '\n';
6286 filebuf.buffer[filebuf.len] = '\0';
6287 }
6288
6289 return lbp->len + chars_deleted;
6290 }
6291
6292 /*
6293 * Like readline_internal, above, but in addition try to match the
6294 * input line against relevant regular expressions and manage #line
6295 * directives.
6296 */
6297 static void
6298 readline (lbp, stream)
6299 linebuffer *lbp;
6300 FILE *stream;
6301 {
6302 long result;
6303
6304 linecharno = charno; /* update global char number of line start */
6305 result = readline_internal (lbp, stream); /* read line */
6306 lineno += 1; /* increment global line number */
6307 charno += result; /* increment global char number */
6308
6309 /* Honour #line directives. */
6310 if (!no_line_directive)
6311 {
6312 static bool discard_until_line_directive;
6313
6314 /* Check whether this is a #line directive. */
6315 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6316 {
6317 unsigned int lno;
6318 int start = 0;
6319
6320 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6321 && start > 0) /* double quote character found */
6322 {
6323 char *endp = lbp->buffer + start;
6324
6325 while ((endp = etags_strchr (endp, '"')) != NULL
6326 && endp[-1] == '\\')
6327 endp++;
6328 if (endp != NULL)
6329 /* Ok, this is a real #line directive. Let's deal with it. */
6330 {
6331 char *taggedabsname; /* absolute name of original file */
6332 char *taggedfname; /* name of original file as given */
6333 char *name; /* temp var */
6334
6335 discard_until_line_directive = FALSE; /* found it */
6336 name = lbp->buffer + start;
6337 *endp = '\0';
6338 canonicalize_filename (name); /* for DOS */
6339 taggedabsname = absolute_filename (name, tagfiledir);
6340 if (filename_is_absolute (name)
6341 || filename_is_absolute (curfdp->infname))
6342 taggedfname = savestr (taggedabsname);
6343 else
6344 taggedfname = relative_filename (taggedabsname,tagfiledir);
6345
6346 if (streq (curfdp->taggedfname, taggedfname))
6347 /* The #line directive is only a line number change. We
6348 deal with this afterwards. */
6349 free (taggedfname);
6350 else
6351 /* The tags following this #line directive should be
6352 attributed to taggedfname. In order to do this, set
6353 curfdp accordingly. */
6354 {
6355 fdesc *fdp; /* file description pointer */
6356
6357 /* Go look for a file description already set up for the
6358 file indicated in the #line directive. If there is
6359 one, use it from now until the next #line
6360 directive. */
6361 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6362 if (streq (fdp->infname, curfdp->infname)
6363 && streq (fdp->taggedfname, taggedfname))
6364 /* If we remove the second test above (after the &&)
6365 then all entries pertaining to the same file are
6366 coalesced in the tags file. If we use it, then
6367 entries pertaining to the same file but generated
6368 from different files (via #line directives) will
6369 go into separate sections in the tags file. These
6370 alternatives look equivalent. The first one
6371 destroys some apparently useless information. */
6372 {
6373 curfdp = fdp;
6374 free (taggedfname);
6375 break;
6376 }
6377 /* Else, if we already tagged the real file, skip all
6378 input lines until the next #line directive. */
6379 if (fdp == NULL) /* not found */
6380 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6381 if (streq (fdp->infabsname, taggedabsname))
6382 {
6383 discard_until_line_directive = TRUE;
6384 free (taggedfname);
6385 break;
6386 }
6387 /* Else create a new file description and use that from
6388 now on, until the next #line directive. */
6389 if (fdp == NULL) /* not found */
6390 {
6391 fdp = fdhead;
6392 fdhead = xnew (1, fdesc);
6393 *fdhead = *curfdp; /* copy curr. file description */
6394 fdhead->next = fdp;
6395 fdhead->infname = savestr (curfdp->infname);
6396 fdhead->infabsname = savestr (curfdp->infabsname);
6397 fdhead->infabsdir = savestr (curfdp->infabsdir);
6398 fdhead->taggedfname = taggedfname;
6399 fdhead->usecharno = FALSE;
6400 fdhead->prop = NULL;
6401 fdhead->written = FALSE;
6402 curfdp = fdhead;
6403 }
6404 }
6405 free (taggedabsname);
6406 lineno = lno - 1;
6407 readline (lbp, stream);
6408 return;
6409 } /* if a real #line directive */
6410 } /* if #line is followed by a a number */
6411 } /* if line begins with "#line " */
6412
6413 /* If we are here, no #line directive was found. */
6414 if (discard_until_line_directive)
6415 {
6416 if (result > 0)
6417 {
6418 /* Do a tail recursion on ourselves, thus discarding the contents
6419 of the line buffer. */
6420 readline (lbp, stream);
6421 return;
6422 }
6423 /* End of file. */
6424 discard_until_line_directive = FALSE;
6425 return;
6426 }
6427 } /* if #line directives should be considered */
6428
6429 {
6430 int match;
6431 regexp *rp;
6432 char *name;
6433
6434 /* Match against relevant regexps. */
6435 if (lbp->len > 0)
6436 for (rp = p_head; rp != NULL; rp = rp->p_next)
6437 {
6438 /* Only use generic regexps or those for the current language.
6439 Also do not use multiline regexps, which is the job of
6440 regex_tag_multiline. */
6441 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6442 || rp->multi_line)
6443 continue;
6444
6445 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6446 switch (match)
6447 {
6448 case -2:
6449 /* Some error. */
6450 if (!rp->error_signaled)
6451 {
6452 error ("regexp stack overflow while matching \"%s\"",
6453 rp->pattern);
6454 rp->error_signaled = TRUE;
6455 }
6456 break;
6457 case -1:
6458 /* No match. */
6459 break;
6460 case 0:
6461 /* Empty string matched. */
6462 if (!rp->error_signaled)
6463 {
6464 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6465 rp->error_signaled = TRUE;
6466 }
6467 break;
6468 default:
6469 /* Match occurred. Construct a tag. */
6470 name = rp->name;
6471 if (name[0] == '\0')
6472 name = NULL;
6473 else /* make a named tag */
6474 name = substitute (lbp->buffer, rp->name, &rp->regs);
6475 if (rp->force_explicit_name)
6476 /* Force explicit tag name, if a name is there. */
6477 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6478 else
6479 make_tag (name, strlen (name), TRUE,
6480 lbp->buffer, match, lineno, linecharno);
6481 break;
6482 }
6483 }
6484 }
6485 }
6486
6487 \f
/*
 * Return a newly allocated copy of the whole string CP, obtained
 * from savenstr (strlen(cp)+1 bytes including the terminator).
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6498
6499 /*
6500 * Return a pointer to a space of size LEN+1 allocated with xnew where
6501 * the string CP has been copied for at most the first LEN characters.
6502 */
6503 static char *
6504 savenstr (cp, len)
6505 char *cp;
6506 int len;
6507 {
6508 register char *dp;
6509
6510 dp = xnew (len + 1, char);
6511 strncpy (dp, cp, len);
6512 dp[len] = '\0';
6513 return dp;
6514 }
6515
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null counts as part
 * of the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
	last = sp;
      if (*sp == '\0')		/* the terminator has been examined too */
	break;
    }
  return (char *)last;
}
6537
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null counts as part
 * of the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
	return (char *)sp;
      if (*sp == '\0')		/* end reached without finding c */
	return NULL;
    }
}
6556
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero if they are equal, otherwise the difference between the
 * first two characters that differ (lowercased if both are letters).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Skip the common prefix.  */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
	{
	  if (lowcase (*s1) != lowcase (*s2))
	    break;
	}
      else if (*s1 != *s2)
	break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6577
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: at most the first N
 * characters are compared, and the result is zero if they are all
 * equal or if N is not positive.  Otherwise return the difference
 * between the first two characters that differ (lowercased if both
 * are letters).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is decremented
     only for characters found equal, so that N == 0 on exit always
     means that the whole prefix matched, even when S1 ends there.  */
  while (n > 0
	 && *s1 != '\0'
	 && (ISALPHA (*s1) && ISALPHA (*s2)
	     ? lowcase (*s1) == lowcase (*s2)
	     : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;

  return (ISALPHA (*s1) && ISALPHA (*s2)
	  ? lowcase (*s1) - lowcase (*s2)
	  : *s1 - *s2);
}
6603
/* Return a pointer to the first character of CP for which iswhite is
   false (the original comment notes that end of string is not space).  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6613
/* Return a pointer to the first character of CP that is either the
   end of the string or one for which iswhite is true.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
6623
/* Print an error message through `error' (`s1' is the printf control
   string, `s2' its argument) and exit with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6632
/* Print, with perror, the system error message for the current errno
   value prefixed by `s1', then exit with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6640
6641 static void
6642 suggest_asking_for_help ()
6643 {
6644 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6645 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6646 exit (EXIT_FAILURE);
6647 }
6648
6649 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6650 static void
6651 error (s1, s2)
6652 const char *s1, *s2;
6653 {
6654 fprintf (stderr, "%s: ", progname);
6655 fprintf (stderr, s1, s2);
6656 fprintf (stderr, "\n");
6657 }
6658
6659 /* Return a newly-allocated string whose contents
6660 concatenate those of s1, s2, s3. */
6661 static char *
6662 concat (s1, s2, s3)
6663 char *s1, *s2, *s3;
6664 {
6665 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6666 char *result = xnew (len1 + len2 + len3 + 1, char);
6667
6668 strcpy (result, s1);
6669 strcpy (result + len1, s2);
6670 strcpy (result + len1 + len2, s3);
6671 result[len1 + len2 + len3] = '\0';
6672
6673 return result;
6674 }
6675
6676 \f
6677 /* Does the same work as the system V getcwd, but does not need to
6678 guess the buffer size in advance. */
6679 static char *
6680 etags_getcwd ()
6681 {
6682 #ifdef HAVE_GETCWD
6683 int bufsize = 200;
6684 char *path = xnew (bufsize, char);
6685
6686 while (getcwd (path, bufsize) == NULL)
6687 {
6688 if (errno != ERANGE)
6689 pfatal ("getcwd");
6690 bufsize *= 2;
6691 free (path);
6692 path = xnew (bufsize, char);
6693 }
6694
6695 canonicalize_filename (path);
6696 return path;
6697
6698 #else /* not HAVE_GETCWD */
6699 #if MSDOS
6700
6701 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6702
6703 getwd (path);
6704
6705 for (p = path; *p != '\0'; p++)
6706 if (*p == '\\')
6707 *p = '/';
6708 else
6709 *p = lowcase (*p);
6710
6711 return strdup (path);
6712 #else /* not MSDOS */
6713 linebuffer path;
6714 FILE *pipe;
6715
6716 linebuffer_init (&path);
6717 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6718 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6719 pfatal ("pwd");
6720 pclose (pipe);
6721
6722 return path.buffer;
6723 #endif /* not MSDOS */
6724 #endif /* not HAVE_GETCWD */
6725 }
6726
6727 /* Return a newly allocated string containing the file name of FILE
6728 relative to the absolute directory DIR (which should end with a slash). */
6729 static char *
6730 relative_filename (file, dir)
6731 char *file, *dir;
6732 {
6733 char *fp, *dp, *afn, *res;
6734 int i;
6735
6736 /* Find the common root of file and dir (with a trailing slash). */
6737 afn = absolute_filename (file, cwd);
6738 fp = afn;
6739 dp = dir;
6740 while (*fp++ == *dp++)
6741 continue;
6742 fp--, dp--; /* back to the first differing char */
6743 #ifdef DOS_NT
6744 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6745 return afn;
6746 #endif
6747 do /* look at the equal chars until '/' */
6748 fp--, dp--;
6749 while (*fp != '/');
6750
6751 /* Build a sequence of "../" strings for the resulting relative file name. */
6752 i = 0;
6753 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6754 i += 1;
6755 res = xnew (3*i + strlen (fp + 1) + 1, char);
6756 res[0] = '\0';
6757 while (i-- > 0)
6758 strcat (res, "../");
6759
6760 /* Add the file name relative to the common root of file and dir. */
6761 strcat (res, fp + 1);
6762 free (afn);
6763
6764 return res;
6765 }
6766
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   absolute it is appended to DIR.  The "/dirname/.." and "/."
   substrings are then removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Go back to the slash starting the previous component.  */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Source and destination overlap, so strcpy cannot be
		 used; the terminating null is moved too.  */
	      memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')		/* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6830
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place.  If it contains
   no slash, the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *lastslash, *dirname;
  char saved;

  canonicalize_filename (file);
  lastslash = etags_strrchr (file, '/');
  if (lastslash == NULL)
    return savestr (dir);

  /* Temporarily cut FILE right after its last slash.  */
  saved = lastslash[1];
  lastslash[1] = '\0';
  dirname = absolute_filename (file, dir);
  lastslash[1] = saved;

  return dirname;
}
6852
/* Whether the argument string is an absolute file name, that is one
   starting with a slash or, when DOS_NT is defined, with a letter
   followed by ":/".  The argument string must have been canonicalized
   with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6865
/* Translate backslashes into slashes.  Works in place.  When DOS_NT
   is defined a lower case drive letter is also turned to upper case;
   otherwise the string is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (p = fn; *p != '\0'; p++)
    {
      if (*p == '\\')
	*p = '/';
    }
#else
  /* No action.  */
  (void) fn;			/* shut up the compiler */
#endif
}
6884
6885 \f
6886 /* Initialize a linebuffer for use */
6887 static void
6888 linebuffer_init (lbp)
6889 linebuffer *lbp;
6890 {
6891 lbp->size = (DEBUG) ? 3 : 200;
6892 lbp->buffer = xnew (lbp->size, char);
6893 lbp->buffer[0] = '\0';
6894 lbp->len = 0;
6895 }
6896
6897 /* Set the minimum size of a string contained in a linebuffer. */
6898 static void
6899 linebuffer_setlen (lbp, toksize)
6900 linebuffer *lbp;
6901 int toksize;
6902 {
6903 while (lbp->size <= toksize)
6904 {
6905 lbp->size *= 2;
6906 xrnew (lbp->buffer, lbp->size, char);
6907 }
6908 lbp->len = toksize;
6909 }
6910
6911 /* Like malloc but get fatal error if memory is exhausted. */
6912 static PTR
6913 xmalloc (size)
6914 unsigned int size;
6915 {
6916 PTR result = (PTR) malloc (size);
6917 if (result == NULL)
6918 fatal ("virtual memory exhausted", (char *)NULL);
6919 return result;
6920 }
6921
6922 static PTR
6923 xrealloc (ptr, size)
6924 char *ptr;
6925 unsigned int size;
6926 {
6927 PTR result = (PTR) realloc (ptr, size);
6928 if (result == NULL)
6929 fatal ("virtual memory exhausted", (char *)NULL);
6930 return result;
6931 }
6932
6933 /*
6934 * Local Variables:
6935 * indent-tabs-mode: t
6936 * tab-width: 8
6937 * fill-column: 79
6938 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6939 * c-file-style: "gnu"
6940 * End:
6941 */
6942
6943 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6944 (do not change this comment) */
6945
6946 /* etags.c ends here */