Merge from emacs--rel--22
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 3, or (at your option)
40 any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program; see the file COPYING. If not, write to the
49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
50 Boston, MA 02110-1301, USA. */
51
52
53 /* NB To comply with the above BSD license, copyright information is
54 reproduced in etc/ETAGS.README. That file should be updated when the
55 above notices are.
56
57 To the best of our knowledge, this code was originally based on the
58 ctags.c distributed with BSD4.2, which was copyrighted by the
59 University of California, as described above. */
60
61
62 /*
63 * Authors:
64 * 1983 Ctags originally by Ken Arnold.
65 * 1984 Fortran added by Jim Kleckner.
66 * 1984 Ed Pelegri-Llopart added C typedefs.
67 * 1985 Emacs TAGS format by Richard Stallman.
68 * 1989 Sam Kendall added C++.
69 * 1992 Joseph B. Wells improved C and C++ parsing.
70 * 1993 Francesco Potortì reorganised C and C++.
71 * 1994 Line-by-line regexp tags by Tom Tromey.
72 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
73 * 2002 #line directives by Francesco Potortì.
74 *
75 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 */
77
78 /*
79 * If you want to add support for a new language, start by looking at the LUA
80 * language, which is the simplest. Alternatively, consider shipping a
81 * configuration file containing regexp definitions for etags.
82 */
83
84 char pot_etags_version[] = "@(#) pot revision number is 17.34";
85
86 #define TRUE 1
87 #define FALSE 0
88
89 #ifdef DEBUG
90 # undef DEBUG
91 # define DEBUG TRUE
92 #else
93 # define DEBUG FALSE
94 # define NDEBUG /* disable assert */
95 #endif
96
97 #ifdef HAVE_CONFIG_H
98 # include <config.h>
99 /* On some systems, Emacs defines static as nothing for the sake
100 of unexec. We don't want that here since we don't use unexec. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 # ifndef __P /* for XEmacs */
106 # define __P(args) args
107 # endif
108 #else /* no config.h */
109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
110 # define __P(args) args /* use prototypes */
111 # define PTR void * /* for generic pointers */
112 # else /* not standard C */
113 # define __P(args) () /* no prototypes */
114 # define const /* remove const for old compilers' sake */
115 # define PTR long * /* don't use void* */
116 # endif
117 #endif /* !HAVE_CONFIG_H */
118
119 #ifndef _GNU_SOURCE
120 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
121 #endif
122
123 /* WIN32_NATIVE is for XEmacs.
124 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
125 #ifdef WIN32_NATIVE
126 # undef MSDOS
127 # undef WINDOWSNT
128 # define WINDOWSNT
129 #endif /* WIN32_NATIVE */
130
131 #ifdef MSDOS
132 # undef MSDOS
133 # define MSDOS TRUE
134 # include <fcntl.h>
135 # include <sys/param.h>
136 # include <io.h>
137 # ifndef HAVE_CONFIG_H
138 # define DOS_NT
139 # include <sys/config.h>
140 # endif
141 #else
142 # define MSDOS FALSE
143 #endif /* MSDOS */
144
145 #ifdef WINDOWSNT
146 # include <stdlib.h>
147 # include <fcntl.h>
148 # include <string.h>
149 # include <direct.h>
150 # include <io.h>
151 # define MAXPATHLEN _MAX_PATH
152 # undef HAVE_NTGUI
153 # undef DOS_NT
154 # define DOS_NT
155 # ifndef HAVE_GETCWD
156 # define HAVE_GETCWD
157 # endif /* undef HAVE_GETCWD */
158 #else /* not WINDOWSNT */
159 # ifdef STDC_HEADERS
160 # include <stdlib.h>
161 # include <string.h>
162 # else /* no standard C headers */
163 extern char *getenv ();
164 extern char *strcpy ();
165 extern char *strncpy ();
166 extern char *strcat ();
167 extern char *strncat ();
168 extern unsigned long strlen ();
169 extern PTR malloc ();
170 extern PTR realloc ();
171 # ifdef VMS
172 # define EXIT_SUCCESS 1
173 # define EXIT_FAILURE 0
174 # else /* no VMS */
175 # define EXIT_SUCCESS 0
176 # define EXIT_FAILURE 1
177 # endif
178 # endif
179 #endif /* !WINDOWSNT */
180
181 #ifdef HAVE_UNISTD_H
182 # include <unistd.h>
183 #else
184 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
185 extern char *getcwd (char *buf, size_t size);
186 # endif
187 #endif /* HAVE_UNISTD_H */
188
189 #include <stdio.h>
190 #include <ctype.h>
191 #include <errno.h>
192 #ifndef errno
193 extern int errno;
194 #endif
195 #include <sys/types.h>
196 #include <sys/stat.h>
197
198 #include <assert.h>
199 #ifdef NDEBUG
200 # undef assert /* some systems have a buggy assert.h */
201 # define assert(x) ((void) 0)
202 #endif
203
204 #if !defined (S_ISREG) && defined (S_IFREG)
205 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
206 #endif
207
208 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
209 # define NO_LONG_OPTIONS TRUE
210 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
211 extern char *optarg;
212 extern int optind, opterr;
213 #else
214 # define NO_LONG_OPTIONS FALSE
215 # include <getopt.h>
216 #endif /* NO_LONG_OPTIONS */
217
218 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
219 # ifdef __CYGWIN__ /* compiling on Cygwin */
220 !!! NOTICE !!!
221 the regex.h distributed with Cygwin is not compatible with etags, alas!
222 If you want regular expression support, you should delete this notice and
223 arrange to use the GNU regex.h and regex.c.
224 # endif
225 #endif
226 #include <regex.h>
227
228 /* Define CTAGS to make the program "ctags" compatible with the usual one.
229 Leave it undefined to make the program "etags", which makes emacs-style
230 tag tables and tags typedefs, #defines and struct/union/enum by default. */
231 #ifdef CTAGS
232 # undef CTAGS
233 # define CTAGS TRUE
234 #else
235 # define CTAGS FALSE
236 #endif
237
/*
 * String equality helpers.  Each one evaluates to non-zero when the two
 * strings compare equal and to zero otherwise:
 *
 *   streq (s, t)          exact comparison (strcmp)
 *   strcaseeq (s, t)      case-insensitive comparison (etags_strcasecmp)
 *   strneq (s, t, n)      exact comparison of at most N chars (strncmp)
 *   strncaseeq (s, t, n)  case-insensitive, at most N chars
 *                         (etags_strncasecmp)
 *
 * BOTH arguments must be non-NULL: the comparison functions dereference
 * each of them, so the precondition is checked with `&&' in every macro.
 * The check is an assert, hence compiled out when assert is disabled.
 * Arguments may be evaluated more than once when the assert is active,
 * so do not pass expressions with side effects.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
242
243 #define CHARS 256 /* number of distinct char values: 2^CHAR_BIT for 8-bit chars */
244 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
245 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
246 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
247 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
248 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
249 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
250
251 #define ISALNUM(c) isalnum (CHAR(c))
252 #define ISALPHA(c) isalpha (CHAR(c))
253 #define ISDIGIT(c) isdigit (CHAR(c))
254 #define ISLOWER(c) islower (CHAR(c))
255
256 #define lowcase(c) tolower (CHAR(c))
257 #define upcase(c) toupper (CHAR(c))
258
259
260 /*
261 * xnew, xrnew -- allocate, reallocate storage
262 *
263 * SYNOPSIS: Type *xnew (int n, Type);
264 * void xrnew (OldPointer, int n, Type);
265 */
266 #if DEBUG
267 # include "chkmalloc.h"
268 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
269 (n) * sizeof (Type)))
270 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
271 (char *) (op), (n) * sizeof (Type)))
272 #else
273 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
274 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
275 (char *) (op), (n) * sizeof (Type)))
276 #endif
277
278 #define bool int
279
280 typedef void Lang_function __P((FILE *));
281
282 typedef struct
283 {
284 char *suffix; /* file name suffix for this compressor */
285 char *command; /* takes one arg and decompresses to stdout */
286 } compressor;
287
288 typedef struct
289 {
290 char *name; /* language name */
291 char *help; /* detailed help for the language */
292 Lang_function *function; /* parse function */
293 char **suffixes; /* name suffixes of this language's files */
294 char **filenames; /* names of this language's files */
295 char **interpreters; /* interpreters for this language */
296 bool metasource; /* source used to generate other sources */
297 } language;
298
299 typedef struct fdesc
300 {
301 struct fdesc *next; /* for the linked list */
302 char *infname; /* uncompressed input file name */
303 char *infabsname; /* absolute uncompressed input file name */
304 char *infabsdir; /* absolute dir of input file */
305 char *taggedfname; /* file name to write in tagfile */
306 language *lang; /* language of file */
307 char *prop; /* file properties to write in tagfile */
308 bool usecharno; /* etags tags shall contain char number */
309 bool written; /* entry written in the tags file */
310 } fdesc;
311
312 typedef struct node_st
313 { /* sorting structure */
314 struct node_st *left, *right; /* left and right sons */
315 fdesc *fdp; /* description of file to whom tag belongs */
316 char *name; /* tag name */
317 char *regex; /* search regexp */
318 bool valid; /* write this tag on the tag file */
319 bool is_func; /* function tag: use regexp in CTAGS mode */
320 bool been_warned; /* warning already given for duplicated tag */
321 int lno; /* line number tag is on */
322 long cno; /* character number line starts on */
323 } node;
324
325 /*
326 * A `linebuffer' is a structure which holds a line of text.
327 * `readline_internal' reads a line from a stream into a linebuffer
328 * and works regardless of the length of the line.
329 * SIZE is the size of BUFFER, LEN is the length of the string in
330 * BUFFER after readline reads it.
331 */
332 typedef struct
333 {
334 long size;
335 int len;
336 char *buffer;
337 } linebuffer;
338
339 /* Used to support mixing of --lang and file names. */
340 typedef struct
341 {
342 enum {
343 at_language, /* a language specification */
344 at_regexp, /* a regular expression */
345 at_filename, /* a file name */
346 at_stdin, /* read from stdin here */
347 at_end /* stop parsing the list */
348 } arg_type; /* argument type */
349 language *lang; /* language associated with the argument */
350 char *what; /* the argument itself */
351 } argument;
352
353 /* Structure defining a regular expression. */
354 typedef struct regexp
355 {
356 struct regexp *p_next; /* pointer to next in list */
357 language *lang; /* if set, use only for this language */
358 char *pattern; /* the regexp pattern */
359 char *name; /* tag name */
360 struct re_pattern_buffer *pat; /* the compiled pattern */
361 struct re_registers regs; /* re registers */
362 bool error_signaled; /* already signaled for this regexp */
363 bool force_explicit_name; /* do not allow implicit tag name */
364 bool ignore_case; /* ignore case when matching */
365 bool multi_line; /* do a multi-line match on the whole file */
366 } regexp;
367
368
369 /* Many compilers barf on this:
370 Lang_function Ada_funcs;
371 so let's write it this way */
372 static void Ada_funcs __P((FILE *));
373 static void Asm_labels __P((FILE *));
374 static void C_entries __P((int c_ext, FILE *));
375 static void default_C_entries __P((FILE *));
376 static void plain_C_entries __P((FILE *));
377 static void Cjava_entries __P((FILE *));
378 static void Cobol_paragraphs __P((FILE *));
379 static void Cplusplus_entries __P((FILE *));
380 static void Cstar_entries __P((FILE *));
381 static void Erlang_functions __P((FILE *));
382 static void Forth_words __P((FILE *));
383 static void Fortran_functions __P((FILE *));
384 static void HTML_labels __P((FILE *));
385 static void Lisp_functions __P((FILE *));
386 static void Lua_functions __P((FILE *));
387 static void Makefile_targets __P((FILE *));
388 static void Pascal_functions __P((FILE *));
389 static void Perl_functions __P((FILE *));
390 static void PHP_functions __P((FILE *));
391 static void PS_functions __P((FILE *));
392 static void Prolog_functions __P((FILE *));
393 static void Python_functions __P((FILE *));
394 static void Scheme_functions __P((FILE *));
395 static void TeX_commands __P((FILE *));
396 static void Texinfo_nodes __P((FILE *));
397 static void Yacc_entries __P((FILE *));
398 static void just_read_file __P((FILE *));
399
400 static void print_language_names __P((void));
401 static void print_version __P((void));
402 static void print_help __P((argument *));
403 int main __P((int, char **));
404
405 static compressor *get_compressor_from_suffix __P((char *, char **));
406 static language *get_language_from_langname __P((const char *));
407 static language *get_language_from_interpreter __P((char *));
408 static language *get_language_from_filename __P((char *, bool));
409 static void readline __P((linebuffer *, FILE *));
410 static long readline_internal __P((linebuffer *, FILE *));
411 static bool nocase_tail __P((char *));
412 static void get_tag __P((char *, char **));
413
414 static void analyse_regex __P((char *));
415 static void free_regexps __P((void));
416 static void regex_tag_multiline __P((void));
417 static void error __P((const char *, const char *));
418 static void suggest_asking_for_help __P((void));
419 void fatal __P((char *, char *));
420 static void pfatal __P((char *));
421 static void add_node __P((node *, node **));
422
423 static void init __P((void));
424 static void process_file_name __P((char *, language *));
425 static void process_file __P((FILE *, char *, language *));
426 static void find_entries __P((FILE *));
427 static void free_tree __P((node *));
428 static void free_fdesc __P((fdesc *));
429 static void pfnote __P((char *, bool, char *, int, int, long));
430 static void make_tag __P((char *, int, bool, char *, int, int, long));
431 static void invalidate_nodes __P((fdesc *, node **));
432 static void put_entries __P((node *));
433
434 static char *concat __P((char *, char *, char *));
435 static char *skip_spaces __P((char *));
436 static char *skip_non_spaces __P((char *));
437 static char *savenstr __P((char *, int));
438 static char *savestr __P((char *));
439 static char *etags_strchr __P((const char *, int));
440 static char *etags_strrchr __P((const char *, int));
441 static int etags_strcasecmp __P((const char *, const char *));
442 static int etags_strncasecmp __P((const char *, const char *, int));
443 static char *etags_getcwd __P((void));
444 static char *relative_filename __P((char *, char *));
445 static char *absolute_filename __P((char *, char *));
446 static char *absolute_dirname __P((char *, char *));
447 static bool filename_is_absolute __P((char *f));
448 static void canonicalize_filename __P((char *));
449 static void linebuffer_init __P((linebuffer *));
450 static void linebuffer_setlen __P((linebuffer *, int));
451 static PTR xmalloc __P((unsigned int));
452 static PTR xrealloc __P((char *, unsigned int));
453
454 \f
455 static char searchar = '/'; /* use /.../ searches */
456
457 static char *tagfile; /* output file */
458 static char *progname; /* name this program was invoked with */
459 static char *cwd; /* current working directory */
460 static char *tagfiledir; /* directory of tagfile */
461 static FILE *tagf; /* ioptr for tags file */
462
463 static fdesc *fdhead; /* head of file description list */
464 static fdesc *curfdp; /* current file description */
465 static int lineno; /* line number of current line */
466 static long charno; /* current character number */
467 static long linecharno; /* charno of start of current line */
468 static char *dbp; /* pointer to start of current tag */
469
470 static const int invalidcharno = -1;
471
472 static node *nodehead; /* the head of the binary tree of tags */
473 static node *last_node; /* the last node created */
474
475 static linebuffer lb; /* the current line */
476 static linebuffer filebuf; /* a buffer containing the whole file */
477 static linebuffer token_name; /* a buffer containing a tag name */
478
479 /* boolean "functions" (see init) */
480 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
481 static char
482 /* white chars */
483 *white = " \f\t\n\r\v",
484 /* not in a name */
485 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
486 /* token ending chars */
487 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
488 /* token starting chars */
489 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
490 /* valid in-token chars */
491 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
492
493 static bool append_to_tagfile; /* -a: append to tags */
494 /* The next five default to TRUE for etags, but to FALSE for ctags. */
495 static bool typedefs; /* -t: create tags for C and Ada typedefs */
496 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
497 /* 0 struct/enum/union decls, and C++ */
498 /* member functions. */
499 static bool constantypedefs; /* -d: create tags for C #define, enum */
500 /* constants and variables. */
501 /* -D: opposite of -d. Default under ctags. */
502 static bool globals; /* create tags for global variables */
503 static bool members; /* create tags for C member variables */
504 static bool declarations; /* --declarations: tag them and extern in C&Co*/
505 static bool no_line_directive; /* ignore #line directives (undocumented) */
506 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
507 static bool update; /* -u: update tags */
508 static bool vgrind_style; /* -v: create vgrind style index output */
509 static bool no_warnings; /* -w: suppress warnings (undocumented) */
510 static bool cxref_style; /* -x: create cxref style output */
511 static bool cplusplus; /* .[hc] means C++, not C */
512 static bool ignoreindent; /* -I: ignore indentation in C */
513 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
514
515 /* STDIN is defined in LynxOS system headers */
516 #ifdef STDIN
517 # undef STDIN
518 #endif
519
520 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
521 static bool parsing_stdin; /* --parse-stdin used */
522
523 static regexp *p_head; /* list of all regexps */
524 static bool need_filebuf; /* some regexes are multi-line */
525
526 static struct option longopts[] =
527 {
528 { "append", no_argument, NULL, 'a' },
529 { "packages-only", no_argument, &packages_only, TRUE },
530 { "c++", no_argument, NULL, 'C' },
531 { "declarations", no_argument, &declarations, TRUE },
532 { "no-line-directive", no_argument, &no_line_directive, TRUE },
533 { "no-duplicates", no_argument, &no_duplicates, TRUE },
534 { "help", no_argument, NULL, 'h' },
535 { "help", no_argument, NULL, 'H' },
536 { "ignore-indentation", no_argument, NULL, 'I' },
537 { "language", required_argument, NULL, 'l' },
538 { "members", no_argument, &members, TRUE },
539 { "no-members", no_argument, &members, FALSE },
540 { "output", required_argument, NULL, 'o' },
541 { "regex", required_argument, NULL, 'r' },
542 { "no-regex", no_argument, NULL, 'R' },
543 { "ignore-case-regex", required_argument, NULL, 'c' },
544 { "parse-stdin", required_argument, NULL, STDIN },
545 { "version", no_argument, NULL, 'V' },
546
547 #if CTAGS /* Ctags options */
548 { "backward-search", no_argument, NULL, 'B' },
549 { "cxref", no_argument, NULL, 'x' },
550 { "defines", no_argument, NULL, 'd' },
551 { "globals", no_argument, &globals, TRUE },
552 { "typedefs", no_argument, NULL, 't' },
553 { "typedefs-and-c++", no_argument, NULL, 'T' },
554 { "update", no_argument, NULL, 'u' },
555 { "vgrind", no_argument, NULL, 'v' },
556 { "no-warn", no_argument, NULL, 'w' },
557
558 #else /* Etags options */
559 { "no-defines", no_argument, NULL, 'D' },
560 { "no-globals", no_argument, &globals, FALSE },
561 { "include", required_argument, NULL, 'i' },
562 #endif
563 { NULL }
564 };
565
566 static compressor compressors[] =
567 {
568 { "z", "gzip -d -c"},
569 { "Z", "gzip -d -c"},
570 { "gz", "gzip -d -c"},
571 { "GZ", "gzip -d -c"},
572 { "bz2", "bzip2 -d -c" },
573 { NULL }
574 };
575
576 /*
577 * Language stuff.
578 */
579
580 /* Ada code */
581 static char *Ada_suffixes [] =
582 { "ads", "adb", "ada", NULL };
583 static char Ada_help [] =
584 "In Ada code, functions, procedures, packages, tasks and types are\n\
585 tags. Use the `--packages-only' option to create tags for\n\
586 packages only.\n\
587 Ada tag names have suffixes indicating the type of entity:\n\
588 Entity type: Qualifier:\n\
589 ------------ ----------\n\
590 function /f\n\
591 procedure /p\n\
592 package spec /s\n\
593 package body /b\n\
594 type /t\n\
595 task /k\n\
596 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
597 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
598 will just search for any tag `bidule'.";
599
600 /* Assembly code */
601 static char *Asm_suffixes [] =
602 { "a", /* Unix assembler */
603 "asm", /* Microcontroller assembly */
604 "def", /* BSO/Tasking definition includes */
605 "inc", /* Microcontroller include files */
606 "ins", /* Microcontroller include files */
607 "s", "sa", /* Unix assembler */
608 "S", /* cpp-processed Unix assembler */
609 "src", /* BSO/Tasking C compiler output */
610 NULL
611 };
612 static char Asm_help [] =
613 "In assembler code, labels appearing at the beginning of a line,\n\
614 followed by a colon, are tags.";
615
616
617 /* Note that .c and .h can be considered C++, if the --c++ flag was
618 given, or if the `class' or `template' keywords are met inside the file.
619 That is why default_C_entries is called for these. */
620 static char *default_C_suffixes [] =
621 { "c", "h", NULL };
622 static char default_C_help [] =
623 "In C code, any C function or typedef is a tag, and so are\n\
624 definitions of `struct', `union' and `enum'. `#define' macro\n\
625 definitions and `enum' constants are tags unless you specify\n\
626 `--no-defines'. Global variables are tags unless you specify\n\
627 `--no-globals' and so are struct members unless you specify\n\
628 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
629 `--no-members' can make the tags table file much smaller.\n\
630 You can tag function declarations and external variables by\n\
631 using `--declarations'.";
632
633 static char *Cplusplus_suffixes [] =
634 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
635 "M", /* Objective C++ */
636 "pdb", /* Postscript with C syntax */
637 NULL };
638 static char Cplusplus_help [] =
639 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
640 --help --lang=c --lang=c++ for full help.)\n\
641 In addition to C tags, member functions are also recognized. Member\n\
642 variables are recognized unless you use the `--no-members' option.\n\
643 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
644 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
645 `operator+'.";
646
647 static char *Cjava_suffixes [] =
648 { "java", NULL };
649 static char Cjava_help [] =
650 "In Java code, all the tags constructs of C and C++ code are\n\
651 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
652
653
654 static char *Cobol_suffixes [] =
655 { "COB", "cob", NULL };
656 static char Cobol_help [] =
657 "In Cobol code, tags are paragraph names; that is, any word\n\
658 starting in column 8 and followed by a period.";
659
660 static char *Cstar_suffixes [] =
661 { "cs", "hs", NULL };
662
663 static char *Erlang_suffixes [] =
664 { "erl", "hrl", NULL };
665 static char Erlang_help [] =
666 "In Erlang code, the tags are the functions, records and macros\n\
667 defined in the file.";
668
/* Forth code: NULL-terminated list of file name suffixes (without the
   leading dot).  Declared static like every other suffix table in this
   file, so the symbol is not exported from this translation unit.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
671 static char Forth_help [] =
672 "In Forth code, tags are words defined by `:',\n\
673 constant, code, create, defer, value, variable, buffer:, field.";
674
675 static char *Fortran_suffixes [] =
676 { "F", "f", "f90", "for", NULL };
677 static char Fortran_help [] =
678 "In Fortran code, functions, subroutines and block data are tags.";
679
680 static char *HTML_suffixes [] =
681 { "htm", "html", "shtml", NULL };
682 static char HTML_help [] =
683 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
684 `h3' headers. Also, tags are `name=' in anchors and all\n\
685 occurrences of `id='.";
686
687 static char *Lisp_suffixes [] =
688 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
689 static char Lisp_help [] =
690 "In Lisp code, any function defined with `defun', any variable\n\
691 defined with `defvar' or `defconst', and in general the first\n\
692 argument of any expression that starts with `(def' in column zero\n\
693 is a tag.";
694
695 static char *Lua_suffixes [] =
696 { "lua", "LUA", NULL };
697 static char Lua_help [] =
698 "In Lua scripts, all functions are tags.";
699
700 static char *Makefile_filenames [] =
701 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
702 static char Makefile_help [] =
703 "In makefiles, targets are tags; additionally, variables are tags\n\
704 unless you specify `--no-globals'.";
705
706 static char *Objc_suffixes [] =
707 { "lm", /* Objective lex file */
708 "m", /* Objective C file */
709 NULL };
710 static char Objc_help [] =
711 "In Objective C code, tags include Objective C definitions for classes,\n\
712 class categories, methods and protocols. Tags for variables and\n\
713 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
714 (Use --help --lang=c --lang=objc --lang=java for full help.)";
715
716 static char *Pascal_suffixes [] =
717 { "p", "pas", NULL };
718 static char Pascal_help [] =
719 "In Pascal code, the tags are the functions and procedures defined\n\
720 in the file.";
721 /* " // this is for working around an Emacs highlighting bug... */
722
723 static char *Perl_suffixes [] =
724 { "pl", "pm", NULL };
725 static char *Perl_interpreters [] =
726 { "perl", "@PERL@", NULL };
727 static char Perl_help [] =
728 "In Perl code, the tags are the packages, subroutines and variables\n\
729 defined by the `package', `sub', `my' and `local' keywords. Use\n\
730 `--globals' if you want to tag global variables. Tags for\n\
731 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
732 defined in the default package is `main::SUB'.";
733
734 static char *PHP_suffixes [] =
735 { "php", "php3", "php4", NULL };
736 static char PHP_help [] =
737 "In PHP code, tags are functions, classes and defines. Unless you use\n\
738 the `--no-members' option, vars are tags too.";
739
740 static char *plain_C_suffixes [] =
741 { "pc", /* Pro*C file */
742 NULL };
743
744 static char *PS_suffixes [] =
745 { "ps", "psw", NULL }; /* .psw is for PSWrap */
746 static char PS_help [] =
747 "In PostScript code, the tags are the functions.";
748
749 static char *Prolog_suffixes [] =
750 { "prolog", NULL };
751 static char Prolog_help [] =
752 "In Prolog code, tags are predicates and rules at the beginning of\n\
753 line.";
754
755 static char *Python_suffixes [] =
756 { "py", NULL };
757 static char Python_help [] =
758 "In Python code, `def' or `class' at the beginning of a line\n\
759 generate a tag.";
760
761 /* Can't do the `SCM' or `scm' prefix with a version number. */
762 static char *Scheme_suffixes [] =
763 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
764 static char Scheme_help [] =
765 "In Scheme code, tags include anything defined with `def' or with a\n\
766 construct whose name starts with `def'. They also include\n\
767 variables set with `set!' at top level in the file.";
768
769 static char *TeX_suffixes [] =
770 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
771 static char TeX_help [] =
772 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
773 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
774 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
775 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
776 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
777 \n\
778 Other commands can be specified by setting the environment variable\n\
779 `TEXTAGS' to a colon-separated list like, for example,\n\
780 TEXTAGS=\"mycommand:myothercommand\".";
781
782
783 static char *Texinfo_suffixes [] =
784 { "texi", "texinfo", "txi", NULL };
785 static char Texinfo_help [] =
786 "for texinfo files, lines starting with @node are tagged.";
787
788 static char *Yacc_suffixes [] =
789 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
790 static char Yacc_help [] =
791 "In Bison or Yacc input files, each rule defines as a tag the\n\
792 nonterminal it constructs. The portions of the file that contain\n\
793 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
794 for full help).";
795
/* Help text for the pseudo-language `auto'.  Written as adjacent string
   literals so that the text does not depend on the column at which a
   backslash-continued line happens to start.  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";
799
800 static char none_help [] =
801 "`none' is not a real language, it indicates to only do\n\
802 regexp processing on files.";
803
804 static char no_lang_help [] =
805 "No detailed help available for this language.";
806
807
808 /*
809 * Table of languages.
810 *
811 * It is ok for a given function to be listed under more than one
812 * name. I just didn't.
813 */
814
/* Each initializer is positional.  The first four values are the
   language name, its help string, its tag-finding function and its
   list of file name suffixes.
   NOTE(review): judging by the identifiers used in the longer entries
   (Makefile_filenames, Perl_interpreters, TRUE for yacc), the fifth to
   seventh fields are presumably the special file names, the
   interpreter names and the `metasource' flag -- confirm against the
   declaration of `language', which is not in view here.
   Trailing fields that are left out are zero-initialized, so "auto"
   has no function and most languages have no file names or
   interpreters.  The users of this table in this file walk it from
   the top and stop at the entry whose name is NULL.  */
815 static language lang_names [] =
816 {
817 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
818 { "asm", Asm_help, Asm_labels, Asm_suffixes },
819 { "c", default_C_help, default_C_entries, default_C_suffixes },
820 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
821 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
822 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
823 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
824 { "forth", Forth_help, Forth_words, Forth_suffixes },
825 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
826 { "html", HTML_help, HTML_labels, HTML_suffixes },
827 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
828 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
829 { "lua", Lua_help, Lua_functions, Lua_suffixes },
830 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
831 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
832 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
833 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
834 { "php", PHP_help, PHP_functions, PHP_suffixes },
835 { "postscript",PS_help, PS_functions, PS_suffixes },
836 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
837 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
838 { "python", Python_help, Python_functions, Python_suffixes },
839 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
840 { "tex", TeX_help, TeX_commands, TeX_suffixes },
841 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
842 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
843 { "auto", auto_help }, /* default guessing scheme */
844 { "none", none_help, just_read_file }, /* regexp matching only */
845 { NULL } /* end of list */
846 };
847
848 \f
849 static void
850 print_language_names ()
851 {
852 language *lang;
853 char **name, **ext;
854
855 puts ("\nThese are the currently supported languages, along with the\n\
856 default file names and dot suffixes:");
857 for (lang = lang_names; lang->name != NULL; lang++)
858 {
859 printf (" %-*s", 10, lang->name);
860 if (lang->filenames != NULL)
861 for (name = lang->filenames; *name != NULL; name++)
862 printf (" %s", *name);
863 if (lang->suffixes != NULL)
864 for (ext = lang->suffixes; *ext != NULL; ext++)
865 printf (" .%s", *ext);
866 puts ("");
867 }
868 puts ("where `auto' means use default language for files based on file\n\
869 name suffix, and `none' means only do regexp processing on files.\n\
870 If no language is specified and no matching suffix is found,\n\
871 the first line of the file is read for a sharp-bang (#!) sequence\n\
872 followed by the name of an interpreter. If no such sequence is found,\n\
873 Fortran is tried first; if no tags are found, C is tried next.\n\
874 When parsing any C file, a \"class\" or \"template\" keyword\n\
875 switches to C++.");
876 puts ("Compressed files are supported using gzip and bzip2.\n\
877 \n\
878 For detailed help on a given language use, for example,\n\
879 etags --help --lang=ada.");
880 }
881
882 #ifndef EMACS_NAME
883 # define EMACS_NAME "standalone"
884 #endif
885 #ifndef VERSION
886 # define VERSION "17.34"
887 #endif
888 static void
889 print_version ()
890 {
891 /* Makes it easier to update automatically. */
892 char emacs_copyright[] = "Copyright (C) 2007 Free Software Foundation, Inc.";
893
894 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
895 puts (emacs_copyright);
896 puts ("This program is distributed under the terms in ETAGS.README");
897
898 exit (EXIT_SUCCESS);
899 }
900
/* Compile-time switch: when it is TRUE, print_help also describes the
   options guarded by it below (--no-line-directive and the two -w
   variants).  It defaults to FALSE unless the build defines it.  */
901 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
902 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
903 #endif
904
/*
 * Print help on standard output and exit with EXIT_SUCCESS; this
 * function does not return.
 *
 * ARGBUFFER is the list of parsed arguments, terminated by an entry
 * whose arg_type is at_end.  If it contains at_language entries, only
 * the help string of each such language is printed, with a blank line
 * between consecutive ones.  Otherwise the general usage text is
 * printed: the option descriptions (CTAGS selects which options and
 * which wording are shown), the language list produced by
 * print_language_names, and the bug-report address.
 */
905 static void
906 print_help (argbuffer)
907 argument *argbuffer;
908 {
909 bool help_for_lang = FALSE;
910
/* First pass: language-specific help for every --language argument. */
911 for (; argbuffer->arg_type != at_end; argbuffer++)
912 if (argbuffer->arg_type == at_language)
913 {
914 if (help_for_lang)
915 puts ("");
916 puts (argbuffer->lang->help);
917 help_for_lang = TRUE;
918 }
919
/* Language help was printed: skip the general usage text entirely. */
920 if (help_for_lang)
921 exit (EXIT_SUCCESS);
922
923 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
924 \n\
925 These are the options accepted by %s.\n", progname, progname);
926 if (NO_LONG_OPTIONS)
927 puts ("WARNING: long option names do not work with this executable,\n\
928 as it is not linked with GNU getopt.");
929 else
930 puts ("You may use unambiguous abbreviations for the long option names.");
931 puts (" A - as file name means read names from stdin (one per line).\n\
932 Absolute names are stored in the output file as they are.\n\
933 Relative ones are stored relative to the output file's directory.\n");
934
935 puts ("-a, --append\n\
936 Append tag entries to existing tags file.");
937
938 puts ("--packages-only\n\
939 For Ada files, only generate tags for packages.");
940
941 if (CTAGS)
942 puts ("-B, --backward-search\n\
943 Write the search commands for the tag entries using '?', the\n\
944 backward-search command instead of '/', the forward-search command.");
945
946 /* This option is mostly obsolete, because etags can now automatically
947 detect C++. Retained for backward compatibility and for debugging and
948 experimentation. In principle, we could want to tag as C++ even
949 before any "class" or "template" keyword.
950 puts ("-C, --c++\n\
951 Treat files whose name suffix defaults to C language as C++ files.");
952 */
953
/* The description of --declarations is completed by one of two
   second lines, depending on whether this is ctags or etags. */
954 puts ("--declarations\n\
955 In C and derived languages, create tags for function declarations,");
956 if (CTAGS)
957 puts ("\tand create tags for extern variables if --globals is used.");
958 else
959 puts
960 ("\tand create tags for extern variables unless --no-globals is used.");
961
962 if (CTAGS)
963 puts ("-d, --defines\n\
964 Create tag entries for C #define constants and enum constants, too.");
965 else
966 puts ("-D, --no-defines\n\
967 Don't create tag entries for C #define constants and enum constants.\n\
968 This makes the tags file smaller.");
969
970 if (!CTAGS)
971 puts ("-i FILE, --include=FILE\n\
972 Include a note in tag file indicating that, when searching for\n\
973 a tag, one should also consult the tags file FILE after\n\
974 checking the current file.");
975
976 puts ("-l LANG, --language=LANG\n\
977 Force the following files to be considered as written in the\n\
978 named language up to the next --language=LANG option.");
979
980 if (CTAGS)
981 puts ("--globals\n\
982 Create tag entries for global variables in some languages.");
983 else
984 puts ("--no-globals\n\
985 Do not create tag entries for global variables in some\n\
986 languages. This makes the tags file smaller.");
987
988 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
989 puts ("--no-line-directive\n\
990 Ignore #line preprocessor directives in C and derived languages.");
991
992 if (CTAGS)
993 puts ("--members\n\
994 Create tag entries for members of structures in some languages.");
995 else
996 puts ("--no-members\n\
997 Do not create tag entries for members of structures\n\
998 in some languages.");
999
1000 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1001 Make a tag for each line matching a regular expression pattern\n\
1002 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1003 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1004 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1005 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1006 puts (" If TAGNAME/ is present, the tags created are named.\n\
1007 For example Tcl named tags can be created with:\n\
1008 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1009 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1010 `m' means to allow multi-line matches, `s' implies `m' and\n\
1011 causes dot to match any character, including newline.");
1012
1013 puts ("-R, --no-regex\n\
1014 Don't create tags from regexps for the following files.");
1015
1016 puts ("-I, --ignore-indentation\n\
1017 In C and C++ do not assume that a closing brace in the first\n\
1018 column is the final brace of a function or structure definition.");
1019
1020 puts ("-o FILE, --output=FILE\n\
1021 Write the tags to FILE.");
1022
1023 puts ("--parse-stdin=NAME\n\
1024 Read from standard input and record tags as belonging to file NAME.");
1025
/* The remaining option descriptions, up to --version, are printed
   only by ctags. */
1026 if (CTAGS)
1027 {
1028 puts ("-t, --typedefs\n\
1029 Generate tag entries for C and Ada typedefs.");
1030 puts ("-T, --typedefs-and-c++\n\
1031 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1032 and C++ member functions.");
1033 }
1034
1035 if (CTAGS)
1036 puts ("-u, --update\n\
1037 Update the tag entries for the given files, leaving tag\n\
1038 entries for other files in place. Currently, this is\n\
1039 implemented by deleting the existing entries for the given\n\
1040 files and then rewriting the new entries at the end of the\n\
1041 tags file. It is often faster to simply rebuild the entire\n\
1042 tag file than to use this.");
1043
1044 if (CTAGS)
1045 {
1046 puts ("-v, --vgrind\n\
1047 Print on the standard output an index of items intended for\n\
1048 human consumption, similar to the output of vgrind. The index\n\
1049 is sorted, and gives the page number of each item.");
1050
1051 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1052 puts ("-w, --no-duplicates\n\
1053 Do not create duplicate tag entries, for compatibility with\n\
1054 traditional ctags.");
1055
1056 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1057 puts ("-w, --no-warn\n\
1058 Suppress warning messages about duplicate tag entries.");
1059
1060 puts ("-x, --cxref\n\
1061 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1062 The output uses line numbers instead of page numbers, but\n\
1063 beyond that the differences are cosmetic; try both to see\n\
1064 which you like.");
1065 }
1066
1067 puts ("-V, --version\n\
1068 Print the version of the program.\n\
1069 -h, --help\n\
1070 Print this help message.\n\
1071 Followed by one or more `--language' options prints detailed\n\
1072 help about tag generation for the specified languages.");
1073
1074 print_language_names ();
1075
1076 puts ("");
1077 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1078
1079 exit (EXIT_SUCCESS);
1080 }
1081
1082 \f
1083 #ifdef VMS /* VMS specific functions */
1084
/* End-of-string character.  */
#define EOS '\0'

/* Longest file specification that fits in a vspec, not counting the
   terminating EOS.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN 255

/* Buffer for one file specification: a length word immediately
   followed by the characters.  BODY has room for MAX_FILE_SPEC_LEN
   characters plus a terminating EOS.  */
typedef struct
{
  short curlen;                         /* number of characters in use */
  char body[MAX_FILE_SPEC_LEN + 1];     /* the characters themselves */
} vspec;
1094
1095 /*
1096 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1097 returning in each successive call the next file name matching the input
1098 spec. The function expects that each in_spec passed
1099 to it will be processed to completion; in particular, up to and
1100 including the call following that in which the last matching name
1101 is returned, the function ignores the value of in_spec, and will
1102 only start processing a new spec with the following call.
1103 If an error occurs, on return out_spec contains the value
1104 of in_spec when the error occurred.
1105
1106 With each successive file name returned in out_spec, the
1107 function's return value is one. When there are no more matching
1108 names the function returns zero. If on the first call no file
1109 matches in_spec, or there is any other error, -1 is returned.
1110 */
1111
1112 #include <rmsdef.h>
1113 #include <descrip.h>
1114 #define OUTSIZE MAX_FILE_SPEC_LEN
1115 static short
1116 fn_exp (out, in)
1117 vspec *out;
1118 char *in;
1119 {
1120 static long context = 0;
1121 static struct dsc$descriptor_s o;
1122 static struct dsc$descriptor_s i;
1123 static bool pass1 = TRUE;
1124 long status;
1125 short retval;
1126
1127 if (pass1)
1128 {
1129 pass1 = FALSE;
1130 o.dsc$a_pointer = (char *) out;
1131 o.dsc$w_length = (short)OUTSIZE;
1132 i.dsc$a_pointer = in;
1133 i.dsc$w_length = (short)strlen(in);
1134 i.dsc$b_dtype = DSC$K_DTYPE_T;
1135 i.dsc$b_class = DSC$K_CLASS_S;
1136 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1137 o.dsc$b_class = DSC$K_CLASS_VS;
1138 }
1139 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1140 {
1141 out->body[out->curlen] = EOS;
1142 return 1;
1143 }
1144 else if (status == RMS$_NMF)
1145 retval = 0;
1146 else
1147 {
1148 strcpy(out->body, in);
1149 retval = -1;
1150 }
1151 lib$find_file_end(&context);
1152 pass1 = TRUE;
1153 return retval;
1154 }
1155
1156 /*
1157 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1158 name of each file specified by the provided arg expanding wildcards.
1159 */
1160 static char *
1161 gfnames (arg, p_error)
1162 char *arg;
1163 bool *p_error;
1164 {
1165 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1166
1167 switch (fn_exp (&filename, arg))
1168 {
1169 case 1:
1170 *p_error = FALSE;
1171 return filename.body;
1172 case 0:
1173 *p_error = FALSE;
1174 return NULL;
1175 default:
1176 *p_error = TRUE;
1177 return filename.body;
1178 }
1179 }
1180
#ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Stand-in for the C library's `system': report that commands cannot
   be run and return -1.  The callers in this file compare the result
   with EXIT_SUCCESS or pass it to `exit', so the function must return
   a definite failure value instead of falling off its end.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1188
1189 #define VERSION_DELIM ';'
1190 char *massage_name (s)
1191 char *s;
1192 {
1193 char *start = s;
1194
1195 for ( ; *s; s++)
1196 if (*s == VERSION_DELIM)
1197 {
1198 *s = EOS;
1199 break;
1200 }
1201 else
1202 *s = lowcase (*s);
1203 return start;
1204 }
1205 #endif /* VMS */
1206
1207 \f
1208 int
1209 main (argc, argv)
1210 int argc;
1211 char *argv[];
1212 {
1213 int i;
1214 unsigned int nincluded_files;
1215 char **included_files;
1216 argument *argbuffer;
1217 int current_arg, file_count;
1218 linebuffer filename_lb;
1219 bool help_asked = FALSE;
1220 #ifdef VMS
1221 bool got_err;
1222 #endif
1223 char *optstring;
1224 int opt;
1225
1226
1227 #ifdef DOS_NT
1228 _fmode = O_BINARY; /* all of files are treated as binary files */
1229 #endif /* DOS_NT */
1230
1231 progname = argv[0];
1232 nincluded_files = 0;
1233 included_files = xnew (argc, char *);
1234 current_arg = 0;
1235 file_count = 0;
1236
1237 /* Allocate enough no matter what happens. Overkill, but each one
1238 is small. */
1239 argbuffer = xnew (argc, argument);
1240
1241 /*
1242 * If etags, always find typedefs and structure tags. Why not?
1243 * Also default to find macro constants, enum constants, struct
1244 * members and global variables.
1245 */
1246 if (!CTAGS)
1247 {
1248 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1249 globals = members = TRUE;
1250 }
1251
1252 /* When the optstring begins with a '-' getopt_long does not rearrange the
1253 non-options arguments to be at the end, but leaves them alone. */
1254 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1255 "ac:Cf:Il:o:r:RSVhH",
1256 (CTAGS) ? "BxdtTuvw" : "Di:");
1257
1258 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1259 switch (opt)
1260 {
1261 case 0:
1262 /* If getopt returns 0, then it has already processed a
1263 long-named option. We should do nothing. */
1264 break;
1265
1266 case 1:
1267 /* This means that a file name has been seen. Record it. */
1268 argbuffer[current_arg].arg_type = at_filename;
1269 argbuffer[current_arg].what = optarg;
1270 ++current_arg;
1271 ++file_count;
1272 break;
1273
1274 case STDIN:
1275 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1276 argbuffer[current_arg].arg_type = at_stdin;
1277 argbuffer[current_arg].what = optarg;
1278 ++current_arg;
1279 ++file_count;
1280 if (parsing_stdin)
1281 fatal ("cannot parse standard input more than once", (char *)NULL);
1282 parsing_stdin = TRUE;
1283 break;
1284
1285 /* Common options. */
1286 case 'a': append_to_tagfile = TRUE; break;
1287 case 'C': cplusplus = TRUE; break;
1288 case 'f': /* for compatibility with old makefiles */
1289 case 'o':
1290 if (tagfile)
1291 {
1292 error ("-o option may only be given once.", (char *)NULL);
1293 suggest_asking_for_help ();
1294 /* NOTREACHED */
1295 }
1296 tagfile = optarg;
1297 break;
1298 case 'I':
1299 case 'S': /* for backward compatibility */
1300 ignoreindent = TRUE;
1301 break;
1302 case 'l':
1303 {
1304 language *lang = get_language_from_langname (optarg);
1305 if (lang != NULL)
1306 {
1307 argbuffer[current_arg].lang = lang;
1308 argbuffer[current_arg].arg_type = at_language;
1309 ++current_arg;
1310 }
1311 }
1312 break;
1313 case 'c':
1314 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1315 optarg = concat (optarg, "i", ""); /* memory leak here */
1316 /* FALLTHRU */
1317 case 'r':
1318 argbuffer[current_arg].arg_type = at_regexp;
1319 argbuffer[current_arg].what = optarg;
1320 ++current_arg;
1321 break;
1322 case 'R':
1323 argbuffer[current_arg].arg_type = at_regexp;
1324 argbuffer[current_arg].what = NULL;
1325 ++current_arg;
1326 break;
1327 case 'V':
1328 print_version ();
1329 break;
1330 case 'h':
1331 case 'H':
1332 help_asked = TRUE;
1333 break;
1334
1335 /* Etags options */
1336 case 'D': constantypedefs = FALSE; break;
1337 case 'i': included_files[nincluded_files++] = optarg; break;
1338
1339 /* Ctags options. */
1340 case 'B': searchar = '?'; break;
1341 case 'd': constantypedefs = TRUE; break;
1342 case 't': typedefs = TRUE; break;
1343 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1344 case 'u': update = TRUE; break;
1345 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1346 case 'x': cxref_style = TRUE; break;
1347 case 'w': no_warnings = TRUE; break;
1348 default:
1349 suggest_asking_for_help ();
1350 /* NOTREACHED */
1351 }
1352
1353 /* No more options. Store the rest of arguments. */
1354 for (; optind < argc; optind++)
1355 {
1356 argbuffer[current_arg].arg_type = at_filename;
1357 argbuffer[current_arg].what = argv[optind];
1358 ++current_arg;
1359 ++file_count;
1360 }
1361
1362 argbuffer[current_arg].arg_type = at_end;
1363
1364 if (help_asked)
1365 print_help (argbuffer);
1366 /* NOTREACHED */
1367
1368 if (nincluded_files == 0 && file_count == 0)
1369 {
1370 error ("no input files specified.", (char *)NULL);
1371 suggest_asking_for_help ();
1372 /* NOTREACHED */
1373 }
1374
1375 if (tagfile == NULL)
1376 tagfile = CTAGS ? "tags" : "TAGS";
1377 cwd = etags_getcwd (); /* the current working directory */
1378 if (cwd[strlen (cwd) - 1] != '/')
1379 {
1380 char *oldcwd = cwd;
1381 cwd = concat (oldcwd, "/", "");
1382 free (oldcwd);
1383 }
1384 /* Relative file names are made relative to the current directory. */
1385 if (streq (tagfile, "-")
1386 || strneq (tagfile, "/dev/", 5))
1387 tagfiledir = cwd;
1388 else
1389 tagfiledir = absolute_dirname (tagfile, cwd);
1390
1391 init (); /* set up boolean "functions" */
1392
1393 linebuffer_init (&lb);
1394 linebuffer_init (&filename_lb);
1395 linebuffer_init (&filebuf);
1396 linebuffer_init (&token_name);
1397
1398 if (!CTAGS)
1399 {
1400 if (streq (tagfile, "-"))
1401 {
1402 tagf = stdout;
1403 #ifdef DOS_NT
1404 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1405 doesn't take effect until after `stdout' is already open). */
1406 if (!isatty (fileno (stdout)))
1407 setmode (fileno (stdout), O_BINARY);
1408 #endif /* DOS_NT */
1409 }
1410 else
1411 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1412 if (tagf == NULL)
1413 pfatal (tagfile);
1414 }
1415
1416 /*
1417 * Loop through files finding functions.
1418 */
1419 for (i = 0; i < current_arg; i++)
1420 {
1421 static language *lang; /* non-NULL if language is forced */
1422 char *this_file;
1423
1424 switch (argbuffer[i].arg_type)
1425 {
1426 case at_language:
1427 lang = argbuffer[i].lang;
1428 break;
1429 case at_regexp:
1430 analyse_regex (argbuffer[i].what);
1431 break;
1432 case at_filename:
1433 #ifdef VMS
1434 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1435 {
1436 if (got_err)
1437 {
1438 error ("can't find file %s\n", this_file);
1439 argc--, argv++;
1440 }
1441 else
1442 {
1443 this_file = massage_name (this_file);
1444 }
1445 #else
1446 this_file = argbuffer[i].what;
1447 #endif
1448 /* Input file named "-" means read file names from stdin
1449 (one per line) and use them. */
1450 if (streq (this_file, "-"))
1451 {
1452 if (parsing_stdin)
1453 fatal ("cannot parse standard input AND read file names from it",
1454 (char *)NULL);
1455 while (readline_internal (&filename_lb, stdin) > 0)
1456 process_file_name (filename_lb.buffer, lang);
1457 }
1458 else
1459 process_file_name (this_file, lang);
1460 #ifdef VMS
1461 }
1462 #endif
1463 break;
1464 case at_stdin:
1465 this_file = argbuffer[i].what;
1466 process_file (stdin, this_file, lang);
1467 break;
1468 }
1469 }
1470
1471 free_regexps ();
1472 free (lb.buffer);
1473 free (filebuf.buffer);
1474 free (token_name.buffer);
1475
1476 if (!CTAGS || cxref_style)
1477 {
1478 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1479 put_entries (nodehead);
1480 free_tree (nodehead);
1481 nodehead = NULL;
1482 if (!CTAGS)
1483 {
1484 fdesc *fdp;
1485
1486 /* Output file entries that have no tags. */
1487 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1488 if (!fdp->written)
1489 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1490
1491 while (nincluded_files-- > 0)
1492 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1493
1494 if (fclose (tagf) == EOF)
1495 pfatal (tagfile);
1496 }
1497
1498 exit (EXIT_SUCCESS);
1499 }
1500
1501 if (update)
1502 {
1503 char cmd[BUFSIZ];
1504 for (i = 0; i < current_arg; ++i)
1505 {
1506 switch (argbuffer[i].arg_type)
1507 {
1508 case at_filename:
1509 case at_stdin:
1510 break;
1511 default:
1512 continue; /* the for loop */
1513 }
1514 sprintf (cmd,
1515 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1516 tagfile, argbuffer[i].what, tagfile);
1517 if (system (cmd) != EXIT_SUCCESS)
1518 fatal ("failed to execute shell command", (char *)NULL);
1519 }
1520 append_to_tagfile = TRUE;
1521 }
1522
1523 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1524 if (tagf == NULL)
1525 pfatal (tagfile);
1526 put_entries (nodehead); /* write all the tags (CTAGS) */
1527 free_tree (nodehead);
1528 nodehead = NULL;
1529 if (fclose (tagf) == EOF)
1530 pfatal (tagfile);
1531
1532 if (CTAGS)
1533 if (append_to_tagfile || update)
1534 {
1535 char cmd[2*BUFSIZ+20];
1536 /* Maybe these should be used:
1537 setenv ("LC_COLLATE", "C", 1);
1538 setenv ("LC_ALL", "C", 1); */
1539 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1540 exit (system (cmd));
1541 }
1542 return EXIT_SUCCESS;
1543 }
1544
1545
1546 /*
1547 * Return a compressor given the file name. If EXTPTR is non-zero,
1548 * return a pointer into FILE where the compressor-specific
1549 * extension begins. If no compressor is found, NULL is returned
1550 * and EXTPTR is not significant.
1551 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1552 */
1553 static compressor *
1554 get_compressor_from_suffix (file, extptr)
1555 char *file;
1556 char **extptr;
1557 {
1558 compressor *compr;
1559 char *slash, *suffix;
1560
1561 /* This relies on FN to be after canonicalize_filename,
1562 so we don't need to consider backslashes on DOS_NT. */
1563 slash = etags_strrchr (file, '/');
1564 suffix = etags_strrchr (file, '.');
1565 if (suffix == NULL || suffix < slash)
1566 return NULL;
1567 if (extptr != NULL)
1568 *extptr = suffix;
1569 suffix += 1;
1570 /* Let those poor souls who live with DOS 8+3 file name limits get
1571 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1572 Only the first do loop is run if not MSDOS */
1573 do
1574 {
1575 for (compr = compressors; compr->suffix != NULL; compr++)
1576 if (streq (compr->suffix, suffix))
1577 return compr;
1578 if (!MSDOS)
1579 break; /* do it only once: not really a loop */
1580 if (extptr != NULL)
1581 *extptr = ++suffix;
1582 } while (*suffix != '\0');
1583 return NULL;
1584 }
1585
1586
1587
1588 /*
1589 * Return a language given the name.
1590 */
1591 static language *
1592 get_language_from_langname (name)
1593 const char *name;
1594 {
1595 language *lang;
1596
1597 if (name == NULL)
1598 error ("empty language name", (char *)NULL);
1599 else
1600 {
1601 for (lang = lang_names; lang->name != NULL; lang++)
1602 if (streq (name, lang->name))
1603 return lang;
1604 error ("unknown language \"%s\"", name);
1605 }
1606
1607 return NULL;
1608 }
1609
1610
1611 /*
1612 * Return a language given the interpreter name.
1613 */
1614 static language *
1615 get_language_from_interpreter (interpreter)
1616 char *interpreter;
1617 {
1618 language *lang;
1619 char **iname;
1620
1621 if (interpreter == NULL)
1622 return NULL;
1623 for (lang = lang_names; lang->name != NULL; lang++)
1624 if (lang->interpreters != NULL)
1625 for (iname = lang->interpreters; *iname != NULL; iname++)
1626 if (streq (*iname, interpreter))
1627 return lang;
1628
1629 return NULL;
1630 }
1631
1632
1633
1634 /*
1635 * Return a language given the file name.
1636 */
1637 static language *
1638 get_language_from_filename (file, case_sensitive)
1639 char *file;
1640 bool case_sensitive;
1641 {
1642 language *lang;
1643 char **name, **ext, *suffix;
1644
1645 /* Try whole file name first. */
1646 for (lang = lang_names; lang->name != NULL; lang++)
1647 if (lang->filenames != NULL)
1648 for (name = lang->filenames; *name != NULL; name++)
1649 if ((case_sensitive)
1650 ? streq (*name, file)
1651 : strcaseeq (*name, file))
1652 return lang;
1653
1654 /* If not found, try suffix after last dot. */
1655 suffix = etags_strrchr (file, '.');
1656 if (suffix == NULL)
1657 return NULL;
1658 suffix += 1;
1659 for (lang = lang_names; lang->name != NULL; lang++)
1660 if (lang->suffixes != NULL)
1661 for (ext = lang->suffixes; *ext != NULL; ext++)
1662 if ((case_sensitive)
1663 ? streq (*ext, suffix)
1664 : strcaseeq (*ext, suffix))
1665 return lang;
1666 return NULL;
1667 }
1668
1669 \f
1670 /*
1671 * This routine is called on each file argument.
1672 */
/*
 * Open the input file named FILE and hand it to process_file; LANG is
 * passed through unchanged.
 *
 * FILE is canonicalized in place first.  It is then skipped, with a
 * message, when it is the tags file itself (unless the tags file is
 * "-"), and skipped silently when its uncompressed name is already
 * recorded in the fdhead list.  If the name as given cannot be
 * stat'ed, alternative names are tried: the name without its
 * compressor suffix, or, when it has none, the name with each known
 * compressor suffix appended.  A file found under a compressed name
 * is read through a pipe from the compressor's command; otherwise the
 * file is opened directly.
 *
 * Throughout, REAL_NAME is an alias of either COMPRESSED_NAME or
 * UNCOMPRESSED_NAME, and the pointer comparison
 * `real_name == compressed_name' is what decides between popen/pclose
 * and fopen/fclose.
 */
1673 static void
1674 process_file_name (file, lang)
1675 char *file;
1676 language *lang;
1677 {
1678 struct stat stat_buf;
1679 FILE *inf;
1680 fdesc *fdp;
1681 compressor *compr;
1682 char *compressed_name, *uncompressed_name;
1683 char *ext, *real_name;
1684 int retval;
1685
1686 canonicalize_filename (file);
1687 if (streq (file, tagfile) && !streq (tagfile, "-"))
1688 {
1689 error ("skipping inclusion of %s in self.", file);
1690 return;
1691 }
/* Decide whether FILE carries a compressor suffix.  If it does, EXT
   points at where the suffix starts inside FILE.
   NOTE(review): savenstr presumably copies the first `ext - file'
   characters of FILE, i.e. the name without that suffix -- confirm. */
1692 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1693 {
1694 compressed_name = NULL;
1695 real_name = uncompressed_name = savestr (file);
1696 }
1697 else
1698 {
1699 real_name = compressed_name = savestr (file);
1700 uncompressed_name = savenstr (file, ext - file);
1701 }
1702
1703 /* If the canonicalized uncompressed name
1704 has already been dealt with, skip it silently. */
1705 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1706 {
1707 assert (fdp->infname != NULL);
1708 if (streq (uncompressed_name, fdp->infname))
1709 goto cleanup;
1710 }
1711
1712 if (stat (real_name, &stat_buf) != 0)
1713 {
1714 /* Reset real_name and try with a different name. */
1715 real_name = NULL;
1716 if (compressed_name != NULL) /* try with the given suffix */
1717 {
1718 if (stat (uncompressed_name, &stat_buf) == 0)
1719 real_name = uncompressed_name;
1720 }
1721 else /* try all possible suffixes */
1722 {
/* On leaving this loop with a file found, COMPR is the compressor
   whose suffix was appended and COMPRESSED_NAME owns the name. */
1723 for (compr = compressors; compr->suffix != NULL; compr++)
1724 {
1725 compressed_name = concat (file, ".", compr->suffix);
1726 if (stat (compressed_name, &stat_buf) != 0)
1727 {
1728 if (MSDOS)
1729 {
/* SUF starts at the appended dot.  Each round deletes one more
   character there (memmove shifts the rest of the string, NUL
   included, left by one) and tries the shortened name. */
1730 char *suf = compressed_name + strlen (file);
1731 size_t suflen = strlen (compr->suffix) + 1;
1732 for ( ; suf[1]; suf++, suflen--)
1733 {
1734 memmove (suf, suf + 1, suflen);
1735 if (stat (compressed_name, &stat_buf) == 0)
1736 {
1737 real_name = compressed_name;
1738 break;
1739 }
1740 }
1741 if (real_name != NULL)
1742 break;
1743 } /* MSDOS */
1744 free (compressed_name);
1745 compressed_name = NULL;
1746 }
1747 else
1748 {
1749 real_name = compressed_name;
1750 break;
1751 }
1752 }
1753 }
1754 if (real_name == NULL)
1755 {
1756 perror (file);
1757 goto cleanup;
1758 }
1759 } /* try with a different name */
1760
1761 if (!S_ISREG (stat_buf.st_mode))
1762 {
1763 error ("skipping %s: it is not a regular file.", real_name);
1764 goto cleanup;
1765 }
/* NOTE(review): the file name is appended to the compressor command
   as is and the result is run through popen, so a name containing
   shell metacharacters is interpreted by the shell. */
1766 if (real_name == compressed_name)
1767 {
1768 char *cmd = concat (compr->command, " ", real_name);
1769 inf = (FILE *) popen (cmd, "r");
1770 free (cmd);
1771 }
1772 else
1773 inf = fopen (real_name, "r");
1774 if (inf == NULL)
1775 {
1776 perror (real_name);
1777 goto cleanup;
1778 }
1779
/* The file is always recorded under its uncompressed name. */
1780 process_file (inf, uncompressed_name, lang);
1781
1782 if (real_name == compressed_name)
1783 retval = pclose (inf);
1784 else
1785 retval = fclose (inf);
1786 if (retval < 0)
1787 pfatal (file);
1788
/* Common exit: release both name copies and clear the per-file
   globals last_node and curfdp. */
1789 cleanup:
1790 if (compressed_name) free (compressed_name);
1791 if (uncompressed_name) free (uncompressed_name);
1792 last_node = NULL;
1793 curfdp = NULL;
1794 return;
1795 }
1796
1797 static void
1798 process_file (fh, fn, lang)
1799 FILE *fh;
1800 char *fn;
1801 language *lang;
1802 {
1803 static const fdesc emptyfdesc;
1804 fdesc *fdp;
1805
1806 /* Create a new input file description entry. */
1807 fdp = xnew (1, fdesc);
1808 *fdp = emptyfdesc;
1809 fdp->next = fdhead;
1810 fdp->infname = savestr (fn);
1811 fdp->lang = lang;
1812 fdp->infabsname = absolute_filename (fn, cwd);
1813 fdp->infabsdir = absolute_dirname (fn, cwd);
1814 if (filename_is_absolute (fn))
1815 {
1816 /* An absolute file name. Canonicalize it. */
1817 fdp->taggedfname = absolute_filename (fn, NULL);
1818 }
1819 else
1820 {
1821 /* A file name relative to cwd. Make it relative
1822 to the directory of the tags file. */
1823 fdp->taggedfname = relative_filename (fn, tagfiledir);
1824 }
1825 fdp->usecharno = TRUE; /* use char position when making tags */
1826 fdp->prop = NULL;
1827 fdp->written = FALSE; /* not written on tags file yet */
1828
1829 fdhead = fdp;
1830 curfdp = fdhead; /* the current file description */
1831
1832 find_entries (fh);
1833
1834 /* If not Ctags, and if this is not metasource and if it contained no #line
1835 directives, we can write the tags and free all nodes pointing to
1836 curfdp. */
1837 if (!CTAGS
1838 && curfdp->usecharno /* no #line directives in this file */
1839 && !curfdp->lang->metasource)
1840 {
1841 node *np, *prev;
1842
1843 /* Look for the head of the sublist relative to this file. See add_node
1844 for the structure of the node tree. */
1845 prev = NULL;
1846 for (np = nodehead; np != NULL; prev = np, np = np->left)
1847 if (np->fdp == curfdp)
1848 break;
1849
1850 /* If we generated tags for this file, write and delete them. */
1851 if (np != NULL)
1852 {
1853 /* This is the head of the last sublist, if any. The following
1854 instructions depend on this being true. */
1855 assert (np->left == NULL);
1856
1857 assert (fdhead == curfdp);
1858 assert (last_node->fdp == curfdp);
1859 put_entries (np); /* write tags for file curfdp->taggedfname */
1860 free_tree (np); /* remove the written nodes */
1861 if (prev == NULL)
1862 nodehead = NULL; /* no nodes left */
1863 else
1864 prev->left = NULL; /* delete the pointer to the sublist */
1865 }
1866 }
1867 }
1868
1869 /*
1870 * This routine sets up the boolean pseudo-functions which work
1871 * by setting boolean flags dependent upon the corresponding character.
1872 * Every char which is NOT in that string is not a white char. Therefore,
1873 * all of the array "_wht" is set to FALSE, and then the elements
1874 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1875 * of a char is TRUE if it is the string "white", else FALSE.
1876 */
1877 static void
1878 init ()
1879 {
1880 register char *sp;
1881 register int i;
1882
1883 for (i = 0; i < CHARS; i++)
1884 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1885 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1886 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1887 notinname('\0') = notinname('\n');
1888 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1889 begtoken('\0') = begtoken('\n');
1890 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1891 intoken('\0') = intoken('\n');
1892 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1893 endtoken('\0') = endtoken('\n');
1894 }
1895
1896 /*
1897 * Select a parser for the already-open stream INF and run it on the
 * file described by curfdp.  This routine does not open the file itself.
 * The parser is taken, in order, from: the language already stored in
 * curfdp->lang; the file name; a "#!" interpreter line; the file name
 * compared case-insensitively; and, as a last resort, a trial parse as
 * Fortran followed, if that added no node, by a parse as C or C++.
1899 */
1900 static void
1901 find_entries (inf)
1902 FILE *inf;
1903 {
1904 char *cp;
1905 language *lang = curfdp->lang;
1906 Lang_function *parser = NULL;
1907
1908 /* If user specified a language, use it. */
1909 if (lang != NULL && lang->function != NULL)
1910 {
1911 parser = lang->function;
1912 }
1913
1914 /* Else try to guess the language given the file name. */
1915 if (parser == NULL)
1916 {
1917 lang = get_language_from_filename (curfdp->infname, TRUE);
1918 if (lang != NULL && lang->function != NULL)
1919 {
1920 curfdp->lang = lang;
1921 parser = lang->function;
1922 }
1923 }
1924
1925 /* Else look for sharp-bang as the first two characters. */
/* Note that this reads the first line of INF into the global
   line buffer lb; the stream is rewound further below. */
1926 if (parser == NULL
1927 && readline_internal (&lb, inf) > 0
1928 && lb.len >= 2
1929 && lb.buffer[0] == '#'
1930 && lb.buffer[1] == '!')
1931 {
1932 char *lp;
1933
1934 /* Set lp to point at the first char after the last slash in the
1935 line or, if no slashes, at the first nonblank. Then set cp to
1936 the first successive blank and terminate the string. */
1937 lp = etags_strrchr (lb.buffer+2, '/');
1938 if (lp != NULL)
1939 lp += 1;
1940 else
1941 lp = skip_spaces (lb.buffer + 2);
1942 cp = skip_non_spaces (lp);
1943 *cp = '\0';
1944
1945 if (strlen (lp) > 0)
1946 {
1947 lang = get_language_from_interpreter (lp);
1948 if (lang != NULL && lang->function != NULL)
1949 {
1950 curfdp->lang = lang;
1951 parser = lang->function;
1952 }
1953 }
1954 }
1955
1956 /* We rewind here, even if inf may be a pipe. We fail if the
1957 length of the first line is longer than the pipe block size,
1958 which is unlikely. */
1959 rewind (inf);
1960
1961 /* Else try to guess the language given the case insensitive file name. */
1962 if (parser == NULL)
1963 {
1964 lang = get_language_from_filename (curfdp->infname, FALSE);
1965 if (lang != NULL && lang->function != NULL)
1966 {
1967 curfdp->lang = lang;
1968 parser = lang->function;
1969 }
1970 }
1971
1972 /* Else try Fortran or C. */
1973 if (parser == NULL)
1974 {
/* Remember last_node so that we can tell whether the Fortran
   attempt added anything. */
1975 node *old_last_node = last_node;
1976
/* Each recursive call below finds curfdp->lang already set.
   NOTE(review): this terminates only if the lookup returns a
   language whose parser function is non-NULL -- confirm. */
1977 curfdp->lang = get_language_from_langname ("fortran");
1978 find_entries (inf);
1979
1980 if (old_last_node == last_node)
1981 /* No Fortran entries found. Try C. */
1982 {
1983 /* We do not tag if rewind fails.
1984 Only the file name will be recorded in the tags file. */
1985 rewind (inf);
1986 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1987 find_entries (inf);
1988 }
1989 return;
1990 }
1991
1992 if (!no_line_directive
1993 && curfdp->lang != NULL && curfdp->lang->metasource)
1994 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1995 file, or anyway we parsed a file that is automatically generated from
1996 this one. If this is the case, the bingo.c file contained #line
1997 directives that generated tags pointing to this file. Let's delete
1998 them all before parsing this file, which is the real source. */
1999 {
/* fdpp always points at the link that leads to the description under
   examination, so a description can be unlinked in place. */
2000 fdesc **fdpp = &fdhead;
2001 while (*fdpp != NULL)
2002 if (*fdpp != curfdp
2003 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2004 /* We found one of those! We must delete both the file description
2005 and all tags referring to it. */
2006 {
2007 fdesc *badfdp = *fdpp;
2008
2009 /* Delete the tags referring to badfdp->taggedfname
2010 that were obtained from badfdp->infname. */
2011 invalidate_nodes (badfdp, &nodehead);
2012
2013 *fdpp = badfdp->next; /* remove the bad description from the list */
2014 free_fdesc (badfdp);
2015 }
2016 else
2017 fdpp = &(*fdpp)->next; /* advance the list pointer */
2018 }
2019
2020 assert (parser != NULL);
2021
2022 /* Generic initialisations before reading from file. */
2023 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2024
2025 /* Generic initialisations before parsing file with readline. */
2026 lineno = 0; /* reset global line number */
2027 charno = 0; /* reset global char number */
2028 linecharno = 0; /* reset global char number of line start */
2029
2030 parser (inf);
2031
/* Called after the language parser has finished with the file. */
2032 regex_tag_multiline ();
2033 }
2034
2035 \f
2036 /*
2037 * Check whether an implicitly named tag should be created,
2038 * then call `pfnote'.
2039 * NAME is a string that is internally copied by this function.
2040 *
2041 * TAGS format specification
2042 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2043 * The following is explained in some more detail in etc/ETAGS.EBNF.
2044 *
2045 * make_tag creates tags with "implicit tag names" (unnamed tags)
2046 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2047 * 1. NAME does not contain any of the characters in NONAM;
2048 * 2. LINESTART contains name as either a rightmost, or rightmost but
2049 * one character, substring;
2050 * 3. the character, if any, immediately before NAME in LINESTART must
2051 * be a character in NONAM;
2052 * 4. the character, if any, immediately after NAME in LINESTART must
2053 * also be a character in NONAM.
2054 *
2055 * The implementation uses the notinname() macro, which recognises the
2056 * characters stored in the string `nonam'.
2057 * etags.el needs to use the same characters that are in NONAM.
2058 */
2059 static void
2060 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2061 char *name; /* tag name, or NULL if unnamed */
2062 int namelen; /* tag length */
2063 bool is_func; /* tag is a function */
2064 char *linestart; /* start of the line where tag is */
2065 int linelen; /* length of the line where tag is */
2066 int lno; /* line number */
2067 long cno; /* character number */
2068 {
2069 bool named = (name != NULL && namelen > 0);
2070
2071 if (!CTAGS && named) /* maybe set named to false */
2072 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2073 such that etags.el can guess a name from it. */
2074 {
2075 int i;
2076 register char *cp = name;
2077
2078 for (i = 0; i < namelen; i++)
2079 if (notinname (*cp++))
2080 break;
2081 if (i == namelen) /* rule #1 */
2082 {
2083 cp = linestart + linelen - namelen;
2084 if (notinname (linestart[linelen-1]))
2085 cp -= 1; /* rule #4 */
2086 if (cp >= linestart /* rule #2 */
2087 && (cp == linestart
2088 || notinname (cp[-1])) /* rule #3 */
2089 && strneq (name, cp, namelen)) /* rule #2 */
2090 named = FALSE; /* use implicit tag name */
2091 }
2092 }
2093
2094 if (named)
2095 name = savenstr (name, namelen);
2096 else
2097 name = NULL;
2098 pfnote (name, is_func, linestart, linelen, lno, cno);
2099 }
2100
2101 /* Record a tag. */
2102 static void
2103 pfnote (name, is_func, linestart, linelen, lno, cno)
2104 char *name; /* tag name, or NULL if unnamed */
2105 bool is_func; /* tag is a function */
2106 char *linestart; /* start of the line where tag is */
2107 int linelen; /* length of the line where tag is */
2108 int lno; /* line number */
2109 long cno; /* character number */
2110 {
2111 register node *np;
2112
2113 assert (name == NULL || name[0] != '\0');
2114 if (CTAGS && name == NULL)
2115 return;
2116
2117 np = xnew (1, node);
2118
2119 /* If ctags mode, change name "main" to M<thisfilename>. */
2120 if (CTAGS && !cxref_style && streq (name, "main"))
2121 {
2122 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2123 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2124 fp = etags_strrchr (np->name, '.');
2125 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2126 fp[0] = '\0';
2127 }
2128 else
2129 np->name = name;
2130 np->valid = TRUE;
2131 np->been_warned = FALSE;
2132 np->fdp = curfdp;
2133 np->is_func = is_func;
2134 np->lno = lno;
2135 if (np->fdp->usecharno)
2136 /* Our char numbers are 0-base, because of C language tradition?
2137 ctags compatibility? old versions compatibility? I don't know.
2138 Anyway, since emacs's are 1-base we expect etags.el to take care
2139 of the difference. If we wanted to have 1-based numbers, we would
2140 uncomment the +1 below. */
2141 np->cno = cno /* + 1 */ ;
2142 else
2143 np->cno = invalidcharno;
2144 np->left = np->right = NULL;
2145 if (CTAGS && !cxref_style)
2146 {
2147 if (strlen (linestart) < 50)
2148 np->regex = concat (linestart, "$", "");
2149 else
2150 np->regex = savenstr (linestart, 50);
2151 }
2152 else
2153 np->regex = savenstr (linestart, linelen);
2154
2155 add_node (np, &nodehead);
2156 }
2157
2158 /*
2159 * free_tree ()
2160 * recurse on left children, iterate on right children.
2161 */
2162 static void
2163 free_tree (np)
2164 register node *np;
2165 {
2166 while (np)
2167 {
2168 register node *node_right = np->right;
2169 free_tree (np->left);
2170 if (np->name != NULL)
2171 free (np->name);
2172 free (np->regex);
2173 free (np);
2174 np = node_right;
2175 }
2176 }
2177
2178 /*
2179 * free_fdesc ()
2180 * delete a file description
2181 */
2182 static void
2183 free_fdesc (fdp)
2184 register fdesc *fdp;
2185 {
2186 if (fdp->infname != NULL) free (fdp->infname);
2187 if (fdp->infabsname != NULL) free (fdp->infabsname);
2188 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2189 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2190 if (fdp->prop != NULL) free (fdp->prop);
2191 free (fdp);
2192 }
2193
2194 /*
2195 * add_node ()
2196 * Adds a node to the tree of nodes. In etags mode, sort by file
2197 * name. In ctags mode, sort by tag name. Make no attempt at
2198 * balancing.
2199 *
2200 * add_node is the only function allowed to add nodes, so it can
2201 * maintain state.
2202 */
2203 static void
2204 add_node (np, cur_node_p)
2205 node *np, **cur_node_p;
2206 {
2207 register int dif;
2208 register node *cur_node = *cur_node_p;
2209
2210 if (cur_node == NULL)
2211 {
2212 *cur_node_p = np;
2213 last_node = np;
2214 return;
2215 }
2216
2217 if (!CTAGS)
2218 /* Etags Mode */
2219 {
2220 /* For each file name, tags are in a linked sublist on the right
2221 pointer. The first tags of different files are a linked list
2222 on the left pointer. last_node points to the end of the last
2223 used sublist. */
2224 if (last_node != NULL && last_node->fdp == np->fdp)
2225 {
2226 /* Let's use the same sublist as the last added node. */
2227 assert (last_node->right == NULL);
2228 last_node->right = np;
2229 last_node = np;
2230 }
2231 else if (cur_node->fdp == np->fdp)
2232 {
2233 /* Scanning the list we found the head of a sublist which is
2234 good for us. Let's scan this sublist. */
2235 add_node (np, &cur_node->right);
2236 }
2237 else
2238 /* The head of this sublist is not good for us. Let's try the
2239 next one. */
2240 add_node (np, &cur_node->left);
2241 } /* if ETAGS mode */
2242
2243 else
2244 {
2245 /* Ctags Mode */
2246 dif = strcmp (np->name, cur_node->name);
2247
2248 /*
2249 * If this tag name matches an existing one, then
2250 * do not add the node, but maybe print a warning.
2251 */
2252 if (no_duplicates && !dif)
2253 {
2254 if (np->fdp == cur_node->fdp)
2255 {
2256 if (!no_warnings)
2257 {
2258 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2259 np->fdp->infname, lineno, np->name);
2260 fprintf (stderr, "Second entry ignored\n");
2261 }
2262 }
2263 else if (!cur_node->been_warned && !no_warnings)
2264 {
2265 fprintf
2266 (stderr,
2267 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2268 np->fdp->infname, cur_node->fdp->infname, np->name);
2269 cur_node->been_warned = TRUE;
2270 }
2271 return;
2272 }
2273
2274 /* Actually add the node */
2275 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2276 } /* if CTAGS mode */
2277 }
2278
2279 /*
2280 * invalidate_nodes ()
2281 * Scan the node tree and invalidate all nodes pointing to the
2282 * given file description (CTAGS case) or free them (ETAGS case).
2283 */
2284 static void
2285 invalidate_nodes (badfdp, npp)
2286 fdesc *badfdp;
2287 node **npp;
2288 {
2289 node *np = *npp;
2290
2291 if (np == NULL)
2292 return;
2293
2294 if (CTAGS)
2295 {
2296 if (np->left != NULL)
2297 invalidate_nodes (badfdp, &np->left);
2298 if (np->fdp == badfdp)
2299 np->valid = FALSE;
2300 if (np->right != NULL)
2301 invalidate_nodes (badfdp, &np->right);
2302 }
2303 else
2304 {
2305 assert (np->fdp != NULL);
2306 if (np->fdp == badfdp)
2307 {
2308 *npp = np->left; /* detach the sublist from the list */
2309 np->left = NULL; /* isolate it */
2310 free_tree (np); /* free it */
2311 invalidate_nodes (badfdp, npp);
2312 }
2313 else
2314 invalidate_nodes (badfdp, &np->left);
2315 }
2316 }
2317
2318 \f
2319 static int total_size_of_entries __P((node *));
2320 static int number_len __P((long));
2321
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, plus one for each division by ten that
     still leaves something to print. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2332
2333 /*
2334 * Return total number of characters that put_entries will output for
2335 * the nodes in the linked list at the right of the specified node.
2336 * This count is irrelevant with etags.el since emacs 19.34 at least,
2337 * but is still supplied for backward compatibility.
2338 */
2339 static int
2340 total_size_of_entries (np)
2341 register node *np;
2342 {
2343 register int total = 0;
2344
2345 for (; np != NULL; np = np->right)
2346 if (np->valid)
2347 {
2348 total += strlen (np->regex) + 1; /* pat\177 */
2349 if (np->name != NULL)
2350 total += strlen (np->name) + 1; /* name\001 */
2351 total += number_len ((long) np->lno) + 1; /* lno, */
2352 if (np->cno != invalidcharno) /* cno */
2353 total += number_len (np->cno);
2354 total += 1; /* newline */
2355 }
2356
2357 return total;
2358 }
2359
/*
 * Write the tags stored in the tree rooted at NP.
 *
 * In ctags mode the tree is written in order: left subtree, node,
 * right subtree.  In etags mode the node is written first, then the
 * rest of its sublist (right pointer), then the following sublists
 * (left pointer); a "\f\n<file>,<size>\n" header is written whenever
 * the file description differs from that of the last header written.
 *
 * Nodes that are not valid produce no output, but their subtrees are
 * still visited.
 */
2360 static void
2361 put_entries (np)
2362 register node *np;
2363 {
2364 register char *sp;
/* File description of the last header written; being static, it is
   kept from one call to the next. */
2365 static fdesc *fdp = NULL;
2366
2367 if (np == NULL)
2368 return;
2369
2370 /* Output subentries that precede this one */
2371 if (CTAGS)
2372 put_entries (np->left);
2373
2374 /* Output this entry */
2375 if (np->valid)
2376 {
2377 if (!CTAGS)
2378 {
2379 /* Etags mode */
2380 if (fdp != np->fdp)
2381 {
/* First tag of a new file: write the section header, which carries
   the size of the entries on the right-linked list from NP on. */
2382 fdp = np->fdp;
2383 fprintf (tagf, "\f\n%s,%d\n",
2384 fdp->taggedfname, total_size_of_entries (np));
2385 fdp->written = TRUE;
2386 }
/* Entry layout: pattern \177 [name \001] lno , [cno] newline. */
2387 fputs (np->regex, tagf);
2388 fputc ('\177', tagf);
2389 if (np->name != NULL)
2390 {
2391 fputs (np->name, tagf);
2392 fputc ('\001', tagf);
2393 }
2394 fprintf (tagf, "%d,", np->lno);
2395 if (np->cno != invalidcharno)
2396 fprintf (tagf, "%ld", np->cno);
2397 fputs ("\n", tagf);
2398 }
2399 else
2400 {
2401 /* Ctags mode */
2402 if (np->name == NULL)
2403 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2404
2405 if (cxref_style)
2406 {
/* Both cxref formats are written to stdout rather than to tagf. */
2407 if (vgrind_style)
/* The last field is the line number divided by 64, rounded up. */
2408 fprintf (stdout, "%s %s %d\n",
2409 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2410 else
2411 fprintf (stdout, "%-16s %3d %-16s %s\n",
2412 np->name, np->lno, np->fdp->taggedfname, np->regex);
2413 }
2414 else
2415 {
2416 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2417
2418 if (np->is_func)
2419 { /* function or #define macro with args */
/* Write a search command: the delimiter, '^', the stored pattern with
   backslashes and delimiters escaped, and the delimiter again. */
2420 putc (searchar, tagf);
2421 putc ('^', tagf);
2422
2423 for (sp = np->regex; *sp; sp++)
2424 {
2425 if (*sp == '\\' || *sp == searchar)
2426 putc ('\\', tagf);
2427 putc (*sp, tagf);
2428 }
2429 putc (searchar, tagf);
2430 }
2431 else
2432 { /* anything else; text pattern inadequate */
2433 fprintf (tagf, "%d", np->lno);
2434 }
2435 putc ('\n', tagf);
2436 }
2437 }
2438 } /* if this node contains a valid tag */
2439
2440 /* Output subentries that follow this one */
2441 put_entries (np->right);
2442 if (!CTAGS)
2443 put_entries (np->left);
2444 }
2445
2446 \f
2447 /* C extensions. */
/* These are bit masks.  Note that the values of C_STAR and C_JAVA both
   include the C_PLPL bit; the keyword table below uses
   (C_JAVA & ~C_PLPL) for keywords that must not match plain C++. */
2448 #define C_EXT 0x00fff /* C extensions */
2449 #define C_PLAIN 0x00000 /* C */
2450 #define C_PLPL 0x00001 /* C++ */
2451 #define C_STAR 0x00003 /* C* */
2452 #define C_JAVA 0x00005 /* JAVA */
2453 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2454 #define YACC 0x10000 /* yacc file */
2455
2456 /*
2457 * The C symbol tables.
 * Each keyword of the table below is associated with one of these
 * types; st_none is what C_symtype returns for a token that is not a
 * keyword in the language being parsed.
2458 */
2459 enum sym_type
2460 {
2461 st_none,
2462 st_C_objprot, st_C_objimpl, st_C_objend,
2463 st_C_gnumacro,
2464 st_C_ignore, st_C_attribute,
2465 st_C_javastruct,
2466 st_C_operator,
2467 st_C_class, st_C_template,
2468 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2469 };
2470
2471 static unsigned int hash __P((const char *, unsigned int));
2472 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2473 static enum sym_type C_symtype __P((char *, int, int));
2474
2475 /* Feed stuff between (but not including) %[ and %] lines to:
2476 gperf -m 5
2477 %[
2478 %compare-strncmp
2479 %enum
2480 %struct-type
2481 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2482 %%
2483 if, 0, st_C_ignore
2484 for, 0, st_C_ignore
2485 while, 0, st_C_ignore
2486 switch, 0, st_C_ignore
2487 return, 0, st_C_ignore
2488 __attribute__, 0, st_C_attribute
2489 @interface, 0, st_C_objprot
2490 @protocol, 0, st_C_objprot
2491 @implementation,0, st_C_objimpl
2492 @end, 0, st_C_objend
2493 import, (C_JAVA & ~C_PLPL), st_C_ignore
2494 package, (C_JAVA & ~C_PLPL), st_C_ignore
2495 friend, C_PLPL, st_C_ignore
2496 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2497 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2498 interface, (C_JAVA & ~C_PLPL), st_C_struct
2499 class, 0, st_C_class
2500 namespace, C_PLPL, st_C_struct
2501 domain, C_STAR, st_C_struct
2502 union, 0, st_C_struct
2503 struct, 0, st_C_struct
2504 extern, 0, st_C_extern
2505 enum, 0, st_C_enum
2506 typedef, 0, st_C_typedef
2507 define, 0, st_C_define
2508 undef, 0, st_C_define
2509 operator, C_PLPL, st_C_operator
2510 template, 0, st_C_template
2511 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2512 DEFUN, 0, st_C_gnumacro
2513 SYSCALL, 0, st_C_gnumacro
2514 ENTRY, 0, st_C_gnumacro
2515 PSEUDO, 0, st_C_gnumacro
2516 # These are defined inside C functions, so currently they are not met.
2517 # EXFUN used in glibc, DEFVAR_* in emacs.
2518 #EXFUN, 0, st_C_gnumacro
2519 #DEFVAR_, 0, st_C_gnumacro
2520 %]
2521 and replace lines between %< and %> with its output, then:
2522 - remove the #if characterset check
2523 - make in_word_set static and not inline. */
2524 /*%<*/
2525 /* C code produced by gperf version 3.0.1 */
2526 /* Command-line: gperf -m 5 */
2527 /* Computed positions: -k'2-3' */
2528
/* One keyword: its spelling, the C extensions mask it is restricted to
   (0 in the table means no restriction is applied by C_symtype), and
   its symbol type. */
2529 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2530 /* maximum key range = 33, duplicates = 0 */
2531
/* Hash function generated by gperf.  The value is LEN plus the
   association value of the second character of STR plus, unless LEN
   is 2, that of the third character.  STR[1] is always read, so the
   caller must make sure that LEN is at least 2. */
2532 #ifdef __GNUC__
2533 __inline
2534 #else
2535 #ifdef __cplusplus
2536 inline
2537 #endif
2538 #endif
2539 static unsigned int
2540 hash (str, len)
2541 register const char *str;
2542 register unsigned int len;
2543 {
2544 static unsigned char asso_values[] =
2545 {
2546 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2547 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2548 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2549 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2550 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2551 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2552 35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
2553 14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
2554 35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
2555 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2556 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2557 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2558 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2559 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2560 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2561 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2562 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2563 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2564 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2565 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2566 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2567 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2568 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2569 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2570 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2571 35, 35, 35, 35, 35, 35
2572 };
2573 register int hval = len;
2574
2575 switch (hval)
2576 {
2577 default:
2578 hval += asso_values[(unsigned char)str[2]];
2579 /*FALLTHROUGH*/
2580 case 2:
2581 hval += asso_values[(unsigned char)str[1]];
2582 break;
2583 }
2584 return hval;
2585 }
2586
/* Keyword lookup generated by gperf.  Return the address of the table
   entry whose name is equal to the first LEN characters of STR, or 0 if
   there is none.  The length is checked against the shortest and the
   longest keyword before hashing, so that hash is only called with LEN
   at least 2.  Slots that hold no keyword have the empty string as
   their name. */
2587 static struct C_stab_entry *
2588 in_word_set (str, len)
2589 register const char *str;
2590 register unsigned int len;
2591 {
2592 enum
2593 {
2594 TOTAL_KEYWORDS = 32,
2595 MIN_WORD_LENGTH = 2,
2596 MAX_WORD_LENGTH = 15,
2597 MIN_HASH_VALUE = 2,
2598 MAX_HASH_VALUE = 34
2599 };
2600
2601 static struct C_stab_entry wordlist[] =
2602 {
2603 {""}, {""},
2604 {"if", 0, st_C_ignore},
2605 {""},
2606 {"@end", 0, st_C_objend},
2607 {"union", 0, st_C_struct},
2608 {"define", 0, st_C_define},
2609 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2610 {"template", 0, st_C_template},
2611 {"operator", C_PLPL, st_C_operator},
2612 {"@interface", 0, st_C_objprot},
2613 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2614 {"friend", C_PLPL, st_C_ignore},
2615 {"typedef", 0, st_C_typedef},
2616 {"return", 0, st_C_ignore},
2617 {"@implementation",0, st_C_objimpl},
2618 {"@protocol", 0, st_C_objprot},
2619 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2620 {"extern", 0, st_C_extern},
2621 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2622 {"struct", 0, st_C_struct},
2623 {"domain", C_STAR, st_C_struct},
2624 {"switch", 0, st_C_ignore},
2625 {"enum", 0, st_C_enum},
2626 {"for", 0, st_C_ignore},
2627 {"namespace", C_PLPL, st_C_struct},
2628 {"class", 0, st_C_class},
2629 {"while", 0, st_C_ignore},
2630 {"undef", 0, st_C_define},
2631 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2632 {"__attribute__", 0, st_C_attribute},
2633 {"SYSCALL", 0, st_C_gnumacro},
2634 {"ENTRY", 0, st_C_gnumacro},
2635 {"PSEUDO", 0, st_C_gnumacro},
2636 {"DEFUN", 0, st_C_gnumacro}
2637 };
2638
2639 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2640 {
2641 register int key = hash (str, len);
2642
2643 if (key <= MAX_HASH_VALUE && key >= 0)
2644 {
2645 register const char *s = wordlist[key].name;
2646
/* STR is not required to end after LEN characters, so besides
   comparing LEN characters also check that the keyword ends there. */
2647 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2648 return &wordlist[key];
2649 }
2650 }
2651 return 0;
2652 }
2653 /*%>*/
2654
2655 static enum sym_type
2656 C_symtype (str, len, c_ext)
2657 char *str;
2658 int len;
2659 int c_ext;
2660 {
2661 register struct C_stab_entry *se = in_word_set (str, len);
2662
2663 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2664 return st_none;
2665 return se->type;
2666 }
2667
2668 \f
2669 /*
2670 * Ignoring __attribute__ ((list))
2671 */
2672 static bool inattribute; /* looking at an __attribute__ construct */
2673
2674 /*
2675 * C functions and variables are recognized using a simple
2676 * finite automaton. fvdef is its state variable.
2677 */
2678 static enum
2679 {
2680 fvnone, /* nothing seen */
2681 fdefunkey, /* Emacs DEFUN keyword seen */
2682 fdefunname, /* Emacs DEFUN name seen */
2683 foperator, /* func: operator keyword seen (cplpl) */
2684 fvnameseen, /* function or variable name seen */
2685 fstartlist, /* func: just after open parenthesis */
2686 finlist, /* func: in parameter list */
2687 flistseen, /* func: after parameter list */
2688 fignore, /* func: before open brace */
2689 vignore /* var-like: ignore until ';' */
2690 } fvdef;
2691
2692 static bool fvextern; /* func or var: extern keyword seen; */
2693
2694 /*
2695 * typedefs are recognized using a simple finite automaton.
2696 * typdef is its state variable.
2697 */
2698 static enum
2699 {
2700 tnone, /* nothing seen */
2701 tkeyseen, /* typedef keyword seen */
2702 ttypeseen, /* defined type seen */
2703 tinbody, /* inside typedef body */
2704 tend, /* just before typedef tag */
2705 tignore /* junk after typedef tag */
2706 } typdef;
2707
2708 /*
2709 * struct-like structures (enum, struct and union) are recognized
2710 * using another simple finite automaton. `structdef' is its state
2711 * variable.
2712 */
2713 static enum
2714 {
2715 snone, /* nothing seen yet,
2716 or in struct body if bracelev > 0 */
2717 skeyseen, /* struct-like keyword seen */
2718 stagseen, /* struct-like tag seen */
2719 scolonseen /* colon seen after struct-like tag */
2720 } structdef;
2721
2722 /*
2723 * When objdef is different from onone, objtag is the name of the class.
 * Until a class name has been seen it holds a placeholder string.
2724 */
2725 static char *objtag = "<uninited>";
2726
2727 /*
2728 * Yet another little state machine to deal with preprocessor lines.
2729 */
2730 static enum
2731 {
2732 dnone, /* nothing seen */
2733 dsharpseen, /* '#' seen as first char on line */
2734 ddefineseen, /* '#' and 'define' seen */
2735 dignorerest /* ignore rest of line */
2736 } definedef;
2737
2738 /*
2739 * State machine for Objective C protocols and implementations.
2740 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2741 */
2742 static enum
2743 {
2744 onone, /* nothing seen */
2745 oprotocol, /* @interface or @protocol seen */
2746 oimplementation, /* @implementation seen */
2747 otagseen, /* class name seen */
2748 oparenseen, /* parenthesis before category seen */
2749 ocatseen, /* category name seen */
2750 oinbody, /* in @implementation body */
2751 omethodsign, /* in @implementation body, after +/- */
2752 omethodtag, /* after method name */
2753 omethodcolon, /* after method colon */
2754 omethodparm, /* after method parameter */
2755 oignore /* wait for @end */
2756 } objdef;
2757
2758
2759 /*
2760 * Use this structure to keep info about the token read, and how it
2761 * should be tagged. Used by the make_C_tag function to build a tag.
2762 */
2763 static struct tok
2764 {
2765 char *line; /* string containing the token */
2766 int offset; /* where the token starts in LINE */
2767 int length; /* token length */
2768 /*
2769 The previous members can be used to pass strings around for generic
2770 purposes. The following ones specifically refer to creating tags. In this
2771 case the token contained here is the pattern that will be used to create a
2772 tag.
2773 */
2774 bool valid; /* when FALSE, do not create a tag (NOTE(review):
 presumably checked by make_C_tag -- confirm);
 the token should be
2775 invalidated whenever a state machine is
2776 reset prematurely */
2777 bool named; /* create a named tag */
2778 int lineno; /* source line number of tag */
2779 long linepos; /* source char number of tag */
2780 } token; /* latest token read */
2781
2782 /*
2783 * Variables and functions for dealing with nested structures.
2784 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2785 */
2786 static void pushclass_above __P((int, char *, int));
2787 static void popclass_above __P((int));
2788 static void write_classname __P((linebuffer *, char *qualifier));
2789
/* Stack of the enclosing struct-like declarations.  The two arrays are
   parallel: element I holds the name, which may be NULL, and the brace
   level of the I-th nested declaration. */
2790 static struct {
2791 char **cname; /* nested class names */
2792 int *bracelev; /* nested class brace level */
2793 int nl; /* class nesting level (elements used) */
2794 int size; /* length of the array */
2795 } cstack; /* stack for nested declaration tags */
2796 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2797 #define nestlev (cstack.nl)
2798 /* After struct keyword or in struct body, not inside a nested function. */
/* This macro uses a variable named bracelev, which must be in scope
   where the macro is used. */
2799 #define instruct (structdef == snone && nestlev > 0 \
2800 && bracelev == cstack.bracelev[nestlev-1] + 1)
2801
2802 static void
2803 pushclass_above (bracelev, str, len)
2804 int bracelev;
2805 char *str;
2806 int len;
2807 {
2808 int nl;
2809
2810 popclass_above (bracelev);
2811 nl = cstack.nl;
2812 if (nl >= cstack.size)
2813 {
2814 int size = cstack.size *= 2;
2815 xrnew (cstack.cname, size, char *);
2816 xrnew (cstack.bracelev, size, int);
2817 }
2818 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2819 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2820 cstack.bracelev[nl] = bracelev;
2821 cstack.nl = nl + 1;
2822 }
2823
2824 static void
2825 popclass_above (bracelev)
2826 int bracelev;
2827 {
2828 int nl;
2829
2830 for (nl = cstack.nl - 1;
2831 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2832 nl--)
2833 {
2834 if (cstack.cname[nl] != NULL)
2835 free (cstack.cname[nl]);
2836 cstack.nl = nl;
2837 }
2838 }
2839
2840 static void
2841 write_classname (cn, qualifier)
2842 linebuffer *cn;
2843 char *qualifier;
2844 {
2845 int i, len;
2846 int qlen = strlen (qualifier);
2847
2848 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2849 {
2850 len = 0;
2851 cn->len = 0;
2852 cn->buffer[0] = '\0';
2853 }
2854 else
2855 {
2856 len = strlen (cstack.cname[0]);
2857 linebuffer_setlen (cn, len);
2858 strcpy (cn->buffer, cstack.cname[0]);
2859 }
2860 for (i = 1; i < cstack.nl; i++)
2861 {
2862 char *s;
2863 int slen;
2864
2865 s = cstack.cname[i];
2866 if (s == NULL)
2867 continue;
2868 slen = strlen (s);
2869 len += slen + qlen;
2870 linebuffer_setlen (cn, len);
2871 strncat (cn->buffer, qualifier, qlen);
2872 strncat (cn->buffer, s, slen);
2873 }
2874 }
2875
2876 \f
2877 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2878 static void make_C_tag __P((bool));
2879
2880 /*
2881 * consider_token ()
2882 * checks to see if the current token is at the start of a
2883 * function or variable, or corresponds to a typedef, or
2884 * is a struct/union/enum tag, or #define, or an enum constant.
2885 *
2886 * *IS_FUNC gets TRUE if the token is a function or #define macro
2887 * with args. C_EXTP points to which language we are looking at.
2888 *
2889 * Globals
2890 * fvdef IN OUT
2891 * structdef IN OUT
2892 * definedef IN OUT
2893 * typdef IN OUT
2894 * objdef IN OUT
2895 */
2896
2897 static bool
2898 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2899 register char *str; /* IN: token pointer */
2900 register int len; /* IN: token length */
2901 register int c; /* IN: first char after the token */
2902 int *c_extp; /* IN, OUT: C extensions mask */
2903 int bracelev; /* IN: brace level */
2904 int parlev; /* IN: parenthesis level */
2905 bool *is_func_or_var; /* OUT: function or variable found */
2906 {
2907 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2908 structtype is the type of the preceding struct-like keyword, and
2909 structbracelev is the brace level where it has been seen. */
2910 static enum sym_type structtype;
2911 static int structbracelev;
2912 static enum sym_type toktype;
2913
2914
2915 toktype = C_symtype (str, len, *c_extp);
2916
2917 /*
2918 * Skip __attribute__
2919 */
2920 if (toktype == st_C_attribute)
2921 {
2922 inattribute = TRUE;
2923 return FALSE;
2924 }
2925
2926 /*
2927 * Advance the definedef state machine.
2928 */
2929 switch (definedef)
2930 {
2931 case dnone:
2932 /* We're not on a preprocessor line. */
2933 if (toktype == st_C_gnumacro)
2934 {
2935 fvdef = fdefunkey;
2936 return FALSE;
2937 }
2938 break;
2939 case dsharpseen:
2940 if (toktype == st_C_define)
2941 {
2942 definedef = ddefineseen;
2943 }
2944 else
2945 {
2946 definedef = dignorerest;
2947 }
2948 return FALSE;
2949 case ddefineseen:
2950 /*
2951 * Make a tag for any macro, unless it is a constant
2952 * and constantypedefs is FALSE.
2953 */
2954 definedef = dignorerest;
2955 *is_func_or_var = (c == '(');
2956 if (!*is_func_or_var && !constantypedefs)
2957 return FALSE;
2958 else
2959 return TRUE;
2960 case dignorerest:
2961 return FALSE;
2962 default:
2963 error ("internal error: definedef value.", (char *)NULL);
2964 }
2965
2966 /*
2967 * Now typedefs
2968 */
2969 switch (typdef)
2970 {
2971 case tnone:
2972 if (toktype == st_C_typedef)
2973 {
2974 if (typedefs)
2975 typdef = tkeyseen;
2976 fvextern = FALSE;
2977 fvdef = fvnone;
2978 return FALSE;
2979 }
2980 break;
2981 case tkeyseen:
2982 switch (toktype)
2983 {
2984 case st_none:
2985 case st_C_class:
2986 case st_C_struct:
2987 case st_C_enum:
2988 typdef = ttypeseen;
2989 }
2990 break;
2991 case ttypeseen:
2992 if (structdef == snone && fvdef == fvnone)
2993 {
2994 fvdef = fvnameseen;
2995 return TRUE;
2996 }
2997 break;
2998 case tend:
2999 switch (toktype)
3000 {
3001 case st_C_class:
3002 case st_C_struct:
3003 case st_C_enum:
3004 return FALSE;
3005 }
3006 return TRUE;
3007 }
3008
3009 /*
3010 * This structdef business is NOT invoked when we are ctags and the
3011 * file is plain C. This is because a struct tag may have the same
3012 * name as another tag, and this loses with ctags.
3013 */
3014 switch (toktype)
3015 {
3016 case st_C_javastruct:
3017 if (structdef == stagseen)
3018 structdef = scolonseen;
3019 return FALSE;
3020 case st_C_template:
3021 case st_C_class:
3022 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3023 && bracelev == 0
3024 && definedef == dnone && structdef == snone
3025 && typdef == tnone && fvdef == fvnone)
3026 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3027 if (toktype == st_C_template)
3028 break;
3029 /* FALLTHRU */
3030 case st_C_struct:
3031 case st_C_enum:
3032 if (parlev == 0
3033 && fvdef != vignore
3034 && (typdef == tkeyseen
3035 || (typedefs_or_cplusplus && structdef == snone)))
3036 {
3037 structdef = skeyseen;
3038 structtype = toktype;
3039 structbracelev = bracelev;
3040 if (fvdef == fvnameseen)
3041 fvdef = fvnone;
3042 }
3043 return FALSE;
3044 }
3045
3046 if (structdef == skeyseen)
3047 {
3048 structdef = stagseen;
3049 return TRUE;
3050 }
3051
3052 if (typdef != tnone)
3053 definedef = dnone;
3054
3055 /* Detect Objective C constructs. */
3056 switch (objdef)
3057 {
3058 case onone:
3059 switch (toktype)
3060 {
3061 case st_C_objprot:
3062 objdef = oprotocol;
3063 return FALSE;
3064 case st_C_objimpl:
3065 objdef = oimplementation;
3066 return FALSE;
3067 }
3068 break;
3069 case oimplementation:
3070 /* Save the class tag for functions or variables defined inside. */
3071 objtag = savenstr (str, len);
3072 objdef = oinbody;
3073 return FALSE;
3074 case oprotocol:
3075 /* Save the class tag for categories. */
3076 objtag = savenstr (str, len);
3077 objdef = otagseen;
3078 *is_func_or_var = TRUE;
3079 return TRUE;
3080 case oparenseen:
3081 objdef = ocatseen;
3082 *is_func_or_var = TRUE;
3083 return TRUE;
3084 case oinbody:
3085 break;
3086 case omethodsign:
3087 if (parlev == 0)
3088 {
3089 fvdef = fvnone;
3090 objdef = omethodtag;
3091 linebuffer_setlen (&token_name, len);
3092 strncpy (token_name.buffer, str, len);
3093 token_name.buffer[len] = '\0';
3094 return TRUE;
3095 }
3096 return FALSE;
3097 case omethodcolon:
3098 if (parlev == 0)
3099 objdef = omethodparm;
3100 return FALSE;
3101 case omethodparm:
3102 if (parlev == 0)
3103 {
3104 fvdef = fvnone;
3105 objdef = omethodtag;
3106 linebuffer_setlen (&token_name, token_name.len + len);
3107 strncat (token_name.buffer, str, len);
3108 return TRUE;
3109 }
3110 return FALSE;
3111 case oignore:
3112 if (toktype == st_C_objend)
3113 {
3114 /* Memory leakage here: the string pointed by objtag is
3115 never released, because many tests would be needed to
3116 avoid breaking on incorrect input code. The amount of
3117 memory leaked here is the sum of the lengths of the
3118 class tags.
3119 free (objtag); */
3120 objdef = onone;
3121 }
3122 return FALSE;
3123 }
3124
3125 /* A function, variable or enum constant? */
3126 switch (toktype)
3127 {
3128 case st_C_extern:
3129 fvextern = TRUE;
3130 switch (fvdef)
3131 {
3132 case finlist:
3133 case flistseen:
3134 case fignore:
3135 case vignore:
3136 break;
3137 default:
3138 fvdef = fvnone;
3139 }
3140 return FALSE;
3141 case st_C_ignore:
3142 fvextern = FALSE;
3143 fvdef = vignore;
3144 return FALSE;
3145 case st_C_operator:
3146 fvdef = foperator;
3147 *is_func_or_var = TRUE;
3148 return TRUE;
3149 case st_none:
3150 if (constantypedefs
3151 && structdef == snone
3152 && structtype == st_C_enum && bracelev > structbracelev)
3153 return TRUE; /* enum constant */
3154 switch (fvdef)
3155 {
3156 case fdefunkey:
3157 if (bracelev > 0)
3158 break;
3159 fvdef = fdefunname; /* GNU macro */
3160 *is_func_or_var = TRUE;
3161 return TRUE;
3162 case fvnone:
3163 switch (typdef)
3164 {
3165 case ttypeseen:
3166 return FALSE;
3167 case tnone:
3168 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3169 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3170 {
3171 fvdef = vignore;
3172 return FALSE;
3173 }
3174 break;
3175 }
3176 /* FALLTHRU */
3177 case fvnameseen:
3178 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3179 {
3180 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3181 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3182 fvdef = foperator;
3183 *is_func_or_var = TRUE;
3184 return TRUE;
3185 }
3186 if (bracelev > 0 && !instruct)
3187 break;
3188 fvdef = fvnameseen; /* function or variable */
3189 *is_func_or_var = TRUE;
3190 return TRUE;
3191 }
3192 break;
3193 }
3194
3195 return FALSE;
3196 }
3197
3198 \f
3199 /*
3200 * C_entries often keeps pointers to tokens or lines which are older than
3201 * the line currently read. By keeping two line buffers, and switching
3202 * them at end of line, it is possible to use those pointers.
 *
 * Each element of lbs pairs a line buffer with linepos, which the
 * CNL macros below set from charno just before reading a line into
 * that buffer.
3203 */
3204 static struct
3205 {
3206 long linepos;
3207 linebuffer lb;
3208 } lbs[2];
3209
/* The macros below are meant to be expanded inside C_entries: they use
   its locals curndx, newndx, c_ext, lp, quotednl, inf and savetoken. */

/* True when both indices designate the same element of lbs. */
3210 #define current_lb_is_new (newndx == curndx)
/* Make the other element of lbs the current one (curndx toggles
   between 0 and 1). */
3211 #define switch_line_buffers() (curndx = 1 - curndx)
3212
/* Shorthands for the buffer and the recorded position of the element
   selected by curndx and by newndx. */
3213 #define curlb (lbs[curndx].lb)
3214 #define newlb (lbs[newndx].lb)
3215 #define curlinepos (lbs[curndx].linepos)
3216 #define newlinepos (lbs[newndx].linepos)
3217
/* Dialect tests on the c_ext flags. */
3218 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3219 #define cplpl (c_ext & C_PLPL)
3220 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3221
/* Read the next input line into curlb, remember charno as its
   position, point lp at its first character, clear quotednl and make
   newndx equal to curndx.  definedef is left untouched, hence the
   name. */
3222 #define CNL_SAVE_DEFINEDEF() \
3223 do { \
3224 curlinepos = charno; \
3225 readline (&curlb, inf); \
3226 lp = curlb.buffer; \
3227 quotednl = FALSE; \
3228 newndx = curndx; \
3229 } while (0)
3230
/* Like CNL_SAVE_DEFINEDEF, but additionally copy savetoken back into
   token when savetoken is valid, and reset definedef to dnone. */
3231 #define CNL() \
3232 do { \
3233 CNL_SAVE_DEFINEDEF(); \
3234 if (savetoken.valid) \
3235 { \
3236 token = savetoken; \
3237 savetoken.valid = FALSE; \
3238 } \
3239 definedef = dnone; \
3240 } while (0)
3241
3242
3243 static void
3244 make_C_tag (isfun)
3245 bool isfun;
3246 {
3247 /* This function is never called when token.valid is FALSE, but
3248 we must protect against invalid input or internal errors. */
3249 if (!DEBUG && !token.valid)
3250 return;
3251
3252 if (token.valid)
3253 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3254 token.offset+token.length+1, token.lineno, token.linepos);
3255 else /* this case is optimised away if !DEBUG */
3256 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3257 token_name.len + 17, isfun, token.line,
3258 token.offset+token.length+1, token.lineno, token.linepos);
3259
3260 token.valid = FALSE;
3261 }
3262
3263
3264 /*
3265 * C_entries ()
3266 * This routine finds functions, variables, typedefs,
3267 * #define's, enum constants and struct/union/enum definitions in
3268 * C syntax and adds them to the list.
 *
 * c_ext holds the dialect flags tested below (C_AUTO, C_PLPL, C_JAVA,
 * YACC).  When C_AUTO is set and a double colon is met inside an
 * identifier, the local copy is switched to C_PLPL.  inf is the
 * stream the lines are read from.
 *
 * The input is scanned one character per loop iteration.  A NUL in
 * the line buffer stands for end of line and makes the CNL macros
 * read the next line.  Tags are emitted through make_C_tag.
3269 */
3270 static void
3271 C_entries (c_ext, inf)
3272 int c_ext; /* extension of C */
3273 FILE *inf; /* input file */
3274 {
3275 register char c; /* latest char read; '\0' for end of line */
3276 register char *lp; /* pointer one beyond the character `c' */
3277 int curndx, newndx; /* indices for current and new lb */
3278 register int tokoff; /* offset in line of start of current token */
3279 register int toklen; /* length of current token */
3280 char *qualifier; /* string used to qualify names */
3281 int qlen; /* length of qualifier */
3282 int bracelev; /* current brace level */
3283 int bracketlev; /* current bracket level */
3284 int parlev; /* current parenthesis level */
3285 int attrparlev; /* __attribute__ parenthesis level */
3286 int templatelev; /* current template level */
3287 int typdefbracelev; /* bracelev where a typedef struct body began */
3288 bool incomm, inquote, inchar, quotednl, midtoken;
3289 bool yacc_rules; /* in the rules part of a yacc file */
3290 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3291
3292
3293 linebuffer_init (&lbs[0].lb);
3294 linebuffer_init (&lbs[1].lb);
/* Allocate the class stack arrays the first time through. */
3295 if (cstack.size == 0)
3296 {
3297 cstack.size = (DEBUG) ? 1 : 4;
3298 cstack.nl = 0;
3299 cstack.cname = xnew (cstack.size, char *);
3300 cstack.bracelev = xnew (cstack.size, int);
3301 }
3302
3303 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3304 curndx = newndx = 0;
/* Start on an empty line, so that the first character fetched by the
   loop is the NUL that stands for end of line. */
3305 lp = curlb.buffer;
3306 *lp = 0;
3307
3308 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3309 structdef = snone; definedef = dnone; objdef = onone;
3310 yacc_rules = FALSE;
3311 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3312 token.valid = savetoken.valid = FALSE;
3313 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Separator written between a class name and a member name. */
3314 if (cjava)
3315 { qualifier = "."; qlen = 1; }
3316 else
3317 { qualifier = "::"; qlen = 2; }
3318
3319
3320 while (!feof (inf))
3321 {
3322 c = *lp++;
3323 if (c == '\\')
3324 {
3325 /* If we are at the end of the line, the next character is a
3326 '\0'; do not skip it, because it is what tells us
3327 to read the next line. */
3328 if (*lp == '\0')
3329 {
3330 quotednl = TRUE;
3331 continue;
3332 }
3333 lp++;
3334 c = ' ';
3335 }
3336 else if (incomm)
3337 {
3338 switch (c)
3339 {
3340 case '*':
3341 if (*lp == '/')
3342 {
3343 c = *lp++;
3344 incomm = FALSE;
3345 }
3346 break;
3347 case '\0':
3348 /* Newlines inside comments do not end macro definitions in
3349 traditional cpp. */
3350 CNL_SAVE_DEFINEDEF ();
3351 break;
3352 }
3353 continue;
3354 }
3355 else if (inquote)
3356 {
3357 switch (c)
3358 {
3359 case '"':
3360 inquote = FALSE;
3361 break;
3362 case '\0':
3363 /* Newlines inside strings do not end macro definitions
3364 in traditional cpp, even though compilers don't
3365 usually accept them. */
3366 CNL_SAVE_DEFINEDEF ();
3367 break;
3368 }
3369 continue;
3370 }
3371 else if (inchar)
3372 {
3373 switch (c)
3374 {
3375 case '\0':
3376 /* Hmmm, something went wrong. */
3377 CNL ();
3378 /* FALLTHRU */
3379 case '\'':
3380 inchar = FALSE;
3381 break;
3382 }
3383 continue;
3384 }
3385 else if (bracketlev > 0)
3386 {
3387 switch (c)
3388 {
3389 case ']':
3390 if (--bracketlev > 0)
3391 continue;
3392 break;
3393 case '\0':
3394 CNL_SAVE_DEFINEDEF ();
3395 break;
3396 }
3397 continue;
3398 }
3399 else switch (c)
3400 {
3401 case '"':
3402 inquote = TRUE;
3403 if (inattribute)
3404 break;
3405 switch (fvdef)
3406 {
3407 case fdefunkey:
3408 case fstartlist:
3409 case finlist:
3410 case fignore:
3411 case vignore:
3412 break;
3413 default:
3414 fvextern = FALSE;
3415 fvdef = fvnone;
3416 }
3417 continue;
3418 case '\'':
3419 inchar = TRUE;
3420 if (inattribute)
3421 break;
3422 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3423 {
3424 fvextern = FALSE;
3425 fvdef = fvnone;
3426 }
3427 continue;
3428 case '/':
3429 if (*lp == '*')
3430 {
3431 incomm = TRUE;
3432 lp++;
3433 c = ' ';
3434 }
3435 else if (/* cplpl && */ *lp == '/')
3436 {
/* A double-slash comment: pretend the line ends here. */
3437 c = '\0';
3438 }
3439 break;
3440 case '%':
3441 if ((c_ext & YACC) && *lp == '%')
3442 {
3443 /* Entering or exiting rules section in yacc file. */
3444 lp++;
3445 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3446 typdef = tnone; structdef = snone;
3447 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3448 bracelev = 0;
3449 yacc_rules = !yacc_rules;
3450 continue;
3451 }
3452 else
3453 break;
3454 case '#':
3455 if (definedef == dnone)
3456 {
3457 char *cp;
3458 bool cpptoken = TRUE;
3459
3460 /* Look back on this line. If all blanks, or nonblanks
3461 followed by an end of comment, this is a preprocessor
3462 token. */
3463 for (cp = newlb.buffer; cp < lp-1; cp++)
3464 if (!iswhite (*cp))
3465 {
3466 if (*cp == '*' && *(cp+1) == '/')
3467 {
3468 cp++;
3469 cpptoken = TRUE;
3470 }
3471 else
3472 cpptoken = FALSE;
3473 }
3474 if (cpptoken)
3475 definedef = dsharpseen;
3476 } /* if (definedef == dnone) */
3477 continue;
3478 case '[':
3479 bracketlev++;
3480 continue;
3481 } /* switch (c) */
3482
3483
3484 /* Consider token only if some involved conditions are satisfied. */
3485 if (typdef != tignore
3486 && definedef != dignorerest
3487 && fvdef != finlist
3488 && templatelev == 0
3489 && (definedef != dnone
3490 || structdef != scolonseen)
3491 && !inattribute)
3492 {
3493 if (midtoken)
3494 {
3495 if (endtoken (c))
3496 {
3497 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3498 /* This handles :: in the middle,
3499 but not at the beginning of an identifier.
3500 Also, space-separated :: is not recognised. */
3501 {
3502 if (c_ext & C_AUTO) /* automatic detection of C++ */
3503 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3504 lp += 2;
3505 toklen += 2;
3506 c = lp[-1];
3507 goto still_in_token;
3508 }
3509 else
3510 {
3511 bool funorvar = FALSE;
3512
3513 if (yacc_rules
3514 || consider_token (newlb.buffer + tokoff, toklen, c,
3515 &c_ext, bracelev, parlev,
3516 &funorvar))
3517 {
3518 if (fvdef == foperator)
3519 {
3520 char *oldlp = lp;
3521 lp = skip_spaces (lp-1);
3522 if (*lp != '\0')
3523 lp += 1;
3524 while (*lp != '\0'
3525 && !iswhite (*lp) && *lp != '(')
3526 lp += 1;
3527 c = *lp++;
3528 toklen += lp - oldlp;
3529 }
3530 token.named = FALSE;
3531 if (!plainc
3532 && nestlev > 0 && definedef == dnone)
3533 /* in struct body */
3534 {
3535 write_classname (&token_name, qualifier);
3536 linebuffer_setlen (&token_name,
3537 token_name.len+qlen+toklen);
3538 strcat (token_name.buffer, qualifier);
3539 strncat (token_name.buffer,
3540 newlb.buffer + tokoff, toklen);
3541 token.named = TRUE;
3542 }
3543 else if (objdef == ocatseen)
3544 /* Objective C category */
3545 {
3546 int len = strlen (objtag) + 2 + toklen;
3547 linebuffer_setlen (&token_name, len);
3548 strcpy (token_name.buffer, objtag);
3549 strcat (token_name.buffer, "(");
3550 strncat (token_name.buffer,
3551 newlb.buffer + tokoff, toklen);
3552 strcat (token_name.buffer, ")");
3553 token.named = TRUE;
3554 }
3555 else if (objdef == omethodtag
3556 || objdef == omethodparm)
3557 /* Objective C method */
3558 {
3559 token.named = TRUE;
3560 }
3561 else if (fvdef == fdefunname)
3562 /* GNU DEFUN and similar macros */
3563 {
3564 bool defun = (newlb.buffer[tokoff] == 'F');
3565 int off = tokoff;
3566 int len = toklen;
3567
3568 /* Rewrite the tag so that emacs lisp DEFUNs
3569 can be found by their elisp name */
3570 if (defun)
3571 {
3572 off += 1;
3573 len -= 1;
3574 }
3575 linebuffer_setlen (&token_name, len);
3576 strncpy (token_name.buffer,
3577 newlb.buffer + off, len);
3578 token_name.buffer[len] = '\0';
3579 if (defun)
3580 while (--len >= 0)
3581 if (token_name.buffer[len] == '_')
3582 token_name.buffer[len] = '-';
3583 token.named = defun;
3584 }
3585 else
3586 {
3587 linebuffer_setlen (&token_name, toklen);
3588 strncpy (token_name.buffer,
3589 newlb.buffer + tokoff, toklen);
3590 token_name.buffer[toklen] = '\0';
3591 /* Name macros and members. */
3592 token.named = (structdef == stagseen
3593 || typdef == ttypeseen
3594 || typdef == tend
3595 || (funorvar
3596 && definedef == dignorerest)
3597 || (funorvar
3598 && definedef == dnone
3599 && structdef == snone
3600 && bracelev > 0));
3601 }
3602 token.lineno = lineno;
3603 token.offset = tokoff;
3604 token.length = toklen;
3605 token.line = newlb.buffer;
3606 token.linepos = newlinepos;
3607 token.valid = TRUE;
3608
/* Either keep the token pending, switching buffers so that the line it
   points into is not overwritten by the next read, or tag it now. */
3609 if (definedef == dnone
3610 && (fvdef == fvnameseen
3611 || fvdef == foperator
3612 || structdef == stagseen
3613 || typdef == tend
3614 || typdef == ttypeseen
3615 || objdef != onone))
3616 {
3617 if (current_lb_is_new)
3618 switch_line_buffers ();
3619 }
3620 else if (definedef != dnone
3621 || fvdef == fdefunname
3622 || instruct)
3623 make_C_tag (funorvar);
3624 }
3625 else /* not yacc and consider_token failed */
3626 {
3627 if (inattribute && fvdef == fignore)
3628 {
3629 /* We have just met __attribute__ after a
3630 function parameter list: do not tag the
3631 function again. */
3632 fvdef = fvnone;
3633 }
3634 }
3635 midtoken = FALSE;
3636 }
3637 } /* if (endtoken (c)) */
3638 else if (intoken (c))
3639 still_in_token:
3640 {
3641 toklen++;
3642 continue;
3643 }
3644 } /* if (midtoken) */
3645 else if (begtoken (c))
3646 {
3647 switch (definedef)
3648 {
3649 case dnone:
3650 switch (fvdef)
3651 {
3652 case fstartlist:
3653 /* This prevents tagging fb in
3654 void (__attribute__((noreturn)) *fb) (void);
3655 Fixing this is not easy and not very important. */
3656 fvdef = finlist;
3657 continue;
3658 case flistseen:
3659 if (plainc || declarations)
3660 {
3661 make_C_tag (TRUE); /* a function */
3662 fvdef = fignore;
3663 }
3664 break;
3665 }
3666 if (structdef == stagseen && !cjava)
3667 {
3668 popclass_above (bracelev);
3669 structdef = snone;
3670 }
3671 break;
3672 case dsharpseen:
3673 savetoken = token;
3674 break;
3675 }
3676 if (!yacc_rules || lp == newlb.buffer + 1)
3677 {
3678 tokoff = lp - 1 - newlb.buffer;
3679 toklen = 1;
3680 midtoken = TRUE;
3681 }
3682 continue;
3683 } /* if (begtoken) */
3684 } /* if must look at token */
3685
3686
3687 /* Detect end of line, colon, comma, semicolon and various braces
3688 after having handled a token.*/
3689 switch (c)
3690 {
3691 case ':':
3692 if (inattribute)
3693 break;
3694 if (yacc_rules && token.offset == 0 && token.valid)
3695 {
3696 make_C_tag (FALSE); /* a yacc function */
3697 break;
3698 }
3699 if (definedef != dnone)
3700 break;
3701 switch (objdef)
3702 {
3703 case otagseen:
3704 objdef = oignore;
3705 make_C_tag (TRUE); /* an Objective C class */
3706 break;
3707 case omethodtag:
3708 case omethodparm:
3709 objdef = omethodcolon;
3710 linebuffer_setlen (&token_name, token_name.len + 1);
3711 strcat (token_name.buffer, ":");
3712 break;
3713 }
3714 if (structdef == stagseen)
3715 {
3716 structdef = scolonseen;
3717 break;
3718 }
3719 /* Should be useless, but may work as a safety net. */
3720 if (cplpl && fvdef == flistseen)
3721 {
3722 make_C_tag (TRUE); /* a function */
3723 fvdef = fignore;
3724 break;
3725 }
3726 break;
3727 case ';':
3728 if (definedef != dnone || inattribute)
3729 break;
3730 switch (typdef)
3731 {
3732 case tend:
3733 case ttypeseen:
3734 make_C_tag (FALSE); /* a typedef */
3735 typdef = tnone;
3736 fvdef = fvnone;
3737 break;
3738 case tnone:
3739 case tinbody:
3740 case tignore:
3741 switch (fvdef)
3742 {
3743 case fignore:
3744 if (typdef == tignore || cplpl)
3745 fvdef = fvnone;
3746 break;
3747 case fvnameseen:
3748 if ((globals && bracelev == 0 && (!fvextern || declarations))
3749 || (members && instruct))
3750 make_C_tag (FALSE); /* a variable */
3751 fvextern = FALSE;
3752 fvdef = fvnone;
3753 token.valid = FALSE;
3754 break;
3755 case flistseen:
3756 if ((declarations
3757 && (cplpl || !instruct)
3758 && (typdef == tnone || (typdef != tignore && instruct)))
3759 || (members
3760 && plainc && instruct))
3761 make_C_tag (TRUE); /* a function */
3762 /* FALLTHRU */
3763 default:
3764 fvextern = FALSE;
3765 fvdef = fvnone;
3766 if (declarations
3767 && cplpl && structdef == stagseen)
3768 make_C_tag (FALSE); /* forward declaration */
3769 else
3770 token.valid = FALSE;
3771 } /* switch (fvdef) */
3772 /* FALLTHRU */
3773 default:
3774 if (!instruct)
3775 typdef = tnone;
3776 }
3777 if (structdef == stagseen)
3778 structdef = snone;
3779 break;
3780 case ',':
3781 if (definedef != dnone || inattribute)
3782 break;
3783 switch (objdef)
3784 {
3785 case omethodtag:
3786 case omethodparm:
3787 make_C_tag (TRUE); /* an Objective C method */
3788 objdef = oinbody;
3789 break;
3790 }
3791 switch (fvdef)
3792 {
3793 case fdefunkey:
3794 case foperator:
3795 case fstartlist:
3796 case finlist:
3797 case fignore:
3798 case vignore:
3799 break;
3800 case fdefunname:
3801 fvdef = fignore;
3802 break;
3803 case fvnameseen:
3804 if (parlev == 0
3805 && ((globals
3806 && bracelev == 0
3807 && templatelev == 0
3808 && (!fvextern || declarations))
3809 || (members && instruct)))
3810 make_C_tag (FALSE); /* a variable */
3811 break;
3812 case flistseen:
3813 if ((declarations && typdef == tnone && !instruct)
3814 || (members && typdef != tignore && instruct))
3815 {
3816 make_C_tag (TRUE); /* a function */
3817 fvdef = fvnameseen;
3818 }
3819 else if (!declarations)
3820 fvdef = fvnone;
3821 token.valid = FALSE;
3822 break;
3823 default:
3824 fvdef = fvnone;
3825 }
3826 if (structdef == stagseen)
3827 structdef = snone;
3828 break;
3829 case ']':
3830 if (definedef != dnone || inattribute)
3831 break;
3832 if (structdef == stagseen)
3833 structdef = snone;
3834 switch (typdef)
3835 {
3836 case ttypeseen:
3837 case tend:
3838 typdef = tignore;
3839 make_C_tag (FALSE); /* a typedef */
3840 break;
3841 case tnone:
3842 case tinbody:
3843 switch (fvdef)
3844 {
3845 case foperator:
3846 case finlist:
3847 case fignore:
3848 case vignore:
3849 break;
3850 case fvnameseen:
3851 if ((members && bracelev == 1)
3852 || (globals && bracelev == 0
3853 && (!fvextern || declarations)))
3854 make_C_tag (FALSE); /* a variable */
3855 /* FALLTHRU */
3856 default:
3857 fvdef = fvnone;
3858 }
3859 break;
3860 }
3861 break;
3862 case '(':
3863 if (inattribute)
3864 {
3865 attrparlev++;
3866 break;
3867 }
3868 if (definedef != dnone)
3869 break;
3870 if (objdef == otagseen && parlev == 0)
3871 objdef = oparenseen;
3872 switch (fvdef)
3873 {
3874 case fvnameseen:
3875 if (typdef == ttypeseen
3876 && *lp != '*'
3877 && !instruct)
3878 {
3879 /* This handles constructs like:
3880 typedef void OperatorFun (int fun); */
3881 make_C_tag (FALSE);
3882 typdef = tignore;
3883 fvdef = fignore;
3884 break;
3885 }
3886 /* FALLTHRU */
3887 case foperator:
3888 fvdef = fstartlist;
3889 break;
3890 case flistseen:
3891 fvdef = finlist;
3892 break;
3893 }
3894 parlev++;
3895 break;
3896 case ')':
3897 if (inattribute)
3898 {
3899 if (--attrparlev == 0)
3900 inattribute = FALSE;
3901 break;
3902 }
3903 if (definedef != dnone)
3904 break;
3905 if (objdef == ocatseen && parlev == 1)
3906 {
3907 make_C_tag (TRUE); /* an Objective C category */
3908 objdef = oignore;
3909 }
3910 if (--parlev == 0)
3911 {
3912 switch (fvdef)
3913 {
3914 case fstartlist:
3915 case finlist:
3916 fvdef = flistseen;
3917 break;
3918 }
3919 if (!instruct
3920 && (typdef == tend
3921 || typdef == ttypeseen))
3922 {
3923 typdef = tignore;
3924 make_C_tag (FALSE); /* a typedef */
3925 }
3926 }
3927 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3928 parlev = 0;
3929 break;
3930 case '{':
3931 if (definedef != dnone)
3932 break;
3933 if (typdef == ttypeseen)
3934 {
3935 /* Whenever typdef is set to tinbody (currently only
3936 here), typdefbracelev should be set to bracelev. */
3937 typdef = tinbody;
3938 typdefbracelev = bracelev;
3939 }
3940 switch (fvdef)
3941 {
3942 case flistseen:
3943 make_C_tag (TRUE); /* a function */
3944 /* FALLTHRU */
3945 case fignore:
3946 fvdef = fvnone;
3947 break;
3948 case fvnone:
3949 switch (objdef)
3950 {
3951 case otagseen:
3952 make_C_tag (TRUE); /* an Objective C class */
3953 objdef = oignore;
3954 break;
3955 case omethodtag:
3956 case omethodparm:
3957 make_C_tag (TRUE); /* an Objective C method */
3958 objdef = oinbody;
3959 break;
3960 default:
3961 /* Neutralize `extern "C" {' grot. */
3962 if (bracelev == 0 && structdef == snone && nestlev == 0
3963 && typdef == tnone)
3964 bracelev = -1;
3965 }
3966 break;
3967 }
3968 switch (structdef)
3969 {
3970 case skeyseen: /* unnamed struct */
3971 pushclass_above (bracelev, NULL, 0);
3972 structdef = snone;
3973 break;
3974 case stagseen: /* named struct or enum */
3975 case scolonseen: /* a class */
3976 pushclass_above (bracelev,token.line+token.offset, token.length);
3977 structdef = snone;
3978 make_C_tag (FALSE); /* a struct or enum */
3979 break;
3980 }
3981 bracelev++;
3982 break;
3983 case '*':
3984 if (definedef != dnone)
3985 break;
3986 if (fvdef == fstartlist)
3987 {
3988 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3989 token.valid = FALSE;
3990 }
3991 break;
3992 case '}':
3993 if (definedef != dnone)
3994 break;
3995 if (!ignoreindent && lp == newlb.buffer + 1)
3996 {
3997 if (bracelev != 0)
3998 token.valid = FALSE;
3999 bracelev = 0; /* reset brace level if first column */
4000 parlev = 0; /* also reset paren level, just in case... */
4001 }
4002 else
4003 {
4004 if (--bracelev < 0)
4005 {
4006 bracelev = 0;
4007 token.valid = FALSE; /* something gone amiss, token unreliable */
4008 }
4009 if (bracelev == 0 && fvdef == vignore)
4010 fvdef = fvnone; /* end of function */
4011 }
4012 popclass_above (bracelev);
4013 structdef = snone;
4014 /* Only if typdef == tinbody is typdefbracelev significant. */
4015 if (typdef == tinbody && bracelev <= typdefbracelev)
4016 {
4017 assert (bracelev == typdefbracelev);
4018 typdef = tend;
4019 }
4020 break;
4021 case '=':
4022 if (definedef != dnone)
4023 break;
4024 switch (fvdef)
4025 {
4026 case foperator:
4027 case finlist:
4028 case fignore:
4029 case vignore:
4030 break;
4031 case fvnameseen:
4032 if ((members && bracelev == 1)
4033 || (globals && bracelev == 0 && (!fvextern || declarations)))
4034 make_C_tag (FALSE); /* a variable */
4035 /* FALLTHRU */
4036 default:
4037 fvdef = vignore;
4038 }
4039 break;
4040 case '<':
4041 if (cplpl
4042 && (structdef == stagseen || fvdef == fvnameseen))
4043 {
4044 templatelev++;
4045 break;
4046 }
4047 goto resetfvdef;
4048 case '>':
4049 if (templatelev > 0)
4050 {
4051 templatelev--;
4052 break;
4053 }
4054 goto resetfvdef;
4055 case '+':
4056 case '-':
4057 if (objdef == oinbody && bracelev == 0)
4058 {
4059 objdef = omethodsign;
4060 break;
4061 }
4062 /* FALLTHRU */
4063 resetfvdef:
4064 case '#': case '~': case '&': case '%': case '/':
4065 case '|': case '^': case '!': case '.': case '?':
4066 if (definedef != dnone)
4067 break;
4068 /* These surely cannot follow a function tag in C. */
4069 switch (fvdef)
4070 {
4071 case foperator:
4072 case finlist:
4073 case fignore:
4074 case vignore:
4075 break;
4076 default:
4077 fvdef = fvnone;
4078 }
4079 break;
4080 case '\0':
4081 if (objdef == otagseen)
4082 {
4083 make_C_tag (TRUE); /* an Objective C class */
4084 objdef = oignore;
4085 }
4086 /* If a macro spans multiple lines don't reset its state. */
4087 if (quotednl)
4088 CNL_SAVE_DEFINEDEF ();
4089 else
4090 CNL ();
4091 break;
4092 } /* switch (c) */
4093
4094 } /* while not eof */
4095
/* Release the two line buffers initialised at function entry. */
4096 free (lbs[0].lb.buffer);
4097 free (lbs[1].lb.buffer);
4098 }
4099
4100 /*
4101 * Process either a C++ file or a C file depending on the setting
4102 * of a global flag.
4103 */
4104 static void
4105 default_C_entries (inf)
4106 FILE *inf;
4107 {
4108 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4109 }
4110
/* Parse INF as plain C: no extension flag is passed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4118
4119 /* Always do C++. */
4120 static void
4121 Cplusplus_entries (inf)
4122 FILE *inf;
4123 {
4124 C_entries (C_PLPL, inf);
4125 }
4126
4127 /* Always do Java. */
4128 static void
4129 Cjava_entries (inf)
4130 FILE *inf;
4131 {
4132 C_entries (C_JAVA, inf);
4133 }
4134
4135 /* Always do C*. */
4136 static void
4137 Cstar_entries (inf)
4138 FILE *inf;
4139 {
4140 C_entries (C_STAR, inf);
4141 }
4142
4143 /* Always do Yacc. */
4144 static void
4145 Yacc_entries (inf)
4146 FILE *inf;
4147 {
4148 C_entries (YACC, inf);
4149 }
4150
4151 \f
4152 /* Useful macros. */
/* Loop header, to be followed by the loop body.  As long as
   FILE_POINTER is not at end of file, each iteration first reads a
   line into LINE_BUFFER with readline and points CHAR_POINTER at the
   start of that buffer. */
4153 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4154 for (; /* loop initialization */ \
4155 !feof (file_pointer) /* loop test */ \
4156 && /* instructions at start of loop */ \
4157 (readline (&line_buffer, file_pointer), \
4158 char_pointer = line_buffer.buffer, \
4159 TRUE); \
4160 )
4161
/* True when CP points at the keyword KW and the character after it
   satisfies notinname.  On success CP is moved past the keyword and
   past the spaces that follow it; on failure CP is left alone. */
4162 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4163 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4164 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4165 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4166 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4167
4168 /* Similar to LOOKING_AT but does not use notinname, does not skip */
/* The comparison is done with strncaseeq, and on success CP is left
   right after the keyword. */
4169 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4170 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4171 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4172 && ((cp) += sizeof(kw)-1)) /* advance cp past kw */
4173
4174 /*
4175 * Read a file, but do no processing. This is used to do regexp
4176 * matching on files that have no language defined.
4177 */
4178 static void
4179 just_read_file (inf)
4180 FILE *inf;
4181 {
4182 register char *dummy;
4183
4184 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4185 continue;
4186 }
4187
4188 \f
4189 /* Fortran parsing */
4190
4191 static void F_takeprec __P((void));
4192 static void F_getit __P((FILE *));
4193
4194 static void
4195 F_takeprec ()
4196 {
4197 dbp = skip_spaces (dbp);
4198 if (*dbp != '*')
4199 return;
4200 dbp++;
4201 dbp = skip_spaces (dbp);
4202 if (strneq (dbp, "(*)", 3))
4203 {
4204 dbp += 3;
4205 return;
4206 }
4207 if (!ISDIGIT (*dbp))
4208 {
4209 --dbp; /* force failure */
4210 return;
4211 }
4212 do
4213 dbp++;
4214 while (ISDIGIT (*dbp));
4215 }
4216
4217 static void
4218 F_getit (inf)
4219 FILE *inf;
4220 {
4221 register char *cp;
4222
4223 dbp = skip_spaces (dbp);
4224 if (*dbp == '\0')
4225 {
4226 readline (&lb, inf);
4227 dbp = lb.buffer;
4228 if (dbp[5] != '&')
4229 return;
4230 dbp += 6;
4231 dbp = skip_spaces (dbp);
4232 }
4233 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4234 return;
4235 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4236 continue;
4237 make_tag (dbp, cp-dbp, TRUE,
4238 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4239 }
4240
4241
4242 static void
4243 Fortran_functions (inf)
4244 FILE *inf;
4245 {
4246 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4247 {
4248 if (*dbp == '%')
4249 dbp++; /* Ratfor escape to fortran */
4250 dbp = skip_spaces (dbp);
4251 if (*dbp == '\0')
4252 continue;
4253 switch (lowcase (*dbp))
4254 {
4255 case 'i':
4256 if (nocase_tail ("integer"))
4257 F_takeprec ();
4258 break;
4259 case 'r':
4260 if (nocase_tail ("real"))
4261 F_takeprec ();
4262 break;
4263 case 'l':
4264 if (nocase_tail ("logical"))
4265 F_takeprec ();
4266 break;
4267 case 'c':
4268 if (nocase_tail ("complex") || nocase_tail ("character"))
4269 F_takeprec ();
4270 break;
4271 case 'd':
4272 if (nocase_tail ("double"))
4273 {
4274 dbp = skip_spaces (dbp);
4275 if (*dbp == '\0')
4276 continue;
4277 if (nocase_tail ("precision"))
4278 break;
4279 continue;
4280 }
4281 break;
4282 }
4283 dbp = skip_spaces (dbp);
4284 if (*dbp == '\0')
4285 continue;
4286 switch (lowcase (*dbp))
4287 {
4288 case 'f':
4289 if (nocase_tail ("function"))
4290 F_getit (inf);
4291 continue;
4292 case 's':
4293 if (nocase_tail ("subroutine"))
4294 F_getit (inf);
4295 continue;
4296 case 'e':
4297 if (nocase_tail ("entry"))
4298 F_getit (inf);
4299 continue;
4300 case 'b':
4301 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4302 {
4303 dbp = skip_spaces (dbp);
4304 if (*dbp == '\0') /* assume un-named */
4305 make_tag ("blockdata", 9, TRUE,
4306 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4307 else
4308 F_getit (inf); /* look for name */
4309 }
4310 continue;
4311 }
4312 }
4313 }
4314
4315 \f
4316 /*
4317 * Ada parsing
4318 * Original code by
4319 * Philippe Waroquiers (1998)
4320 */
4321
4322 static void Ada_getit __P((FILE *, char *));
4323
4324 /* Once we are positioned after an "interesting" keyword, let's get
4325 the real tag value necessary. */
4326 static void
4327 Ada_getit (inf, name_qualifier)
4328 FILE *inf;
4329 char *name_qualifier;
4330 {
4331 register char *cp;
4332 char *name;
4333 char c;
4334
4335 while (!feof (inf))
4336 {
4337 dbp = skip_spaces (dbp);
4338 if (*dbp == '\0'
4339 || (dbp[0] == '-' && dbp[1] == '-'))
4340 {
4341 readline (&lb, inf);
4342 dbp = lb.buffer;
4343 }
4344 switch (lowcase(*dbp))
4345 {
4346 case 'b':
4347 if (nocase_tail ("body"))
4348 {
4349 /* Skipping body of procedure body or package body or ....
4350 resetting qualifier to body instead of spec. */
4351 name_qualifier = "/b";
4352 continue;
4353 }
4354 break;
4355 case 't':
4356 /* Skipping type of task type or protected type ... */
4357 if (nocase_tail ("type"))
4358 continue;
4359 break;
4360 }
4361 if (*dbp == '"')
4362 {
4363 dbp += 1;
4364 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4365 continue;
4366 }
4367 else
4368 {
4369 dbp = skip_spaces (dbp);
4370 for (cp = dbp;
4371 (*cp != '\0'
4372 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4373 cp++)
4374 continue;
4375 if (cp == dbp)
4376 return;
4377 }
4378 c = *cp;
4379 *cp = '\0';
4380 name = concat (dbp, name_qualifier, "");
4381 *cp = c;
4382 make_tag (name, strlen (name), TRUE,
4383 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4384 free (name);
4385 if (c == '"')
4386 dbp = cp + 1;
4387 return;
4388 }
4389 }
4390
4391 static void
4392 Ada_funcs (inf)
4393 FILE *inf;
4394 {
4395 bool inquote = FALSE;
4396 bool skip_till_semicolumn = FALSE;
4397
4398 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4399 {
4400 while (*dbp != '\0')
4401 {
4402 /* Skip a string i.e. "abcd". */
4403 if (inquote || (*dbp == '"'))
4404 {
4405 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4406 if (dbp != NULL)
4407 {
4408 inquote = FALSE;
4409 dbp += 1;
4410 continue; /* advance char */
4411 }
4412 else
4413 {
4414 inquote = TRUE;
4415 break; /* advance line */
4416 }
4417 }
4418
4419 /* Skip comments. */
4420 if (dbp[0] == '-' && dbp[1] == '-')
4421 break; /* advance line */
4422
4423 /* Skip character enclosed in single quote i.e. 'a'
4424 and skip single quote starting an attribute i.e. 'Image. */
4425 if (*dbp == '\'')
4426 {
4427 dbp++ ;
4428 if (*dbp != '\0')
4429 dbp++;
4430 continue;
4431 }
4432
4433 if (skip_till_semicolumn)
4434 {
4435 if (*dbp == ';')
4436 skip_till_semicolumn = FALSE;
4437 dbp++;
4438 continue; /* advance char */
4439 }
4440
4441 /* Search for beginning of a token. */
4442 if (!begtoken (*dbp))
4443 {
4444 dbp++;
4445 continue; /* advance char */
4446 }
4447
4448 /* We are at the beginning of a token. */
4449 switch (lowcase(*dbp))
4450 {
4451 case 'f':
4452 if (!packages_only && nocase_tail ("function"))
4453 Ada_getit (inf, "/f");
4454 else
4455 break; /* from switch */
4456 continue; /* advance char */
4457 case 'p':
4458 if (!packages_only && nocase_tail ("procedure"))
4459 Ada_getit (inf, "/p");
4460 else if (nocase_tail ("package"))
4461 Ada_getit (inf, "/s");
4462 else if (nocase_tail ("protected")) /* protected type */
4463 Ada_getit (inf, "/t");
4464 else
4465 break; /* from switch */
4466 continue; /* advance char */
4467
4468 case 'u':
4469 if (typedefs && !packages_only && nocase_tail ("use"))
4470 {
4471 /* when tagging types, avoid tagging use type Pack.Typename;
4472 for this, we will skip everything till a ; */
4473 skip_till_semicolumn = TRUE;
4474 continue; /* advance char */
4475 }
4476
4477 case 't':
4478 if (!packages_only && nocase_tail ("task"))
4479 Ada_getit (inf, "/k");
4480 else if (typedefs && !packages_only && nocase_tail ("type"))
4481 {
4482 Ada_getit (inf, "/t");
4483 while (*dbp != '\0')
4484 dbp += 1;
4485 }
4486 else
4487 break; /* from switch */
4488 continue; /* advance char */
4489 }
4490
4491 /* Look for the end of the token. */
4492 while (!endtoken (*dbp))
4493 dbp++;
4494
4495 } /* advance char */
4496 } /* advance line */
4497 }
4498
4499 \f
4500 /*
4501 * Unix and microcontroller assembly tag handling
4502 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4503 * Idea by Bob Weiner, Motorola Inc. (1994)
4504 */
4505 static void
4506 Asm_labels (inf)
4507 FILE *inf;
4508 {
4509 register char *cp;
4510
4511 LOOP_ON_INPUT_LINES (inf, lb, cp)
4512 {
4513 /* If first char is alphabetic or one of [_.$], test for colon
4514 following identifier. */
4515 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4516 {
4517 /* Read past label. */
4518 cp++;
4519 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4520 cp++;
4521 if (*cp == ':' || iswhite (*cp))
4522 /* Found end of label, so copy it and add it to the table. */
4523 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4524 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4525 }
4526 }
4527 }
4528
4529 \f
4530 /*
4531 * Perl support
4532 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4533 * Perl variable names: /^(my|local).../
4534 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4535 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4536 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4537 */
4538 static void
4539 Perl_functions (inf)
4540 FILE *inf;
4541 {
4542 char *package = savestr ("main"); /* current package name */
4543 register char *cp;
4544
4545 LOOP_ON_INPUT_LINES (inf, lb, cp)
4546 {
4547 skip_spaces(cp);
4548
4549 if (LOOKING_AT (cp, "package"))
4550 {
4551 free (package);
4552 get_tag (cp, &package);
4553 }
4554 else if (LOOKING_AT (cp, "sub"))
4555 {
4556 char *pos;
4557 char *sp = cp;
4558
4559 while (!notinname (*cp))
4560 cp++;
4561 if (cp == sp)
4562 continue; /* nothing found */
4563 if ((pos = etags_strchr (sp, ':')) != NULL
4564 && pos < cp && pos[1] == ':')
4565 /* The name is already qualified. */
4566 make_tag (sp, cp - sp, TRUE,
4567 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4568 else
4569 /* Qualify it. */
4570 {
4571 char savechar, *name;
4572
4573 savechar = *cp;
4574 *cp = '\0';
4575 name = concat (package, "::", sp);
4576 *cp = savechar;
4577 make_tag (name, strlen(name), TRUE,
4578 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4579 free (name);
4580 }
4581 }
4582 else if (globals) /* only if we are tagging global vars */
4583 {
4584 /* Skip a qualifier, if any. */
4585 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4586 /* After "my" or "local", but before any following paren or space. */
4587 char *varstart = cp;
4588
4589 if (qual /* should this be removed? If yes, how? */
4590 && (*cp == '$' || *cp == '@' || *cp == '%'))
4591 {
4592 varstart += 1;
4593 do
4594 cp++;
4595 while (ISALNUM (*cp) || *cp == '_');
4596 }
4597 else if (qual)
4598 {
4599 /* Should be examining a variable list at this point;
4600 could insist on seeing an open parenthesis. */
4601 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4602 cp++;
4603 }
4604 else
4605 continue;
4606
4607 make_tag (varstart, cp - varstart, FALSE,
4608 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4609 }
4610 }
4611 free (package);
4612 }
4613
4614
4615 /*
4616 * Python support
4617 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4618 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4619 * More ideas by seb bacon <seb@jamkit.com> (2002)
4620 */
4621 static void
4622 Python_functions (inf)
4623 FILE *inf;
4624 {
4625 register char *cp;
4626
4627 LOOP_ON_INPUT_LINES (inf, lb, cp)
4628 {
4629 cp = skip_spaces (cp);
4630 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4631 {
4632 char *name = cp;
4633 while (!notinname (*cp) && *cp != ':')
4634 cp++;
4635 make_tag (name, cp - name, TRUE,
4636 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4637 }
4638 }
4639 }
4640
4641 \f
4642 /*
4643 * PHP support
4644 * Look for:
4645 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4646 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4647 * - /^[ \t]*define\(\"[^\"]+/
4648 * Only with --members:
4649 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4650 * Idea by Diez B. Roggisch (2001)
4651 */
4652 static void
4653 PHP_functions (inf)
4654 FILE *inf;
4655 {
4656 register char *cp, *name;
4657 bool search_identifier = FALSE;
4658
4659 LOOP_ON_INPUT_LINES (inf, lb, cp)
4660 {
4661 cp = skip_spaces (cp);
4662 name = cp;
4663 if (search_identifier
4664 && *cp != '\0')
4665 {
4666 while (!notinname (*cp))
4667 cp++;
4668 make_tag (name, cp - name, TRUE,
4669 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4670 search_identifier = FALSE;
4671 }
4672 else if (LOOKING_AT (cp, "function"))
4673 {
4674 if(*cp == '&')
4675 cp = skip_spaces (cp+1);
4676 if(*cp != '\0')
4677 {
4678 name = cp;
4679 while (!notinname (*cp))
4680 cp++;
4681 make_tag (name, cp - name, TRUE,
4682 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4683 }
4684 else
4685 search_identifier = TRUE;
4686 }
4687 else if (LOOKING_AT (cp, "class"))
4688 {
4689 if (*cp != '\0')
4690 {
4691 name = cp;
4692 while (*cp != '\0' && !iswhite (*cp))
4693 cp++;
4694 make_tag (name, cp - name, FALSE,
4695 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4696 }
4697 else
4698 search_identifier = TRUE;
4699 }
4700 else if (strneq (cp, "define", 6)
4701 && (cp = skip_spaces (cp+6))
4702 && *cp++ == '('
4703 && (*cp == '"' || *cp == '\''))
4704 {
4705 char quote = *cp++;
4706 name = cp;
4707 while (*cp != quote && *cp != '\0')
4708 cp++;
4709 make_tag (name, cp - name, FALSE,
4710 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4711 }
4712 else if (members
4713 && LOOKING_AT (cp, "var")
4714 && *cp == '$')
4715 {
4716 name = cp;
4717 while (!notinname(*cp))
4718 cp++;
4719 make_tag (name, cp - name, FALSE,
4720 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4721 }
4722 }
4723 }
4724
4725 \f
4726 /*
4727 * Cobol tag functions
4728 * We could look for anything that could be a paragraph name.
4729 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4730 * Idea by Corny de Souza (1993)
4731 */
4732 static void
4733 Cobol_paragraphs (inf)
4734 FILE *inf;
4735 {
4736 register char *bp, *ep;
4737
4738 LOOP_ON_INPUT_LINES (inf, lb, bp)
4739 {
4740 if (lb.len < 9)
4741 continue;
4742 bp += 8;
4743
4744 /* If eoln, compiler option or comment ignore whole line. */
4745 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4746 continue;
4747
4748 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4749 continue;
4750 if (*ep++ == '.')
4751 make_tag (bp, ep - bp, TRUE,
4752 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4753 }
4754 }
4755
4756 \f
4757 /*
4758 * Makefile support
4759 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4760 */
4761 static void
4762 Makefile_targets (inf)
4763 FILE *inf;
4764 {
4765 register char *bp;
4766
4767 LOOP_ON_INPUT_LINES (inf, lb, bp)
4768 {
4769 if (*bp == '\t' || *bp == '#')
4770 continue;
4771 while (*bp != '\0' && *bp != '=' && *bp != ':')
4772 bp++;
4773 if (*bp == ':' || (globals && *bp == '='))
4774 {
4775 /* We should detect if there is more than one tag, but we do not.
4776 We just skip initial and final spaces. */
4777 char * namestart = skip_spaces (lb.buffer);
4778 while (--bp > namestart)
4779 if (!notinname (*bp))
4780 break;
4781 make_tag (namestart, bp - namestart + 1, TRUE,
4782 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4783 }
4784 }
4785 }
4786
4787 \f
4788 /*
4789 * Pascal parsing
4790 * Original code by Mosur K. Mohan (1989)
4791 *
4792 * Locates tags for procedures & functions. Doesn't do any type- or
4793 * var-definitions. It does look for the keyword "extern" or
4794 * "forward" immediately following the procedure statement; if found,
4795 * the tag is skipped.
4796 */
4797 static void
4798 Pascal_functions (inf)
4799 FILE *inf;
4800 {
4801 linebuffer tline; /* mostly copied from C_entries */
4802 long save_lcno;
4803 int save_lineno, namelen, taglen;
4804 char c, *name;
4805
4806 bool /* each of these flags is TRUE if: */
4807 incomment, /* point is inside a comment */
4808 inquote, /* point is inside '..' string */
4809 get_tagname, /* point is after PROCEDURE/FUNCTION
4810 keyword, so next item = potential tag */
4811 found_tag, /* point is after a potential tag */
4812 inparms, /* point is within parameter-list */
4813 verify_tag; /* point has passed the parm-list, so the
4814 next token will determine whether this
4815 is a FORWARD/EXTERN to be ignored, or
4816 whether it is a real tag */
4817
4818 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4819 name = NULL; /* keep compiler quiet */
4820 dbp = lb.buffer;
4821 *dbp = '\0';
4822 linebuffer_init (&tline);
4823
4824 incomment = inquote = FALSE;
4825 found_tag = FALSE; /* have a proc name; check if extern */
4826 get_tagname = FALSE; /* found "procedure" keyword */
4827 inparms = FALSE; /* found '(' after "proc" */
4828 verify_tag = FALSE; /* check if "extern" is ahead */
4829
4830
4831 while (!feof (inf)) /* long main loop to get next char */
4832 {
4833 c = *dbp++;
4834 if (c == '\0') /* if end of line */
4835 {
4836 readline (&lb, inf);
4837 dbp = lb.buffer;
4838 if (*dbp == '\0')
4839 continue;
4840 if (!((found_tag && verify_tag)
4841 || get_tagname))
4842 c = *dbp++; /* only if don't need *dbp pointing
4843 to the beginning of the name of
4844 the procedure or function */
4845 }
4846 if (incomment)
4847 {
4848 if (c == '}') /* within { } comments */
4849 incomment = FALSE;
4850 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4851 {
4852 dbp++;
4853 incomment = FALSE;
4854 }
4855 continue;
4856 }
4857 else if (inquote)
4858 {
4859 if (c == '\'')
4860 inquote = FALSE;
4861 continue;
4862 }
4863 else
4864 switch (c)
4865 {
4866 case '\'':
4867 inquote = TRUE; /* found first quote */
4868 continue;
4869 case '{': /* found open { comment */
4870 incomment = TRUE;
4871 continue;
4872 case '(':
4873 if (*dbp == '*') /* found open (* comment */
4874 {
4875 incomment = TRUE;
4876 dbp++;
4877 }
4878 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4879 inparms = TRUE;
4880 continue;
4881 case ')': /* end of parms list */
4882 if (inparms)
4883 inparms = FALSE;
4884 continue;
4885 case ';':
4886 if (found_tag && !inparms) /* end of proc or fn stmt */
4887 {
4888 verify_tag = TRUE;
4889 break;
4890 }
4891 continue;
4892 }
4893 if (found_tag && verify_tag && (*dbp != ' '))
4894 {
4895 /* Check if this is an "extern" declaration. */
4896 if (*dbp == '\0')
4897 continue;
4898 if (lowcase (*dbp == 'e'))
4899 {
4900 if (nocase_tail ("extern")) /* superfluous, really! */
4901 {
4902 found_tag = FALSE;
4903 verify_tag = FALSE;
4904 }
4905 }
4906 else if (lowcase (*dbp) == 'f')
4907 {
4908 if (nocase_tail ("forward")) /* check for forward reference */
4909 {
4910 found_tag = FALSE;
4911 verify_tag = FALSE;
4912 }
4913 }
4914 if (found_tag && verify_tag) /* not external proc, so make tag */
4915 {
4916 found_tag = FALSE;
4917 verify_tag = FALSE;
4918 make_tag (name, namelen, TRUE,
4919 tline.buffer, taglen, save_lineno, save_lcno);
4920 continue;
4921 }
4922 }
4923 if (get_tagname) /* grab name of proc or fn */
4924 {
4925 char *cp;
4926
4927 if (*dbp == '\0')
4928 continue;
4929
4930 /* Find block name. */
4931 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4932 continue;
4933
4934 /* Save all values for later tagging. */
4935 linebuffer_setlen (&tline, lb.len);
4936 strcpy (tline.buffer, lb.buffer);
4937 save_lineno = lineno;
4938 save_lcno = linecharno;
4939 name = tline.buffer + (dbp - lb.buffer);
4940 namelen = cp - dbp;
4941 taglen = cp - lb.buffer + 1;
4942
4943 dbp = cp; /* set dbp to e-o-token */
4944 get_tagname = FALSE;
4945 found_tag = TRUE;
4946 continue;
4947
4948 /* And proceed to check for "extern". */
4949 }
4950 else if (!incomment && !inquote && !found_tag)
4951 {
4952 /* Check for proc/fn keywords. */
4953 switch (lowcase (c))
4954 {
4955 case 'p':
4956 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4957 get_tagname = TRUE;
4958 continue;
4959 case 'f':
4960 if (nocase_tail ("unction"))
4961 get_tagname = TRUE;
4962 continue;
4963 }
4964 }
4965 } /* while not eof */
4966
4967 free (tline.buffer);
4968 }
4969
4970 \f
4971 /*
4972 * Lisp tag functions
4973 * look for (def or (DEF, quote or QUOTE
4974 */
4975
4976 static void L_getit __P((void));
4977
4978 static void
4979 L_getit ()
4980 {
4981 if (*dbp == '\'') /* Skip prefix quote */
4982 dbp++;
4983 else if (*dbp == '(')
4984 {
4985 dbp++;
4986 /* Try to skip "(quote " */
4987 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4988 /* Ok, then skip "(" before name in (defstruct (foo)) */
4989 dbp = skip_spaces (dbp);
4990 }
4991 get_tag (dbp, NULL);
4992 }
4993
4994 static void
4995 Lisp_functions (inf)
4996 FILE *inf;
4997 {
4998 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4999 {
5000 if (dbp[0] != '(')
5001 continue;
5002
5003 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5004 {
5005 dbp = skip_non_spaces (dbp);
5006 dbp = skip_spaces (dbp);
5007 L_getit ();
5008 }
5009 else
5010 {
5011 /* Check for (foo::defmumble name-defined ... */
5012 do
5013 dbp++;
5014 while (!notinname (*dbp) && *dbp != ':');
5015 if (*dbp == ':')
5016 {
5017 do
5018 dbp++;
5019 while (*dbp == ':');
5020
5021 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5022 {
5023 dbp = skip_non_spaces (dbp);
5024 dbp = skip_spaces (dbp);
5025 L_getit ();
5026 }
5027 }
5028 }
5029 }
5030 }
5031
5032 \f
5033 /*
5034 * Lua script language parsing
5035 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5036 *
5037 * "function" and "local function" are tags if they start at column 1.
5038 */
5039 static void
5040 Lua_functions (inf)
5041 FILE *inf;
5042 {
5043 register char *bp;
5044
5045 LOOP_ON_INPUT_LINES (inf, lb, bp)
5046 {
5047 if (bp[0] != 'f' && bp[0] != 'l')
5048 continue;
5049
5050 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5051
5052 if (LOOKING_AT (bp, "function"))
5053 get_tag (bp, NULL);
5054 }
5055 }
5056
5057 \f
5058 /*
5059 * Postscript tags
5060 * Just look for lines where the first character is '/'
5061 * Also look at "defineps" for PSWrap
5062 * Ideas by:
5063 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5064 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5065 */
5066 static void
5067 PS_functions (inf)
5068 FILE *inf;
5069 {
5070 register char *bp, *ep;
5071
5072 LOOP_ON_INPUT_LINES (inf, lb, bp)
5073 {
5074 if (bp[0] == '/')
5075 {
5076 for (ep = bp+1;
5077 *ep != '\0' && *ep != ' ' && *ep != '{';
5078 ep++)
5079 continue;
5080 make_tag (bp, ep - bp, TRUE,
5081 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5082 }
5083 else if (LOOKING_AT (bp, "defineps"))
5084 get_tag (bp, NULL);
5085 }
5086 }
5087
5088 \f
5089 /*
5090 * Forth tags
5091 * Ignore anything after \ followed by space or in ( )
5092 * Look for words defined by :
5093 * Look for constant, code, create, defer, value, and variable
5094 * OBP extensions: Look for buffer:, field,
5095 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5096 */
5097 static void
5098 Forth_words (inf)
5099 FILE *inf;
5100 {
5101 register char *bp;
5102
5103 LOOP_ON_INPUT_LINES (inf, lb, bp)
5104 while ((bp = skip_spaces (bp))[0] != '\0')
5105 if (bp[0] == '\\' && iswhite(bp[1]))
5106 break; /* read next line */
5107 else if (bp[0] == '(' && iswhite(bp[1]))
5108 do /* skip to ) or eol */
5109 bp++;
5110 while (*bp != ')' && *bp != '\0');
5111 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5112 || LOOKING_AT_NOCASE (bp, "constant")
5113 || LOOKING_AT_NOCASE (bp, "code")
5114 || LOOKING_AT_NOCASE (bp, "create")
5115 || LOOKING_AT_NOCASE (bp, "defer")
5116 || LOOKING_AT_NOCASE (bp, "value")
5117 || LOOKING_AT_NOCASE (bp, "variable")
5118 || LOOKING_AT_NOCASE (bp, "buffer:")
5119 || LOOKING_AT_NOCASE (bp, "field"))
5120 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5121 else
5122 bp = skip_non_spaces (bp);
5123 }
5124
5125 \f
5126 /*
5127 * Scheme tag functions
5128 * look for (def... xyzzy
5129 * (def... (xyzzy
5130 * (def ... ((...(xyzzy ....
5131 * (set! xyzzy
5132 * Original code by Ken Haase (1985?)
5133 */
5134 static void
5135 Scheme_functions (inf)
5136 FILE *inf;
5137 {
5138 register char *bp;
5139
5140 LOOP_ON_INPUT_LINES (inf, lb, bp)
5141 {
5142 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5143 {
5144 bp = skip_non_spaces (bp+4);
5145 /* Skip over open parens and white space */
5146 while (notinname (*bp))
5147 bp++;
5148 get_tag (bp, NULL);
5149 }
5150 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5151 get_tag (bp, NULL);
5152 }
5153 }
5154
5155 \f
5156 /* Find tags in TeX and LaTeX input files. */
5157
5158 /* TEX_toktab is a table of TeX control sequences that define tags.
5159 * Each entry records one such control sequence.
5160 *
5161 * Original code from who knows whom.
5162 * Ideas by:
5163 * Stefan Monnier (2002)
5164 */
5165
5166 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5167
5168 /* Default set of control sequences to put into TEX_toktab.
5169 The value of environment var TEXTAGS is prepended to this. */
5170 static char *TEX_defenv = "\
5171 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5172 :part:appendix:entry:index:def\
5173 :newcommand:renewcommand:newenvironment:renewenvironment";
5174
5175 static void TEX_mode __P((FILE *));
5176 static void TEX_decode_env __P((char *, char *));
5177
5178 static char TEX_esc = '\\';
5179 static char TEX_opgrp = '{';
5180 static char TEX_clgrp = '}';
5181
5182 /*
5183 * TeX/LaTeX scanning loop.
5184 */
5185 static void
5186 TeX_commands (inf)
5187 FILE *inf;
5188 {
5189 char *cp;
5190 linebuffer *key;
5191
5192 /* Select either \ or ! as escape character. */
5193 TEX_mode (inf);
5194
5195 /* Initialize token table once from environment. */
5196 if (TEX_toktab == NULL)
5197 TEX_decode_env ("TEXTAGS", TEX_defenv);
5198
5199 LOOP_ON_INPUT_LINES (inf, lb, cp)
5200 {
5201 /* Look at each TEX keyword in line. */
5202 for (;;)
5203 {
5204 /* Look for a TEX escape. */
5205 while (*cp++ != TEX_esc)
5206 if (cp[-1] == '\0' || cp[-1] == '%')
5207 goto tex_next_line;
5208
5209 for (key = TEX_toktab; key->buffer != NULL; key++)
5210 if (strneq (cp, key->buffer, key->len))
5211 {
5212 register char *p;
5213 int namelen, linelen;
5214 bool opgrp = FALSE;
5215
5216 cp = skip_spaces (cp + key->len);
5217 if (*cp == TEX_opgrp)
5218 {
5219 opgrp = TRUE;
5220 cp++;
5221 }
5222 for (p = cp;
5223 (!iswhite (*p) && *p != '#' &&
5224 *p != TEX_opgrp && *p != TEX_clgrp);
5225 p++)
5226 continue;
5227 namelen = p - cp;
5228 linelen = lb.len;
5229 if (!opgrp || *p == TEX_clgrp)
5230 {
5231 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5232 p++;
5233 linelen = p - lb.buffer + 1;
5234 }
5235 make_tag (cp, namelen, TRUE,
5236 lb.buffer, linelen, lineno, linecharno);
5237 goto tex_next_line; /* We only tag a line once */
5238 }
5239 }
5240 tex_next_line:
5241 ;
5242 }
5243 }
5244
5245 #define TEX_LESC '\\'
5246 #define TEX_SESC '!'
5247
5248 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5249 chars accordingly. */
5250 static void
5251 TEX_mode (inf)
5252 FILE *inf;
5253 {
5254 int c;
5255
5256 while ((c = getc (inf)) != EOF)
5257 {
5258 /* Skip to next line if we hit the TeX comment char. */
5259 if (c == '%')
5260 while (c != '\n' && c != EOF)
5261 c = getc (inf);
5262 else if (c == TEX_LESC || c == TEX_SESC )
5263 break;
5264 }
5265
5266 if (c == TEX_LESC)
5267 {
5268 TEX_esc = TEX_LESC;
5269 TEX_opgrp = '{';
5270 TEX_clgrp = '}';
5271 }
5272 else
5273 {
5274 TEX_esc = TEX_SESC;
5275 TEX_opgrp = '<';
5276 TEX_clgrp = '>';
5277 }
5278 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5279 No attempt is made to correct the situation. */
5280 rewind (inf);
5281 }
5282
5283 /* Read environment and prepend it to the default string.
5284 Build token table. */
5285 static void
5286 TEX_decode_env (evarname, defenv)
5287 char *evarname;
5288 char *defenv;
5289 {
5290 register char *env, *p;
5291 int i, len;
5292
5293 /* Append default string to environment. */
5294 env = getenv (evarname);
5295 if (!env)
5296 env = defenv;
5297 else
5298 {
5299 char *oldenv = env;
5300 env = concat (oldenv, defenv, "");
5301 }
5302
5303 /* Allocate a token table */
5304 for (len = 1, p = env; p;)
5305 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5306 len++;
5307 TEX_toktab = xnew (len, linebuffer);
5308
5309 /* Unpack environment string into token table. Be careful about */
5310 /* zero-length strings (leading ':', "::" and trailing ':') */
5311 for (i = 0; *env != '\0';)
5312 {
5313 p = etags_strchr (env, ':');
5314 if (!p) /* End of environment string. */
5315 p = env + strlen (env);
5316 if (p - env > 0)
5317 { /* Only non-zero strings. */
5318 TEX_toktab[i].buffer = savenstr (env, p - env);
5319 TEX_toktab[i].len = p - env;
5320 i++;
5321 }
5322 if (*p)
5323 env = p + 1;
5324 else
5325 {
5326 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5327 TEX_toktab[i].len = 0;
5328 break;
5329 }
5330 }
5331 }
5332
5333 \f
5334 /* Texinfo support. Dave Love, Mar. 2000. */
5335 static void
5336 Texinfo_nodes (inf)
5337 FILE * inf;
5338 {
5339 char *cp, *start;
5340 LOOP_ON_INPUT_LINES (inf, lb, cp)
5341 if (LOOKING_AT (cp, "@node"))
5342 {
5343 start = cp;
5344 while (*cp != '\0' && *cp != ',')
5345 cp++;
5346 make_tag (start, cp - start, TRUE,
5347 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5348 }
5349 }
5350
5351 \f
5352 /*
5353 * HTML support.
5354 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5355 * Contents of <a name=xxx> are tags with name xxx.
5356 *
5357 * Francesco Potortì, 2002.
5358 */
5359 static void
5360 HTML_labels (inf)
5361 FILE * inf;
5362 {
5363 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5364 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5365 bool intag = FALSE; /* inside an html tag, looking for ID= */
5366 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5367 char *end;
5368
5369
5370 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5371
5372 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5373 for (;;) /* loop on the same line */
5374 {
5375 if (skiptag) /* skip HTML tag */
5376 {
5377 while (*dbp != '\0' && *dbp != '>')
5378 dbp++;
5379 if (*dbp == '>')
5380 {
5381 dbp += 1;
5382 skiptag = FALSE;
5383 continue; /* look on the same line */
5384 }
5385 break; /* go to next line */
5386 }
5387
5388 else if (intag) /* look for "name=" or "id=" */
5389 {
5390 while (*dbp != '\0' && *dbp != '>'
5391 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5392 dbp++;
5393 if (*dbp == '\0')
5394 break; /* go to next line */
5395 if (*dbp == '>')
5396 {
5397 dbp += 1;
5398 intag = FALSE;
5399 continue; /* look on the same line */
5400 }
5401 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5402 || LOOKING_AT_NOCASE (dbp, "id="))
5403 {
5404 bool quoted = (dbp[0] == '"');
5405
5406 if (quoted)
5407 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5408 continue;
5409 else
5410 for (end = dbp; *end != '\0' && intoken (*end); end++)
5411 continue;
5412 linebuffer_setlen (&token_name, end - dbp);
5413 strncpy (token_name.buffer, dbp, end - dbp);
5414 token_name.buffer[end - dbp] = '\0';
5415
5416 dbp = end;
5417 intag = FALSE; /* we found what we looked for */
5418 skiptag = TRUE; /* skip to the end of the tag */
5419 getnext = TRUE; /* then grab the text */
5420 continue; /* look on the same line */
5421 }
5422 dbp += 1;
5423 }
5424
5425 else if (getnext) /* grab next tokens and tag them */
5426 {
5427 dbp = skip_spaces (dbp);
5428 if (*dbp == '\0')
5429 break; /* go to next line */
5430 if (*dbp == '<')
5431 {
5432 intag = TRUE;
5433 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5434 continue; /* look on the same line */
5435 }
5436
5437 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5438 continue;
5439 make_tag (token_name.buffer, token_name.len, TRUE,
5440 dbp, end - dbp, lineno, linecharno);
5441 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5442 getnext = FALSE;
5443 break; /* go to next line */
5444 }
5445
5446 else /* look for an interesting HTML tag */
5447 {
5448 while (*dbp != '\0' && *dbp != '<')
5449 dbp++;
5450 if (*dbp == '\0')
5451 break; /* go to next line */
5452 intag = TRUE;
5453 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5454 {
5455 inanchor = TRUE;
5456 continue; /* look on the same line */
5457 }
5458 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5459 || LOOKING_AT_NOCASE (dbp, "<h1>")
5460 || LOOKING_AT_NOCASE (dbp, "<h2>")
5461 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5462 {
5463 intag = FALSE;
5464 getnext = TRUE;
5465 continue; /* look on the same line */
5466 }
5467 dbp += 1;
5468 }
5469 }
5470 }
5471
5472 \f
5473 /*
5474 * Prolog support
5475 *
5476 * Assumes that the predicate or rule starts at column 0.
5477 * Only the first clause of a predicate or rule is added.
5478 * Original code by Sunichirou Sugou (1989)
5479 * Rewritten by Anders Lindgren (1996)
5480 */
5481 static int prolog_pr __P((char *, char *));
5482 static void prolog_skip_comment __P((linebuffer *, FILE *));
5483 static int prolog_atom __P((char *, int));
5484
5485 static void
5486 Prolog_functions (inf)
5487 FILE *inf;
5488 {
5489 char *cp, *last;
5490 int len;
5491 int allocated;
5492
5493 allocated = 0;
5494 len = 0;
5495 last = NULL;
5496
5497 LOOP_ON_INPUT_LINES (inf, lb, cp)
5498 {
5499 if (cp[0] == '\0') /* Empty line */
5500 continue;
5501 else if (iswhite (cp[0])) /* Not a predicate */
5502 continue;
5503 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5504 prolog_skip_comment (&lb, inf);
5505 else if ((len = prolog_pr (cp, last)) > 0)
5506 {
5507 /* Predicate or rule. Store the function name so that we
5508 only generate a tag for the first clause. */
5509 if (last == NULL)
5510 last = xnew(len + 1, char);
5511 else if (len + 1 > allocated)
5512 xrnew (last, len + 1, char);
5513 allocated = len + 1;
5514 strncpy (last, cp, len);
5515 last[len] = '\0';
5516 }
5517 }
5518 if (last != NULL)
5519 free (last);
5520 }
5521
5522
5523 static void
5524 prolog_skip_comment (plb, inf)
5525 linebuffer *plb;
5526 FILE *inf;
5527 {
5528 char *cp;
5529
5530 do
5531 {
5532 for (cp = plb->buffer; *cp != '\0'; cp++)
5533 if (cp[0] == '*' && cp[1] == '/')
5534 return;
5535 readline (plb, inf);
5536 }
5537 while (!feof(inf));
5538 }
5539
5540 /*
5541 * A predicate or rule definition is added if it matches:
5542 * <beginning of line><Prolog Atom><whitespace>(
5543 * or <beginning of line><Prolog Atom><whitespace>:-
5544 *
5545 * It is added to the tags database if it doesn't match the
5546 * name of the previous clause header.
5547 *
5548 * Return the size of the name of the predicate or rule, or 0 if no
5549 * header was found.
5550 */
5551 static int
5552 prolog_pr (s, last)
5553 char *s;
5554 char *last; /* Name of last clause. */
5555 {
5556 int pos;
5557 int len;
5558
5559 pos = prolog_atom (s, 0);
5560 if (pos < 1)
5561 return 0;
5562
5563 len = pos;
5564 pos = skip_spaces (s + pos) - s;
5565
5566 if ((s[pos] == '.'
5567 || (s[pos] == '(' && (pos += 1))
5568 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5569 && (last == NULL /* save only the first clause */
5570 || len != (int)strlen (last)
5571 || !strneq (s, last, len)))
5572 {
5573 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5574 return len;
5575 }
5576 else
5577 return 0;
5578 }
5579
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  register char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return p - start;	/* that was the closing quote */
        p++;			/* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;			/* the escaped character is skipped */
        break;

      default:
        p++;
        break;
      }
}
5638
5639 \f
5640 /*
5641 * Support for Erlang
5642 *
5643 * Generates tags for functions, defines, and records.
5644 * Assumes that Erlang functions start at column 0.
5645 * Original code by Anders Lindgren (1996)
5646 */
5647 static int erlang_func __P((char *, char *));
5648 static void erlang_attribute __P((char *));
5649 static int erlang_atom __P((char *));
5650
5651 static void
5652 Erlang_functions (inf)
5653 FILE *inf;
5654 {
5655 char *cp, *last;
5656 int len;
5657 int allocated;
5658
5659 allocated = 0;
5660 len = 0;
5661 last = NULL;
5662
5663 LOOP_ON_INPUT_LINES (inf, lb, cp)
5664 {
5665 if (cp[0] == '\0') /* Empty line */
5666 continue;
5667 else if (iswhite (cp[0])) /* Not function nor attribute */
5668 continue;
5669 else if (cp[0] == '%') /* comment */
5670 continue;
5671 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5672 continue;
5673 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5674 {
5675 erlang_attribute (cp);
5676 if (last != NULL)
5677 {
5678 free (last);
5679 last = NULL;
5680 }
5681 }
5682 else if ((len = erlang_func (cp, last)) > 0)
5683 {
5684 /*
5685 * Function. Store the function name so that we only
5686 * generates a tag for the first clause.
5687 */
5688 if (last == NULL)
5689 last = xnew (len + 1, char);
5690 else if (len + 1 > allocated)
5691 xrnew (last, len + 1, char);
5692 allocated = len + 1;
5693 strncpy (last, cp, len);
5694 last[len] = '\0';
5695 }
5696 }
5697 if (last != NULL)
5698 free (last);
5699 }
5700
5701
5702 /*
5703 * A function definition is added if it matches:
5704 * <beginning of line><Erlang Atom><whitespace>(
5705 *
5706 * It is added to the tags database if it doesn't match the
5707 * name of the previous clause header.
5708 *
5709 * Return the size of the name of the function, or 0 if no function
5710 * was found.
5711 */
5712 static int
5713 erlang_func (s, last)
5714 char *s;
5715 char *last; /* Name of last clause. */
5716 {
5717 int pos;
5718 int len;
5719
5720 pos = erlang_atom (s);
5721 if (pos < 1)
5722 return 0;
5723
5724 len = pos;
5725 pos = skip_spaces (s + pos) - s;
5726
5727 /* Save only the first clause. */
5728 if (s[pos++] == '('
5729 && (last == NULL
5730 || len != (int)strlen (last)
5731 || !strneq (s, last, len)))
5732 {
5733 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5734 return len;
5735 }
5736
5737 return 0;
5738 }
5739
5740
5741 /*
5742 * Handle attributes. Currently, tags are generated for defines
5743 * and records.
5744 *
5745 * They are on the form:
5746 * -define(foo, bar).
5747 * -define(Foo(M, N), M+N).
5748 * -record(graph, {vtab = notable, cyclic = true}).
5749 */
5750 static void
5751 erlang_attribute (s)
5752 char *s;
5753 {
5754 char *cp = s;
5755
5756 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5757 && *cp++ == '(')
5758 {
5759 int len = erlang_atom (skip_spaces (cp));
5760 if (len > 0)
5761 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5762 }
5763 return;
5764 }
5765
5766
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted atom is a letter or underscore followed by any number
 * of alphanumerics and underscores.  A quoted atom is enclosed in
 * single quotes; inside it a backslash quotes the next character.
 *
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not terminated on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (*p == '\'')
    {
      /* Quoted atom: look for the closing quote. */
      for (p++; *p != '\''; p++)
        {
          if (*p == '\\')
            p++;                /* step onto the quoted character */
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
        }
      return p + 1 - s;         /* count the closing quote too */
    }

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }

  return p - s;
}
5795
5796 \f
5797 static char *scan_separators __P((char *));
5798 static void add_regex __P((char *, language *));
5799 static char *substitute __P((char *, char *, struct re_registers *));
5800
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text up to the next
 * unquoted separator is copied over the beginning of NAME and null
 * terminated.  A separator quoted by a backslash is copied without
 * the backslash; \a, \b, \d, \e, \f, \n, \r, \t and \v are turned
 * into the corresponding control characters; any other quoted
 * character is copied together with its backslash.  Works in place.
 * Returns a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* input cursor, always ahead of dst */
  char *result;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character. */
          src++;
          if (*src == '\0')
            break;              /* dangling backslash at end of string */
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)            */
            case 'b': *dst++ = '\b';   break; /* BS (back space)       */
            case 'd': *dst++ = 0177;   break; /* DEL (delete)          */
            case 'e': *dst++ = 033;    break; /* ESC (escape)          */
            case 'f': *dst++ = '\f';   break; /* FF (form feed)        */
            case 'n': *dst++ = '\n';   break; /* NL (new line)         */
            case 'r': *dst++ = '\r';   break; /* CR (carriage return)  */
            case 't': *dst++ = '\t';   break; /* TAB (horizontal tab)  */
            case 'v': *dst++ = '\v';   break; /* VT (vertical tab)     */
            default:
              if (*src != sep)
                /* Something else is quoted, so preserve the quote. */
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator: we are done */
      else
        *dst++ = *src;
    }

  /* Anything but a separator here signals an unterminated regexp. */
  result = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5860
5861 /* Look at the argument of --regex or --no-regex and do the right
5862 thing. Same for each line of a regexp file. */
5863 static void
5864 analyse_regex (regex_arg)
5865 char *regex_arg;
5866 {
5867 if (regex_arg == NULL)
5868 {
5869 free_regexps (); /* --no-regex: remove existing regexps */
5870 return;
5871 }
5872
5873 /* A real --regexp option or a line in a regexp file. */
5874 switch (regex_arg[0])
5875 {
5876 /* Comments in regexp file or null arg to --regex. */
5877 case '\0':
5878 case ' ':
5879 case '\t':
5880 break;
5881
5882 /* Read a regex file. This is recursive and may result in a
5883 loop, which will stop when the file descriptors are exhausted. */
5884 case '@':
5885 {
5886 FILE *regexfp;
5887 linebuffer regexbuf;
5888 char *regexfile = regex_arg + 1;
5889
5890 /* regexfile is a file containing regexps, one per line. */
5891 regexfp = fopen (regexfile, "r");
5892 if (regexfp == NULL)
5893 {
5894 pfatal (regexfile);
5895 return;
5896 }
5897 linebuffer_init (&regexbuf);
5898 while (readline_internal (&regexbuf, regexfp) > 0)
5899 analyse_regex (regexbuf.buffer);
5900 free (regexbuf.buffer);
5901 fclose (regexfp);
5902 }
5903 break;
5904
5905 /* Regexp to be used for a specific language only. */
5906 case '{':
5907 {
5908 language *lang;
5909 char *lang_name = regex_arg + 1;
5910 char *cp;
5911
5912 for (cp = lang_name; *cp != '}'; cp++)
5913 if (*cp == '\0')
5914 {
5915 error ("unterminated language name in regex: %s", regex_arg);
5916 return;
5917 }
5918 *cp++ = '\0';
5919 lang = get_language_from_langname (lang_name);
5920 if (lang == NULL)
5921 return;
5922 add_regex (cp, lang);
5923 }
5924 break;
5925
5926 /* Regexp to be used for any language. */
5927 default:
5928 add_regex (regex_arg, NULL);
5929 break;
5930 }
5931 }
5932
5933 /* Separate the regexp pattern, compile it,
5934 and care for optional name and modifiers. */
5935 static void
5936 add_regex (regexp_pattern, lang)
5937 char *regexp_pattern;
5938 language *lang;
5939 {
5940 static struct re_pattern_buffer zeropattern;
5941 char sep, *pat, *name, *modifiers;
5942 const char *err;
5943 struct re_pattern_buffer *patbuf;
5944 regexp *rp;
5945 bool
5946 force_explicit_name = TRUE, /* do not use implicit tag names */
5947 ignore_case = FALSE, /* case is significant */
5948 multi_line = FALSE, /* matches are done one line at a time */
5949 single_line = FALSE; /* dot does not match newline */
5950
5951
5952 if (strlen(regexp_pattern) < 3)
5953 {
5954 error ("null regexp", (char *)NULL);
5955 return;
5956 }
5957 sep = regexp_pattern[0];
5958 name = scan_separators (regexp_pattern);
5959 if (name == NULL)
5960 {
5961 error ("%s: unterminated regexp", regexp_pattern);
5962 return;
5963 }
5964 if (name[1] == sep)
5965 {
5966 error ("null name for regexp \"%s\"", regexp_pattern);
5967 return;
5968 }
5969 modifiers = scan_separators (name);
5970 if (modifiers == NULL) /* no terminating separator --> no name */
5971 {
5972 modifiers = name;
5973 name = "";
5974 }
5975 else
5976 modifiers += 1; /* skip separator */
5977
5978 /* Parse regex modifiers. */
5979 for (; modifiers[0] != '\0'; modifiers++)
5980 switch (modifiers[0])
5981 {
5982 case 'N':
5983 if (modifiers == name)
5984 error ("forcing explicit tag name but no name, ignoring", NULL);
5985 force_explicit_name = TRUE;
5986 break;
5987 case 'i':
5988 ignore_case = TRUE;
5989 break;
5990 case 's':
5991 single_line = TRUE;
5992 /* FALLTHRU */
5993 case 'm':
5994 multi_line = TRUE;
5995 need_filebuf = TRUE;
5996 break;
5997 default:
5998 {
5999 char wrongmod [2];
6000 wrongmod[0] = modifiers[0];
6001 wrongmod[1] = '\0';
6002 error ("invalid regexp modifier `%s', ignoring", wrongmod);
6003 }
6004 break;
6005 }
6006
6007 patbuf = xnew (1, struct re_pattern_buffer);
6008 *patbuf = zeropattern;
6009 if (ignore_case)
6010 {
6011 static char lc_trans[CHARS];
6012 int i;
6013 for (i = 0; i < CHARS; i++)
6014 lc_trans[i] = lowcase (i);
6015 patbuf->translate = lc_trans; /* translation table to fold case */
6016 }
6017
6018 if (multi_line)
6019 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6020 else
6021 pat = regexp_pattern;
6022
6023 if (single_line)
6024 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6025 else
6026 re_set_syntax (RE_SYNTAX_EMACS);
6027
6028 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6029 if (multi_line)
6030 free (pat);
6031 if (err != NULL)
6032 {
6033 error ("%s while compiling pattern", err);
6034 return;
6035 }
6036
6037 rp = p_head;
6038 p_head = xnew (1, regexp);
6039 p_head->pattern = savestr (regexp_pattern);
6040 p_head->p_next = rp;
6041 p_head->lang = lang;
6042 p_head->pat = patbuf;
6043 p_head->name = savestr (name);
6044 p_head->error_signaled = FALSE;
6045 p_head->force_explicit_name = force_explicit_name;
6046 p_head->ignore_case = ignore_case;
6047 p_head->multi_line = multi_line;
6048 }
6049
6050 /*
6051 * Do the substitutions indicated by the regular expression and
6052 * arguments.
6053 */
6054 static char *
6055 substitute (in, out, regs)
6056 char *in, *out;
6057 struct re_registers *regs;
6058 {
6059 char *result, *t;
6060 int size, dig, diglen;
6061
6062 result = NULL;
6063 size = strlen (out);
6064
6065 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6066 if (out[size - 1] == '\\')
6067 fatal ("pattern error in \"%s\"", out);
6068 for (t = etags_strchr (out, '\\');
6069 t != NULL;
6070 t = etags_strchr (t + 2, '\\'))
6071 if (ISDIGIT (t[1]))
6072 {
6073 dig = t[1] - '0';
6074 diglen = regs->end[dig] - regs->start[dig];
6075 size += diglen - 2;
6076 }
6077 else
6078 size -= 1;
6079
6080 /* Allocate space and do the substitutions. */
6081 assert (size >= 0);
6082 result = xnew (size + 1, char);
6083
6084 for (t = result; *out != '\0'; out++)
6085 if (*out == '\\' && ISDIGIT (*++out))
6086 {
6087 dig = *out - '0';
6088 diglen = regs->end[dig] - regs->start[dig];
6089 strncpy (t, in + regs->start[dig], diglen);
6090 t += diglen;
6091 }
6092 else
6093 *t++ = *out;
6094 *t = '\0';
6095
6096 assert (t <= result + size);
6097 assert (t - result == (int)strlen (result));
6098
6099 return result;
6100 }
6101
6102 /* Deallocate all regexps. */
6103 static void
6104 free_regexps ()
6105 {
6106 regexp *rp;
6107 while (p_head != NULL)
6108 {
6109 rp = p_head->p_next;
6110 free (p_head->pattern);
6111 free (p_head->name);
6112 free (p_head);
6113 p_head = rp;
6114 }
6115 return;
6116 }
6117
6118 /*
6119 * Reads the whole file as a single string from `filebuf' and looks for
6120 * multi-line regular expressions, creating tags on matches.
6121 * readline already dealt with normal regexps.
6122 *
6123 * Idea by Ben Wing <ben@666.com> (2002).
6124 */
6125 static void
6126 regex_tag_multiline ()
6127 {
6128 char *buffer = filebuf.buffer;
6129 regexp *rp;
6130 char *name;
6131
6132 for (rp = p_head; rp != NULL; rp = rp->p_next)
6133 {
6134 int match = 0;
6135
6136 if (!rp->multi_line)
6137 continue; /* skip normal regexps */
6138
6139 /* Generic initialisations before parsing file from memory. */
6140 lineno = 1; /* reset global line number */
6141 charno = 0; /* reset global char number */
6142 linecharno = 0; /* reset global char number of line start */
6143
6144 /* Only use generic regexps or those for the current language. */
6145 if (rp->lang != NULL && rp->lang != curfdp->lang)
6146 continue;
6147
6148 while (match >= 0 && match < filebuf.len)
6149 {
6150 match = re_search (rp->pat, buffer, filebuf.len, charno,
6151 filebuf.len - match, &rp->regs);
6152 switch (match)
6153 {
6154 case -2:
6155 /* Some error. */
6156 if (!rp->error_signaled)
6157 {
6158 error ("regexp stack overflow while matching \"%s\"",
6159 rp->pattern);
6160 rp->error_signaled = TRUE;
6161 }
6162 break;
6163 case -1:
6164 /* No match. */
6165 break;
6166 default:
6167 if (match == rp->regs.end[0])
6168 {
6169 if (!rp->error_signaled)
6170 {
6171 error ("regexp matches the empty string: \"%s\"",
6172 rp->pattern);
6173 rp->error_signaled = TRUE;
6174 }
6175 match = -3; /* exit from while loop */
6176 break;
6177 }
6178
6179 /* Match occurred. Construct a tag. */
6180 while (charno < rp->regs.end[0])
6181 if (buffer[charno++] == '\n')
6182 lineno++, linecharno = charno;
6183 name = rp->name;
6184 if (name[0] == '\0')
6185 name = NULL;
6186 else /* make a named tag */
6187 name = substitute (buffer, rp->name, &rp->regs);
6188 if (rp->force_explicit_name)
6189 /* Force explicit tag name, if a name is there. */
6190 pfnote (name, TRUE, buffer + linecharno,
6191 charno - linecharno + 1, lineno, linecharno);
6192 else
6193 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6194 charno - linecharno + 1, lineno, linecharno);
6195 break;
6196 }
6197 }
6198 }
6199 }
6200
6201 \f
6202 static bool
6203 nocase_tail (cp)
6204 char *cp;
6205 {
6206 register int len = 0;
6207
6208 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6209 cp++, len++;
6210 if (*cp == '\0' && !intoken (dbp[len]))
6211 {
6212 dbp += len;
6213 return TRUE;
6214 }
6215 return FALSE;
6216 }
6217
6218 static void
6219 get_tag (bp, namepp)
6220 register char *bp;
6221 char **namepp;
6222 {
6223 register char *cp = bp;
6224
6225 if (*bp != '\0')
6226 {
6227 /* Go till you get to white space or a syntactic break */
6228 for (cp = bp + 1; !notinname (*cp); cp++)
6229 continue;
6230 make_tag (bp, cp - bp, TRUE,
6231 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6232 }
6233
6234 if (namepp != NULL)
6235 *namepp = savenstr (bp, cp - bp);
6236 }
6237
6238 /*
6239 * Read a line of text from `stream' into `lbp', excluding the
6240 * newline or CR-NL, if any. Return the number of characters read from
6241 * `stream', which is the length of the line including the newline.
6242 *
6243 * On DOS or Windows we do not count the CR character, if any before the
6244 * NL, in the returned length; this mirrors the behavior of Emacs on those
6245 * platforms (for text files, it translates CR-NL to NL as it reads in the
6246 * file).
6247 *
6248 * If multi-line regular expressions are requested, each line read is
6249 * appended to `filebuf'.
6250 */
6251 static long
6252 readline_internal (lbp, stream)
6253 linebuffer *lbp;
6254 register FILE *stream;
6255 {
6256 char *buffer = lbp->buffer;
6257 register char *p = lbp->buffer;
6258 register char *pend;
6259 int chars_deleted;
6260
6261 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6262
6263 for (;;)
6264 {
6265 register int c = getc (stream);
6266 if (p == pend)
6267 {
6268 /* We're at the end of linebuffer: expand it. */
6269 lbp->size *= 2;
6270 xrnew (buffer, lbp->size, char);
6271 p += buffer - lbp->buffer;
6272 pend = buffer + lbp->size;
6273 lbp->buffer = buffer;
6274 }
6275 if (c == EOF)
6276 {
6277 *p = '\0';
6278 chars_deleted = 0;
6279 break;
6280 }
6281 if (c == '\n')
6282 {
6283 if (p > buffer && p[-1] == '\r')
6284 {
6285 p -= 1;
6286 #ifdef DOS_NT
6287 /* Assume CRLF->LF translation will be performed by Emacs
6288 when loading this file, so CRs won't appear in the buffer.
6289 It would be cleaner to compensate within Emacs;
6290 however, Emacs does not know how many CRs were deleted
6291 before any given point in the file. */
6292 chars_deleted = 1;
6293 #else
6294 chars_deleted = 2;
6295 #endif
6296 }
6297 else
6298 {
6299 chars_deleted = 1;
6300 }
6301 *p = '\0';
6302 break;
6303 }
6304 *p++ = c;
6305 }
6306 lbp->len = p - buffer;
6307
6308 if (need_filebuf /* we need filebuf for multi-line regexps */
6309 && chars_deleted > 0) /* not at EOF */
6310 {
6311 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6312 {
6313 /* Expand filebuf. */
6314 filebuf.size *= 2;
6315 xrnew (filebuf.buffer, filebuf.size, char);
6316 }
6317 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6318 filebuf.len += lbp->len;
6319 filebuf.buffer[filebuf.len++] = '\n';
6320 filebuf.buffer[filebuf.len] = '\0';
6321 }
6322
6323 return lbp->len + chars_deleted;
6324 }
6325
6326 /*
6327 * Like readline_internal, above, but in addition try to match the
6328 * input line against relevant regular expressions and manage #line
6329 * directives.
6330 */
6331 static void
6332 readline (lbp, stream)
6333 linebuffer *lbp;
6334 FILE *stream;
6335 {
6336 long result;
6337
6338 linecharno = charno; /* update global char number of line start */
6339 result = readline_internal (lbp, stream); /* read line */
6340 lineno += 1; /* increment global line number */
6341 charno += result; /* increment global char number */
6342
6343 /* Honour #line directives. */
6344 if (!no_line_directive)
6345 {
6346 static bool discard_until_line_directive;
6347
6348 /* Check whether this is a #line directive. */
6349 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6350 {
6351 unsigned int lno;
6352 int start = 0;
6353
6354 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6355 && start > 0) /* double quote character found */
6356 {
6357 char *endp = lbp->buffer + start;
6358
6359 while ((endp = etags_strchr (endp, '"')) != NULL
6360 && endp[-1] == '\\')
6361 endp++;
6362 if (endp != NULL)
6363 /* Ok, this is a real #line directive. Let's deal with it. */
6364 {
6365 char *taggedabsname; /* absolute name of original file */
6366 char *taggedfname; /* name of original file as given */
6367 char *name; /* temp var */
6368
6369 discard_until_line_directive = FALSE; /* found it */
6370 name = lbp->buffer + start;
6371 *endp = '\0';
6372 canonicalize_filename (name); /* for DOS */
6373 taggedabsname = absolute_filename (name, tagfiledir);
6374 if (filename_is_absolute (name)
6375 || filename_is_absolute (curfdp->infname))
6376 taggedfname = savestr (taggedabsname);
6377 else
6378 taggedfname = relative_filename (taggedabsname,tagfiledir);
6379
6380 if (streq (curfdp->taggedfname, taggedfname))
6381 /* The #line directive is only a line number change. We
6382 deal with this afterwards. */
6383 free (taggedfname);
6384 else
6385 /* The tags following this #line directive should be
6386 attributed to taggedfname. In order to do this, set
6387 curfdp accordingly. */
6388 {
6389 fdesc *fdp; /* file description pointer */
6390
6391 /* Go look for a file description already set up for the
6392 file indicated in the #line directive. If there is
6393 one, use it from now until the next #line
6394 directive. */
6395 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6396 if (streq (fdp->infname, curfdp->infname)
6397 && streq (fdp->taggedfname, taggedfname))
6398 /* If we remove the second test above (after the &&)
6399 then all entries pertaining to the same file are
6400 coalesced in the tags file. If we use it, then
6401 entries pertaining to the same file but generated
6402 from different files (via #line directives) will
6403 go into separate sections in the tags file. These
6404 alternatives look equivalent. The first one
6405 destroys some apparently useless information. */
6406 {
6407 curfdp = fdp;
6408 free (taggedfname);
6409 break;
6410 }
6411 /* Else, if we already tagged the real file, skip all
6412 input lines until the next #line directive. */
6413 if (fdp == NULL) /* not found */
6414 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6415 if (streq (fdp->infabsname, taggedabsname))
6416 {
6417 discard_until_line_directive = TRUE;
6418 free (taggedfname);
6419 break;
6420 }
6421 /* Else create a new file description and use that from
6422 now on, until the next #line directive. */
6423 if (fdp == NULL) /* not found */
6424 {
6425 fdp = fdhead;
6426 fdhead = xnew (1, fdesc);
6427 *fdhead = *curfdp; /* copy curr. file description */
6428 fdhead->next = fdp;
6429 fdhead->infname = savestr (curfdp->infname);
6430 fdhead->infabsname = savestr (curfdp->infabsname);
6431 fdhead->infabsdir = savestr (curfdp->infabsdir);
6432 fdhead->taggedfname = taggedfname;
6433 fdhead->usecharno = FALSE;
6434 fdhead->prop = NULL;
6435 fdhead->written = FALSE;
6436 curfdp = fdhead;
6437 }
6438 }
6439 free (taggedabsname);
6440 lineno = lno - 1;
6441 readline (lbp, stream);
6442 return;
6443 } /* if a real #line directive */
6444 } /* if #line is followed by a a number */
6445 } /* if line begins with "#line " */
6446
6447 /* If we are here, no #line directive was found. */
6448 if (discard_until_line_directive)
6449 {
6450 if (result > 0)
6451 {
6452 /* Do a tail recursion on ourselves, thus discarding the contents
6453 of the line buffer. */
6454 readline (lbp, stream);
6455 return;
6456 }
6457 /* End of file. */
6458 discard_until_line_directive = FALSE;
6459 return;
6460 }
6461 } /* if #line directives should be considered */
6462
6463 {
6464 int match;
6465 regexp *rp;
6466 char *name;
6467
6468 /* Match against relevant regexps. */
6469 if (lbp->len > 0)
6470 for (rp = p_head; rp != NULL; rp = rp->p_next)
6471 {
6472 /* Only use generic regexps or those for the current language.
6473 Also do not use multiline regexps, which is the job of
6474 regex_tag_multiline. */
6475 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6476 || rp->multi_line)
6477 continue;
6478
6479 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6480 switch (match)
6481 {
6482 case -2:
6483 /* Some error. */
6484 if (!rp->error_signaled)
6485 {
6486 error ("regexp stack overflow while matching \"%s\"",
6487 rp->pattern);
6488 rp->error_signaled = TRUE;
6489 }
6490 break;
6491 case -1:
6492 /* No match. */
6493 break;
6494 case 0:
6495 /* Empty string matched. */
6496 if (!rp->error_signaled)
6497 {
6498 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6499 rp->error_signaled = TRUE;
6500 }
6501 break;
6502 default:
6503 /* Match occurred. Construct a tag. */
6504 name = rp->name;
6505 if (name[0] == '\0')
6506 name = NULL;
6507 else /* make a named tag */
6508 name = substitute (lbp->buffer, rp->name, &rp->regs);
6509 if (rp->force_explicit_name)
6510 /* Force explicit tag name, if a name is there. */
6511 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6512 else
6513 make_tag (name, strlen (name), TRUE,
6514 lbp->buffer, match, lineno, linecharno);
6515 break;
6516 }
6517 }
6518 }
6519 }
6520
6521 \f
/*
 * Return a newly allocated copy of the string CP.  The copy takes
 * strlen(cp)+1 bytes obtained through savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6532
6533 /*
6534 * Return a pointer to a space of size LEN+1 allocated with xnew where
6535 * the string CP has been copied for at most the first LEN characters.
6536 */
6537 static char *
6538 savenstr (cp, len)
6539 char *cp;
6540 int len;
6541 {
6542 register char *dp;
6543
6544 dp = xnew (len + 1, char);
6545 strncpy (dp, cp, len);
6546 dp[len] = '\0';
6547 return dp;
6548 }
6549
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null character is
 * part of the string, so it can be searched for.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  /* Like strrchr, search for C converted to char.  Comparing a plain
     char with the int would never match characters above 127 where
     char is signed. */
  const char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6571
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null character is
 * part of the string, so it can be searched for.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Like strchr, search for C converted to char.  Comparing a plain
     char with the int would never match characters above 127 where
     char is signed. */
  const char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6590
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Case is folded only where both strings have a letter; any other
 * pair of characters is compared as is.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int fold = ISALPHA (*s1) && ISALPHA (*s2);
      int c1 = fold ? lowcase (*s1) : *s1;
      int c2 = fold ? lowcase (*s2) : *s2;

      /* Stop at the end of s1 or at the first difference; in both
         cases the difference of the current pair is the answer. */
      if (*s1 == '\0' || c1 != c2)
        return c1 - c2;
    }
}
6611
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters.  Case is folded only
 * where both strings have a letter; any other pair of characters is
 * compared as is.
 *
 * Return 0 if the first N characters are equal (always the case when
 * N is not positive), else the difference of the first differing pair.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  for (; n > 0; s1++, s2++, n--)
    {
      int fold = ISALPHA (*s1) && ISALPHA (*s2);
      int c1 = fold ? lowcase (*s1) : *s1;
      int c2 = fold ? lowcase (*s2) : *s2;

      if (c1 != c2)
        return c1 - c2;
      if (*s1 == '\0')
        return 0;               /* both strings end here */
    }

  /* N characters compared equal.  Characters past the limit must not
     be looked at, even when s1 ends exactly at the limit. */
  return 0;
}
6637
/* Skip spaces (end of string is not space), return new pointer. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6647
/* Skip non spaces, except end of string, return new pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6657
/* Print error message and exit.  `s1' and `s2' are handed to `error';
   the process then terminates with EXIT_FAILURE. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);               /* report first ... */
  exit (EXIT_FAILURE);          /* ... then give up */
}
6666
/* Print through perror the system error message for the last failed
   call, prefixed with `s1', and exit with EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);                  /* perror describes the current errno */
  exit (EXIT_FAILURE);
}
6674
6675 static void
6676 suggest_asking_for_help ()
6677 {
6678 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6679 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6680 exit (EXIT_FAILURE);
6681 }
6682
6683 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6684 static void
6685 error (s1, s2)
6686 const char *s1, *s2;
6687 {
6688 fprintf (stderr, "%s: ", progname);
6689 fprintf (stderr, s1, s2);
6690 fprintf (stderr, "\n");
6691 }
6692
6693 /* Return a newly-allocated string whose contents
6694 concatenate those of s1, s2, s3. */
6695 static char *
6696 concat (s1, s2, s3)
6697 char *s1, *s2, *s3;
6698 {
6699 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6700 char *result = xnew (len1 + len2 + len3 + 1, char);
6701
6702 strcpy (result, s1);
6703 strcpy (result + len1, s2);
6704 strcpy (result + len1 + len2, s3);
6705 result[len1 + len2 + len3] = '\0';
6706
6707 return result;
6708 }
6709
6710 \f
6711 /* Does the same work as the system V getcwd, but does not need to
6712 guess the buffer size in advance. */
6713 static char *
6714 etags_getcwd ()
6715 {
6716 #ifdef HAVE_GETCWD
6717 int bufsize = 200;
6718 char *path = xnew (bufsize, char);
6719
6720 while (getcwd (path, bufsize) == NULL)
6721 {
6722 if (errno != ERANGE)
6723 pfatal ("getcwd");
6724 bufsize *= 2;
6725 free (path);
6726 path = xnew (bufsize, char);
6727 }
6728
6729 canonicalize_filename (path);
6730 return path;
6731
6732 #else /* not HAVE_GETCWD */
6733 #if MSDOS
6734
6735 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6736
6737 getwd (path);
6738
6739 for (p = path; *p != '\0'; p++)
6740 if (*p == '\\')
6741 *p = '/';
6742 else
6743 *p = lowcase (*p);
6744
6745 return strdup (path);
6746 #else /* not MSDOS */
6747 linebuffer path;
6748 FILE *pipe;
6749
6750 linebuffer_init (&path);
6751 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6752 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6753 pfatal ("pwd");
6754 pclose (pipe);
6755
6756 return path.buffer;
6757 #endif /* not MSDOS */
6758 #endif /* not HAVE_GETCWD */
6759 }
6760
6761 /* Return a newly allocated string containing the file name of FILE
6762 relative to the absolute directory DIR (which should end with a slash). */
6763 static char *
6764 relative_filename (file, dir)
6765 char *file, *dir;
6766 {
6767 char *fp, *dp, *afn, *res;
6768 int i;
6769
6770 /* Find the common root of file and dir (with a trailing slash). */
6771 afn = absolute_filename (file, cwd);
6772 fp = afn;
6773 dp = dir;
6774 while (*fp++ == *dp++)
6775 continue;
6776 fp--, dp--; /* back to the first differing char */
6777 #ifdef DOS_NT
6778 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6779 return afn;
6780 #endif
6781 do /* look at the equal chars until '/' */
6782 fp--, dp--;
6783 while (*fp != '/');
6784
6785 /* Build a sequence of "../" strings for the resulting relative file name. */
6786 i = 0;
6787 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6788 i += 1;
6789 res = xnew (3*i + strlen (fp + 1) + 1, char);
6790 res[0] = '\0';
6791 while (i-- > 0)
6792 strcat (res, "../");
6793
6794 /* Add the file name relative to the common root of file and dir. */
6795 strcat (res, fp + 1);
6796 free (afn);
6797
6798 return res;
6799 }
6800
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  The "/." and
   "/dirname/.." components are removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the slash that starts the previous
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow: shift the tail down, terminator included, with
                 memmove. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping regions here as well. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6864
6865 /* Return a newly allocated string containing the absolute
6866 file name of dir where FILE resides given DIR (which should
6867 end with a slash). */
6868 static char *
6869 absolute_dirname (file, dir)
6870 char *file, *dir;
6871 {
6872 char *slashp, *res;
6873 char save;
6874
6875 canonicalize_filename (file);
6876 slashp = etags_strrchr (file, '/');
6877 if (slashp == NULL)
6878 return savestr (dir);
6879 save = slashp[1];
6880 slashp[1] = '\0';
6881 res = absolute_filename (file, dir);
6882 slashp[1] = save;
6883
6884 return res;
6885 }
6886
/* Whether the argument string is an absolute file name, that is one
   starting with a slash or, on DOS_NT, with a drive letter, a colon
   and a slash.  The argument string must have been canonicalized
   with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  if (fn[0] == '/')
    return 1;
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return 0;
}
6899
/* Bring the file name FN into canonical form, modifying it in place.
   Under DOS_NT a lowercase drive letter is upcased and every backslash
   is turned into a slash; on other systems FN is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *cursor = fn;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes.  */
  while (*cursor != '\0')
    {
      if (*cursor == '\\')
	*cursor = '/';
      cursor++;
    }
#else
  /* Nothing to do; just reference FN to shut up the compiler.  */
  (void) fn;
#endif
}
6918
6919 \f
6920 /* Initialize a linebuffer for use */
6921 static void
6922 linebuffer_init (lbp)
6923 linebuffer *lbp;
6924 {
6925 lbp->size = (DEBUG) ? 3 : 200;
6926 lbp->buffer = xnew (lbp->size, char);
6927 lbp->buffer[0] = '\0';
6928 lbp->len = 0;
6929 }
6930
6931 /* Set the minimum size of a string contained in a linebuffer. */
6932 static void
6933 linebuffer_setlen (lbp, toksize)
6934 linebuffer *lbp;
6935 int toksize;
6936 {
6937 while (lbp->size <= toksize)
6938 {
6939 lbp->size *= 2;
6940 xrnew (lbp->buffer, lbp->size, char);
6941 }
6942 lbp->len = toksize;
6943 }
6944
6945 /* Like malloc but get fatal error if memory is exhausted. */
6946 static PTR
6947 xmalloc (size)
6948 unsigned int size;
6949 {
6950 PTR result = (PTR) malloc (size);
6951 if (result == NULL)
6952 fatal ("virtual memory exhausted", (char *)NULL);
6953 return result;
6954 }
6955
6956 static PTR
6957 xrealloc (ptr, size)
6958 char *ptr;
6959 unsigned int size;
6960 {
6961 PTR result = (PTR) realloc (ptr, size);
6962 if (result == NULL)
6963 fatal ("virtual memory exhausted", (char *)NULL);
6964 return result;
6965 }
6966
6967 /*
6968 * Local Variables:
6969 * indent-tabs-mode: t
6970 * tab-width: 8
6971 * fill-column: 79
6972 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6973 * c-file-style: "gnu"
6974 * End:
6975 */
6976
6977 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6978 (do not change this comment) */
6979
6980 /* etags.c ends here */