Merge from emacs--rel--22
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 2 of the License, or
40 (at your option) any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program; if not, write to the Free Software Foundation,
49 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
50
51
52 /* NB To comply with the above BSD license, copyright information is
53 reproduced in etc/ETAGS.README. That file should be updated when the
54 above notices are.
55
56 To the best of our knowledge, this code was originally based on the
57 ctags.c distributed with BSD4.2, which was copyrighted by the
58 University of California, as described above. */
59
60
61 /*
62 * Authors:
63 * 1983 Ctags originally by Ken Arnold.
64 * 1984 Fortran added by Jim Kleckner.
65 * 1984 Ed Pelegri-Llopart added C typedefs.
66 * 1985 Emacs TAGS format by Richard Stallman.
67 * 1989 Sam Kendall added C++.
68 * 1992 Joseph B. Wells improved C and C++ parsing.
69 * 1993 Francesco Potortì reorganised C and C++.
70 * 1994 Line-by-line regexp tags by Tom Tromey.
71 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
72 * 2002 #line directives by Francesco Potortì.
73 *
74 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
75 */
76
77 /*
78 * If you want to add support for a new language, start by looking at the LUA
79 * language, which is the simplest. Alternatively, consider shipping a
80 * configuration file containing regexp definitions for etags.
81 */
82
83 char pot_etags_version[] = "@(#) pot revision number is 17.34";
84
85 #define TRUE 1
86 #define FALSE 0
87
88 #ifdef DEBUG
89 # undef DEBUG
90 # define DEBUG TRUE
91 #else
92 # define DEBUG FALSE
93 # define NDEBUG /* disable assert */
94 #endif
95
96 #ifdef HAVE_CONFIG_H
97 # include <config.h>
98 /* On some systems, Emacs defines static as nothing for the sake
99 of unexec. We don't want that here since we don't use unexec. */
100 # undef static
101 # ifndef PTR /* for XEmacs */
102 # define PTR void *
103 # endif
104 # ifndef __P /* for XEmacs */
105 # define __P(args) args
106 # endif
107 #else /* no config.h */
108 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
109 # define __P(args) args /* use prototypes */
110 # define PTR void * /* for generic pointers */
111 # else /* not standard C */
112 # define __P(args) () /* no prototypes */
113 # define const /* remove const for old compilers' sake */
114 # define PTR long * /* don't use void* */
115 # endif
116 #endif /* !HAVE_CONFIG_H */
117
118 #ifndef _GNU_SOURCE
119 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
120 #endif
121
122 /* WIN32_NATIVE is for XEmacs.
123 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
124 #ifdef WIN32_NATIVE
125 # undef MSDOS
126 # undef WINDOWSNT
127 # define WINDOWSNT
128 #endif /* WIN32_NATIVE */
129
130 #ifdef MSDOS
131 # undef MSDOS
132 # define MSDOS TRUE
133 # include <fcntl.h>
134 # include <sys/param.h>
135 # include <io.h>
136 # ifndef HAVE_CONFIG_H
137 # define DOS_NT
138 # include <sys/config.h>
139 # endif
140 #else
141 # define MSDOS FALSE
142 #endif /* MSDOS */
143
144 #ifdef WINDOWSNT
145 # include <stdlib.h>
146 # include <fcntl.h>
147 # include <string.h>
148 # include <direct.h>
149 # include <io.h>
150 # define MAXPATHLEN _MAX_PATH
151 # undef HAVE_NTGUI
152 # undef DOS_NT
153 # define DOS_NT
154 # ifndef HAVE_GETCWD
155 # define HAVE_GETCWD
156 # endif /* undef HAVE_GETCWD */
157 #else /* not WINDOWSNT */
158 # ifdef STDC_HEADERS
159 # include <stdlib.h>
160 # include <string.h>
161 # else /* no standard C headers */
162 extern char *getenv ();
163 extern char *strcpy ();
164 extern char *strncpy ();
165 extern char *strcat ();
166 extern char *strncat ();
167 extern unsigned long strlen ();
168 extern PTR malloc ();
169 extern PTR realloc ();
170 # ifdef VMS
171 # define EXIT_SUCCESS 1
172 # define EXIT_FAILURE 0
173 # else /* no VMS */
174 # define EXIT_SUCCESS 0
175 # define EXIT_FAILURE 1
176 # endif
177 # endif
178 #endif /* !WINDOWSNT */
179
180 #ifdef HAVE_UNISTD_H
181 # include <unistd.h>
182 #else
183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
184 extern char *getcwd (char *buf, size_t size);
185 # endif
186 #endif /* HAVE_UNISTD_H */
187
188 #include <stdio.h>
189 #include <ctype.h>
190 #include <errno.h>
191 #ifndef errno
192 extern int errno;
193 #endif
194 #include <sys/types.h>
195 #include <sys/stat.h>
196
197 #include <assert.h>
198 #ifdef NDEBUG
199 # undef assert /* some systems have a buggy assert.h */
200 # define assert(x) ((void) 0)
201 #endif
202
203 #if !defined (S_ISREG) && defined (S_IFREG)
204 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
205 #endif
206
207 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
208 # define NO_LONG_OPTIONS TRUE
209 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
210 extern char *optarg;
211 extern int optind, opterr;
212 #else
213 # define NO_LONG_OPTIONS FALSE
214 # include <getopt.h>
215 #endif /* NO_LONG_OPTIONS */
216
217 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
218 # ifdef __CYGWIN__ /* compiling on Cygwin */
219 !!! NOTICE !!!
220 the regex.h distributed with Cygwin is not compatible with etags, alas!
221 If you want regular expression support, you should delete this notice and
222 arrange to use the GNU regex.h and regex.c.
223 # endif
224 #endif
225 #include <regex.h>
226
227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
228 Leave it undefined to make the program "etags", which makes emacs-style
229 tag tables and tags typedefs, #defines and struct/union/enum by default. */
230 #ifdef CTAGS
231 # undef CTAGS
232 # define CTAGS TRUE
233 #else
234 # define CTAGS FALSE
235 #endif
236
/*
 * String equality helpers.  Each macro evaluates to non-zero when the
 * two strings compare equal and to zero otherwise:
 *
 *   streq (s, t)          exact comparison of the whole strings
 *   strcaseeq (s, t)      same, ignoring case
 *   strneq (s, t, n)      exact comparison of at most N characters
 *   strncaseeq (s, t, n)  same, ignoring case
 *
 * BOTH arguments must be non-NULL, since they are handed straight to the
 * underlying comparison function; the assertion checks exactly that.
 * Note that the arguments are evaluated more than once when assertions
 * are enabled, so they must be free of side effects.
 */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
241
/* Character classification by table lookup.  CHAR keeps only the low
   bits of its argument so the result always lies in [0, CHARS-1]; this
   makes it a valid index into the CHARS-sized tables even when a plain
   `char' holding a negative value is passed.  The tables themselves
   (_wht, _nin, _btk, _itk, _etk) are declared elsewhere in this file;
   per the comment on their declaration they are set up by init. */
242 #define CHARS 256 /* number of distinct char values, i.e. 2^CHAR_BIT assuming 8-bit chars */
243 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1))
244 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
245 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
246 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
247 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
248 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
249
250 #define ISALNUM(c) isalnum (CHAR(c))
251 #define ISALPHA(c) isalpha (CHAR(c))
252 #define ISDIGIT(c) isdigit (CHAR(c))
253 #define ISLOWER(c) islower (CHAR(c))
254
255 #define lowcase(c) tolower (CHAR(c))
256 #define upcase(c) toupper (CHAR(c))
257
258
259 /*
260 * xnew, xrnew -- allocate, reallocate storage
261 *
262 * SYNOPSIS: Type *xnew (int n, Type);
263 * void xrnew (OldPointer, int n, Type);
264 */
265 #if DEBUG
266 # include "chkmalloc.h"
267 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
268 (n) * sizeof (Type)))
269 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
270 (char *) (op), (n) * sizeof (Type)))
271 #else
272 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
273 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
274 (char *) (op), (n) * sizeof (Type)))
275 #endif
276
277 #define bool int
278
279 typedef void Lang_function __P((FILE *));
280
281 typedef struct
282 {
283 char *suffix; /* file name suffix for this compressor */
284 char *command; /* takes one arg and decompresses to stdout */
285 } compressor;
286
287 typedef struct
288 {
289 char *name; /* language name */
290 char *help; /* detailed help for the language */
291 Lang_function *function; /* parse function */
292 char **suffixes; /* name suffixes of this language's files */
293 char **filenames; /* names of this language's files */
294 char **interpreters; /* interpreters for this language */
295 bool metasource; /* source used to generate other sources */
296 } language;
297
298 typedef struct fdesc
299 {
300 struct fdesc *next; /* for the linked list */
301 char *infname; /* uncompressed input file name */
302 char *infabsname; /* absolute uncompressed input file name */
303 char *infabsdir; /* absolute dir of input file */
304 char *taggedfname; /* file name to write in tagfile */
305 language *lang; /* language of file */
306 char *prop; /* file properties to write in tagfile */
307 bool usecharno; /* etags tags shall contain char number */
308 bool written; /* entry written in the tags file */
309 } fdesc;
310
311 typedef struct node_st
312 { /* sorting structure */
313 struct node_st *left, *right; /* left and right sons */
314 fdesc *fdp; /* description of file to whom tag belongs */
315 char *name; /* tag name */
316 char *regex; /* search regexp */
317 bool valid; /* write this tag on the tag file */
318 bool is_func; /* function tag: use regexp in CTAGS mode */
319 bool been_warned; /* warning already given for duplicated tag */
320 int lno; /* line number tag is on */
321 long cno; /* character number line starts on */
322 } node;
323
324 /*
325 * A `linebuffer' is a structure which holds a line of text.
326 * `readline_internal' reads a line from a stream into a linebuffer
327 * and works regardless of the length of the line.
328 * SIZE is the size of BUFFER, LEN is the length of the string in
329 * BUFFER after readline reads it.
330 */
331 typedef struct
332 {
333 long size;
334 int len;
335 char *buffer;
336 } linebuffer;
337
338 /* Used to support mixing of --lang and file names. */
339 typedef struct
340 {
341 enum {
342 at_language, /* a language specification */
343 at_regexp, /* a regular expression */
344 at_filename, /* a file name */
345 at_stdin, /* read from stdin here */
346 at_end /* stop parsing the list */
347 } arg_type; /* argument type */
348 language *lang; /* language associated with the argument */
349 char *what; /* the argument itself */
350 } argument;
351
352 /* Structure defining a regular expression.  Instances are chained
   through p_next into a singly linked list; the list head declared in
   this file is p_head ("list of all regexps").  `pat' and `regs' use
   the types provided by <regex.h>. */
353 typedef struct regexp
354 {
355 struct regexp *p_next; /* pointer to next in list */
356 language *lang; /* if set, use only for this language */
357 char *pattern; /* the regexp pattern */
358 char *name; /* tag name */
359 struct re_pattern_buffer *pat; /* the compiled pattern */
360 struct re_registers regs; /* re registers */
361 bool error_signaled; /* already signaled for this regexp */
362 bool force_explicit_name; /* do not allow implicit tag name */
363 bool ignore_case; /* ignore case when matching */
364 bool multi_line; /* do a multi-line match on the whole file */
365 } regexp;
366
367
368 /* Many compilers barf on this:
369 Lang_function Ada_funcs;
370 so let's write it this way */
371 static void Ada_funcs __P((FILE *));
372 static void Asm_labels __P((FILE *));
373 static void C_entries __P((int c_ext, FILE *));
374 static void default_C_entries __P((FILE *));
375 static void plain_C_entries __P((FILE *));
376 static void Cjava_entries __P((FILE *));
377 static void Cobol_paragraphs __P((FILE *));
378 static void Cplusplus_entries __P((FILE *));
379 static void Cstar_entries __P((FILE *));
380 static void Erlang_functions __P((FILE *));
381 static void Forth_words __P((FILE *));
382 static void Fortran_functions __P((FILE *));
383 static void HTML_labels __P((FILE *));
384 static void Lisp_functions __P((FILE *));
385 static void Lua_functions __P((FILE *));
386 static void Makefile_targets __P((FILE *));
387 static void Pascal_functions __P((FILE *));
388 static void Perl_functions __P((FILE *));
389 static void PHP_functions __P((FILE *));
390 static void PS_functions __P((FILE *));
391 static void Prolog_functions __P((FILE *));
392 static void Python_functions __P((FILE *));
393 static void Scheme_functions __P((FILE *));
394 static void TeX_commands __P((FILE *));
395 static void Texinfo_nodes __P((FILE *));
396 static void Yacc_entries __P((FILE *));
397 static void just_read_file __P((FILE *));
398
399 static void print_language_names __P((void));
400 static void print_version __P((void));
401 static void print_help __P((argument *));
402 int main __P((int, char **));
403
404 static compressor *get_compressor_from_suffix __P((char *, char **));
405 static language *get_language_from_langname __P((const char *));
406 static language *get_language_from_interpreter __P((char *));
407 static language *get_language_from_filename __P((char *, bool));
408 static void readline __P((linebuffer *, FILE *));
409 static long readline_internal __P((linebuffer *, FILE *));
410 static bool nocase_tail __P((char *));
411 static void get_tag __P((char *, char **));
412
413 static void analyse_regex __P((char *));
414 static void free_regexps __P((void));
415 static void regex_tag_multiline __P((void));
416 static void error __P((const char *, const char *));
417 static void suggest_asking_for_help __P((void));
418 void fatal __P((char *, char *));
419 static void pfatal __P((char *));
420 static void add_node __P((node *, node **));
421
422 static void init __P((void));
423 static void process_file_name __P((char *, language *));
424 static void process_file __P((FILE *, char *, language *));
425 static void find_entries __P((FILE *));
426 static void free_tree __P((node *));
427 static void free_fdesc __P((fdesc *));
428 static void pfnote __P((char *, bool, char *, int, int, long));
429 static void make_tag __P((char *, int, bool, char *, int, int, long));
430 static void invalidate_nodes __P((fdesc *, node **));
431 static void put_entries __P((node *));
432
433 static char *concat __P((char *, char *, char *));
434 static char *skip_spaces __P((char *));
435 static char *skip_non_spaces __P((char *));
436 static char *savenstr __P((char *, int));
437 static char *savestr __P((char *));
438 static char *etags_strchr __P((const char *, int));
439 static char *etags_strrchr __P((const char *, int));
440 static int etags_strcasecmp __P((const char *, const char *));
441 static int etags_strncasecmp __P((const char *, const char *, int));
442 static char *etags_getcwd __P((void));
443 static char *relative_filename __P((char *, char *));
444 static char *absolute_filename __P((char *, char *));
445 static char *absolute_dirname __P((char *, char *));
446 static bool filename_is_absolute __P((char *f));
447 static void canonicalize_filename __P((char *));
448 static void linebuffer_init __P((linebuffer *));
449 static void linebuffer_setlen __P((linebuffer *, int));
450 static PTR xmalloc __P((unsigned int));
451 static PTR xrealloc __P((char *, unsigned int));
452
453 \f
454 static char searchar = '/'; /* use /.../ searches */
455
456 static char *tagfile; /* output file */
457 static char *progname; /* name this program was invoked with */
458 static char *cwd; /* current working directory */
459 static char *tagfiledir; /* directory of tagfile */
460 static FILE *tagf; /* ioptr for tags file */
461
462 static fdesc *fdhead; /* head of file description list */
463 static fdesc *curfdp; /* current file description */
464 static int lineno; /* line number of current line */
465 static long charno; /* current character number */
466 static long linecharno; /* charno of start of current line */
467 static char *dbp; /* pointer to start of current tag */
468
469 static const int invalidcharno = -1;
470
471 static node *nodehead; /* the head of the binary tree of tags */
472 static node *last_node; /* the last node created */
473
474 static linebuffer lb; /* the current line */
475 static linebuffer filebuf; /* a buffer containing the whole file */
476 static linebuffer token_name; /* a buffer containing a tag name */
477
478 /* boolean "functions" (see init) */
479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
480 static char
481 /* white chars */
482 *white = " \f\t\n\r\v",
483 /* not in a name */
484 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
485 /* token ending chars */
486 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
487 /* token starting chars */
488 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
489 /* valid in-token chars */
490 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
491
492 static bool append_to_tagfile; /* -a: append to tags */
493 /* The next five default to TRUE for etags, but to FALSE for ctags. */
494 static bool typedefs; /* -t: create tags for C and Ada typedefs */
495 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
496 /* 0 struct/enum/union decls, and C++ */
497 /* member functions. */
498 static bool constantypedefs; /* -d: create tags for C #define, enum */
499 /* constants and variables. */
500 /* -D: opposite of -d. Default under ctags. */
501 static bool globals; /* create tags for global variables */
502 static bool members; /* create tags for C member variables */
503 static bool declarations; /* --declarations: tag them and extern in C&Co*/
504 static bool no_line_directive; /* ignore #line directives (undocumented) */
505 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
506 static bool update; /* -u: update tags */
507 static bool vgrind_style; /* -v: create vgrind style index output */
508 static bool no_warnings; /* -w: suppress warnings (undocumented) */
509 static bool cxref_style; /* -x: create cxref style output */
510 static bool cplusplus; /* .[hc] means C++, not C */
511 static bool ignoreindent; /* -I: ignore indentation in C */
512 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
513
514 /* STDIN is defined in LynxOS system headers */
515 #ifdef STDIN
516 # undef STDIN
517 #endif
518
519 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
520 static bool parsing_stdin; /* --parse-stdin used */
521
522 static regexp *p_head; /* list of all regexps */
523 static bool need_filebuf; /* some regexes are multi-line */
524
525 static struct option longopts[] =
526 {
527 { "append", no_argument, NULL, 'a' },
528 { "packages-only", no_argument, &packages_only, TRUE },
529 { "c++", no_argument, NULL, 'C' },
530 { "declarations", no_argument, &declarations, TRUE },
531 { "no-line-directive", no_argument, &no_line_directive, TRUE },
532 { "no-duplicates", no_argument, &no_duplicates, TRUE },
533 { "help", no_argument, NULL, 'h' },
534 { "help", no_argument, NULL, 'H' },
535 { "ignore-indentation", no_argument, NULL, 'I' },
536 { "language", required_argument, NULL, 'l' },
537 { "members", no_argument, &members, TRUE },
538 { "no-members", no_argument, &members, FALSE },
539 { "output", required_argument, NULL, 'o' },
540 { "regex", required_argument, NULL, 'r' },
541 { "no-regex", no_argument, NULL, 'R' },
542 { "ignore-case-regex", required_argument, NULL, 'c' },
543 { "parse-stdin", required_argument, NULL, STDIN },
544 { "version", no_argument, NULL, 'V' },
545
546 #if CTAGS /* Ctags options */
547 { "backward-search", no_argument, NULL, 'B' },
548 { "cxref", no_argument, NULL, 'x' },
549 { "defines", no_argument, NULL, 'd' },
550 { "globals", no_argument, &globals, TRUE },
551 { "typedefs", no_argument, NULL, 't' },
552 { "typedefs-and-c++", no_argument, NULL, 'T' },
553 { "update", no_argument, NULL, 'u' },
554 { "vgrind", no_argument, NULL, 'v' },
555 { "no-warn", no_argument, NULL, 'w' },
556
557 #else /* Etags options */
558 { "no-defines", no_argument, NULL, 'D' },
559 { "no-globals", no_argument, &globals, FALSE },
560 { "include", required_argument, NULL, 'i' },
561 #endif
562 { NULL }
563 };
564
565 static compressor compressors[] =
566 {
567 { "z", "gzip -d -c"},
568 { "Z", "gzip -d -c"},
569 { "gz", "gzip -d -c"},
570 { "GZ", "gzip -d -c"},
571 { "bz2", "bzip2 -d -c" },
572 { NULL }
573 };
574
575 /*
576 * Language stuff.
577 */
578
579 /* Ada code */
580 static char *Ada_suffixes [] =
581 { "ads", "adb", "ada", NULL };
582 static char Ada_help [] =
583 "In Ada code, functions, procedures, packages, tasks and types are\n\
584 tags. Use the `--packages-only' option to create tags for\n\
585 packages only.\n\
586 Ada tag names have suffixes indicating the type of entity:\n\
587 Entity type: Qualifier:\n\
588 ------------ ----------\n\
589 function /f\n\
590 procedure /p\n\
591 package spec /s\n\
592 package body /b\n\
593 type /t\n\
594 task /k\n\
595 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
596 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
597 will just search for any tag `bidule'.";
598
599 /* Assembly code */
600 static char *Asm_suffixes [] =
601 { "a", /* Unix assembler */
602 "asm", /* Microcontroller assembly */
603 "def", /* BSO/Tasking definition includes */
604 "inc", /* Microcontroller include files */
605 "ins", /* Microcontroller include files */
606 "s", "sa", /* Unix assembler */
607 "S", /* cpp-processed Unix assembler */
608 "src", /* BSO/Tasking C compiler output */
609 NULL
610 };
611 static char Asm_help [] =
612 "In assembler code, labels appearing at the beginning of a line,\n\
613 followed by a colon, are tags.";
614
615
616 /* Note that .c and .h can be considered C++, if the --c++ flag was
617 given, or if the `class' or `template' keywords are met inside the file.
618 That is why default_C_entries is called for these. */
619 static char *default_C_suffixes [] =
620 { "c", "h", NULL };
621 static char default_C_help [] =
622 "In C code, any C function or typedef is a tag, and so are\n\
623 definitions of `struct', `union' and `enum'. `#define' macro\n\
624 definitions and `enum' constants are tags unless you specify\n\
625 `--no-defines'. Global variables are tags unless you specify\n\
626 `--no-globals' and so are struct members unless you specify\n\
627 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
628 `--no-members' can make the tags table file much smaller.\n\
629 You can tag function declarations and external variables by\n\
630 using `--declarations'.";
631
632 static char *Cplusplus_suffixes [] =
633 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
634 "M", /* Objective C++ */
635 "pdb", /* Postscript with C syntax */
636 NULL };
637 static char Cplusplus_help [] =
638 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
639 --help --lang=c --lang=c++ for full help.)\n\
640 In addition to C tags, member functions are also recognized. Member\n\
641 variables are recognized unless you use the `--no-members' option.\n\
642 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
643 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
644 `operator+'.";
645
646 static char *Cjava_suffixes [] =
647 { "java", NULL };
648 static char Cjava_help [] =
649 "In Java code, all the tags constructs of C and C++ code are\n\
650 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
651
652
653 static char *Cobol_suffixes [] =
654 { "COB", "cob", NULL };
655 static char Cobol_help [] =
656 "In Cobol code, tags are paragraph names; that is, any word\n\
657 starting in column 8 and followed by a period.";
658
659 static char *Cstar_suffixes [] =
660 { "cs", "hs", NULL };
661
662 static char *Erlang_suffixes [] =
663 { "erl", "hrl", NULL };
664 static char Erlang_help [] =
665 "In Erlang code, the tags are the functions, records and macros\n\
666 defined in the file.";
667
/* File name suffixes of Forth source files (NULL-terminated).  Kept
   file-local, like every other per-language suffix table here, so the
   name does not leak into the global namespace. */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
670 static char Forth_help [] =
671 "In Forth code, tags are words defined by `:',\n\
672 constant, code, create, defer, value, variable, buffer:, field.";
673
674 static char *Fortran_suffixes [] =
675 { "F", "f", "f90", "for", NULL };
676 static char Fortran_help [] =
677 "In Fortran code, functions, subroutines and block data are tags.";
678
679 static char *HTML_suffixes [] =
680 { "htm", "html", "shtml", NULL };
681 static char HTML_help [] =
682 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
683 `h3' headers. Also, tags are `name=' in anchors and all\n\
684 occurrences of `id='.";
685
686 static char *Lisp_suffixes [] =
687 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
688 static char Lisp_help [] =
689 "In Lisp code, any function defined with `defun', any variable\n\
690 defined with `defvar' or `defconst', and in general the first\n\
691 argument of any expression that starts with `(def' in column zero\n\
692 is a tag.";
693
694 static char *Lua_suffixes [] =
695 { "lua", "LUA", NULL };
696 static char Lua_help [] =
697 "In Lua scripts, all functions are tags.";
698
699 static char *Makefile_filenames [] =
700 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
701 static char Makefile_help [] =
702 "In makefiles, targets are tags; additionally, variables are tags\n\
703 unless you specify `--no-globals'.";
704
705 static char *Objc_suffixes [] =
706 { "lm", /* Objective lex file */
707 "m", /* Objective C file */
708 NULL };
709 static char Objc_help [] =
710 "In Objective C code, tags include Objective C definitions for classes,\n\
711 class categories, methods and protocols. Tags for variables and\n\
712 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
713 (Use --help --lang=c --lang=objc --lang=java for full help.)";
714
715 static char *Pascal_suffixes [] =
716 { "p", "pas", NULL };
717 static char Pascal_help [] =
718 "In Pascal code, the tags are the functions and procedures defined\n\
719 in the file.";
720 /* " // this is for working around an Emacs highlighting bug... */
721
722 static char *Perl_suffixes [] =
723 { "pl", "pm", NULL };
724 static char *Perl_interpreters [] =
725 { "perl", "@PERL@", NULL };
726 static char Perl_help [] =
727 "In Perl code, the tags are the packages, subroutines and variables\n\
728 defined by the `package', `sub', `my' and `local' keywords. Use\n\
729 `--globals' if you want to tag global variables. Tags for\n\
730 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
731 defined in the default package is `main::SUB'.";
732
733 static char *PHP_suffixes [] =
734 { "php", "php3", "php4", NULL };
735 static char PHP_help [] =
736 "In PHP code, tags are functions, classes and defines. Unless you use\n\
737 the `--no-members' option, vars are tags too.";
738
739 static char *plain_C_suffixes [] =
740 { "pc", /* Pro*C file */
741 NULL };
742
743 static char *PS_suffixes [] =
744 { "ps", "psw", NULL }; /* .psw is for PSWrap */
745 static char PS_help [] =
746 "In PostScript code, the tags are the functions.";
747
748 static char *Prolog_suffixes [] =
749 { "prolog", NULL };
750 static char Prolog_help [] =
751 "In Prolog code, tags are predicates and rules at the beginning of\n\
752 line.";
753
754 static char *Python_suffixes [] =
755 { "py", NULL };
756 static char Python_help [] =
757 "In Python code, `def' or `class' at the beginning of a line\n\
758 generate a tag.";
759
760 /* Can't do the `SCM' or `scm' prefix with a version number. */
761 static char *Scheme_suffixes [] =
762 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
763 static char Scheme_help [] =
764 "In Scheme code, tags include anything defined with `def' or with a\n\
765 construct whose name starts with `def'. They also include\n\
766 variables set with `set!' at top level in the file.";
767
768 static char *TeX_suffixes [] =
769 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
770 static char TeX_help [] =
771 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
772 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
773 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
774 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
775 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
776 \n\
777 Other commands can be specified by setting the environment variable\n\
778 `TEXTAGS' to a colon-separated list like, for example,\n\
779 TEXTAGS=\"mycommand:myothercommand\".";
780
781
782 static char *Texinfo_suffixes [] =
783 { "texi", "texinfo", "txi", NULL };
784 static char Texinfo_help [] =
785 "for texinfo files, lines starting with @node are tagged.";
786
787 static char *Yacc_suffixes [] =
788 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
789 static char Yacc_help [] =
790 "In Bison or Yacc input files, each rule defines as a tag the\n\
791 nonterminal it constructs. The portions of the file that contain\n\
792 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
793 for full help).";
794
/* Help text shown for the pseudo-language `auto'.  The continuation
   line must start in column 0: any leading blanks would become part of
   the string. */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
798
799 static char none_help [] =
800 "`none' is not a real language, it indicates to only do\n\
801 regexp processing on files.";
802
803 static char no_lang_help [] =
804 "No detailed help available for this language.";
805
806
807 /*
808 * Table of languages.
809 *
810 * It is ok for a given function to be listed under more than one
811 * name. I just didn't.
812 */
813
/* Each entry initializes the fields of `language' positionally, in the
   order they are declared in that struct:
     name, help, function, suffixes, filenames, interpreters, metasource.
   Fields left out at the end of an entry are zero-initialized, so most
   languages have no filenames or interpreters, "auto" has no parse
   function, and only "yacc" sets metasource.  The entry whose name is
   NULL terminates the table. */
814 static language lang_names [] =
815 {
816 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
817 { "asm", Asm_help, Asm_labels, Asm_suffixes },
818 { "c", default_C_help, default_C_entries, default_C_suffixes },
819 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
820 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
821 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
822 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
823 { "forth", Forth_help, Forth_words, Forth_suffixes },
824 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
825 { "html", HTML_help, HTML_labels, HTML_suffixes },
826 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
827 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
828 { "lua", Lua_help, Lua_functions, Lua_suffixes },
829 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
830 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
831 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
832 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
833 { "php", PHP_help, PHP_functions, PHP_suffixes },
834 { "postscript",PS_help, PS_functions, PS_suffixes },
835 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
836 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
837 { "python", Python_help, Python_functions, Python_suffixes },
838 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
839 { "tex", TeX_help, TeX_commands, TeX_suffixes },
840 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
841 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
842 { "auto", auto_help }, /* default guessing scheme */
843 { "none", none_help, just_read_file }, /* regexp matching only */
844 { NULL } /* end of list */
845 };
846
847 \f
848 static void
849 print_language_names ()
850 {
851 language *lang;
852 char **name, **ext;
853
854 puts ("\nThese are the currently supported languages, along with the\n\
855 default file names and dot suffixes:");
856 for (lang = lang_names; lang->name != NULL; lang++)
857 {
858 printf (" %-*s", 10, lang->name);
859 if (lang->filenames != NULL)
860 for (name = lang->filenames; *name != NULL; name++)
861 printf (" %s", *name);
862 if (lang->suffixes != NULL)
863 for (ext = lang->suffixes; *ext != NULL; ext++)
864 printf (" .%s", *ext);
865 puts ("");
866 }
867 puts ("where `auto' means use default language for files based on file\n\
868 name suffix, and `none' means only do regexp processing on files.\n\
869 If no language is specified and no matching suffix is found,\n\
870 the first line of the file is read for a sharp-bang (#!) sequence\n\
871 followed by the name of an interpreter. If no such sequence is found,\n\
872 Fortran is tried first; if no tags are found, C is tried next.\n\
873 When parsing any C file, a \"class\" or \"template\" keyword\n\
874 switches to C++.");
875 puts ("Compressed files are supported using gzip and bzip2.\n\
876 \n\
877 For detailed help on a given language use, for example,\n\
878 etags --help --lang=ada.");
879 }
880
881 #ifndef EMACS_NAME
882 # define EMACS_NAME "standalone"
883 #endif
884 #ifndef VERSION
885 # define VERSION "17.34"
886 #endif
887 static void
888 print_version ()
889 {
890 /* Makes it easier to update automatically. */
891 char emacs_copyright[] = "Copyright (C) 2007 Free Software Foundation, Inc.";
892
893 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
894 puts (emacs_copyright);
895 puts ("This program is distributed under the terms in ETAGS.README");
896
897 exit (EXIT_SUCCESS);
898 }
899
/* Whether print_help also describes the options that are otherwise
   left undocumented (--no-line-directive, -w). */
#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
#endif

/*
 * Print help on standard output and exit with EXIT_SUCCESS.
 *
 * ARGBUFFER is the list of parsed arguments, terminated by an entry
 * whose arg_type is at_end.  If it contains at_language entries, only
 * the help text of those languages is printed, with a blank line
 * between two consecutive texts.  Otherwise the usage line, the
 * description of the options, the list of languages and the bug
 * report address are printed.  Which options are described depends
 * on CTAGS, NO_LONG_OPTIONS and PRINT_UNDOCUMENTED_OPTIONS_HELP.
 */
static void
print_help (argbuffer)
     argument *argbuffer;
{
  bool help_for_lang = FALSE;

  /* Language specific help: one text per --language option seen. */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* If any language help was printed, the general help is skipped. */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  if (NO_LONG_OPTIONS)
    puts ("WARNING: long option names do not work with this executable,\n\
as it is not linked with GNU getopt.");
  else
    puts ("You may use unambiguous abbreviations for the long option names.");
  puts (" A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
Append tag entries to existing tags file.");

  puts ("--packages-only\n\
For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
Write the search commands for the tag entries using '?', the\n\
backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++. Retained for backward compatibility and for debugging and
     experimentation. In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  /* ctags and etags have opposite defaults for several features, so
     each program documents only the option that changes its default. */
  if (CTAGS)
    puts ("-d, --defines\n\
Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
Don't create tag entries for C #define constants and enum constants.\n\
This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
Include a note in tag file indicating that, when searching for\n\
a tag, one should also consult the tags file FILE after\n\
checking the current file.");

  puts ("-l LANG, --language=LANG\n\
Force the following files to be considered as written in the\n\
named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
Do not create tag entries for global variables in some\n\
languages. This makes the tags file smaller.");

  if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
    puts ("--no-line-directive\n\
Ignore #line preprocessor directives in C and derived languages.");

  if (CTAGS)
    puts ("--members\n\
Create tag entries for members of structures in some languages.");
  else
    puts ("--no-members\n\
Do not create tag entries for members of structures\n\
in some languages.");

  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
Make a tag for each line matching a regular expression pattern\n\
in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
files only. REGEXFILE is a file containing one REGEXP per line.\n\
REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts (" If TAGNAME/ is present, the tags created are named.\n\
For example Tcl named tags can be created with:\n\
--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
MODS are optional one-letter modifiers: `i' means to ignore case,\n\
`m' means to allow multi-line matches, `s' implies `m' and\n\
causes dot to match any character, including newline.");

  puts ("-R, --no-regex\n\
Don't create tags from regexps for the following files.");

  puts ("-I, --ignore-indentation\n\
In C and C++ do not assume that a closing brace in the first\n\
column is the final brace of a function or structure definition.");

  puts ("-o FILE, --output=FILE\n\
Write the tags to FILE.");

  puts ("--parse-stdin=NAME\n\
Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions, up to --version, are printed
     for ctags only. */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
Generate tag entries for C typedefs, C struct/enum/union tags,\n\
and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
Update the tag entries for the given files, leaving tag\n\
entries for other files in place. Currently, this is\n\
implemented by deleting the existing entries for the given\n\
files and then rewriting the new entries at the end of the\n\
tags file. It is often faster to simply rebuild the entire\n\
tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
Print on the standard output an index of items intended for\n\
human consumption, similar to the output of vgrind. The index\n\
is sorted, and gives the page number of each item.");

      /* Note that both of the following texts describe `-w'. */
      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-duplicates\n\
Do not create duplicate tag entries, for compatibility with\n\
traditional ctags.");

      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-warn\n\
Suppress warning messages about duplicate tag entries.");

      puts ("-x, --cxref\n\
Like --vgrind, but in the style of cxref, rather than vgrind.\n\
The output uses line numbers instead of page numbers, but\n\
beyond that the differences are cosmetic; try both to see\n\
which you like.");
    }

  puts ("-V, --version\n\
Print the version of the program.\n\
-h, --help\n\
Print this help message.\n\
Followed by one or more `--language' options prints detailed\n\
help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1080
1081 \f
1082 #ifdef VMS /* VMS specific functions */
1083
/* End-of-string character used by the VMS helpers below. */
#define EOS '\0'

/* This is a BUG! ANY arbitrary limit is a BUG!
   Won't someone please fix this? */
#define MAX_FILE_SPEC_LEN 255
/* Buffer that receives one expanded file specification.  fn_exp uses
   CURLEN as the index in BODY at which it stores the terminating EOS;
   BODY has one byte more than MAX_FILE_SPEC_LEN to leave room for it. */
typedef struct {
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1093
1094 /*
1095 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1096 returning in each successive call the next file name matching the input
1097 spec. The function expects that each in_spec passed
1098 to it will be processed to completion; in particular, up to and
1099 including the call following that in which the last matching name
1100 is returned, the function ignores the value of in_spec, and will
1101 only start processing a new spec with the following call.
1102 If an error occurs, on return out_spec contains the value
1103 of in_spec when the error occurred.
1104
1105 With each successive file name returned in out_spec, the
1106 function's return value is one. When there are no more matching
1107 names the function returns zero. If on the first call no file
1108 matches in_spec, or there is any other error, -1 is returned.
1109 */
1110
1111 #include <rmsdef.h>
1112 #include <descrip.h>
1113 #define OUTSIZE MAX_FILE_SPEC_LEN
1114 static short
1115 fn_exp (out, in)
1116 vspec *out;
1117 char *in;
1118 {
1119 static long context = 0;
1120 static struct dsc$descriptor_s o;
1121 static struct dsc$descriptor_s i;
1122 static bool pass1 = TRUE;
1123 long status;
1124 short retval;
1125
1126 if (pass1)
1127 {
1128 pass1 = FALSE;
1129 o.dsc$a_pointer = (char *) out;
1130 o.dsc$w_length = (short)OUTSIZE;
1131 i.dsc$a_pointer = in;
1132 i.dsc$w_length = (short)strlen(in);
1133 i.dsc$b_dtype = DSC$K_DTYPE_T;
1134 i.dsc$b_class = DSC$K_CLASS_S;
1135 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1136 o.dsc$b_class = DSC$K_CLASS_VS;
1137 }
1138 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1139 {
1140 out->body[out->curlen] = EOS;
1141 return 1;
1142 }
1143 else if (status == RMS$_NMF)
1144 retval = 0;
1145 else
1146 {
1147 strcpy(out->body, in);
1148 retval = -1;
1149 }
1150 lib$find_file_end(&context);
1151 pass1 = TRUE;
1152 return retval;
1153 }
1154
1155 /*
1156 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1157 name of each file specified by the provided arg expanding wildcards.
1158 */
1159 static char *
1160 gfnames (arg, p_error)
1161 char *arg;
1162 bool *p_error;
1163 {
1164 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1165
1166 switch (fn_exp (&filename, arg))
1167 {
1168 case 1:
1169 *p_error = FALSE;
1170 return filename.body;
1171 case 0:
1172 *p_error = FALSE;
1173 return NULL;
1174 default:
1175 *p_error = TRUE;
1176 return filename.body;
1177 }
1178 }
1179
#ifndef OLD /* Newer versions of VMS do provide `system'. */
/* Stand-in for system(): report that the function is not available
   and return -1, so that callers comparing the result with
   EXIT_SUCCESS see a failure instead of an indeterminate value. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1187
1188 #define VERSION_DELIM ';'
1189 char *massage_name (s)
1190 char *s;
1191 {
1192 char *start = s;
1193
1194 for ( ; *s; s++)
1195 if (*s == VERSION_DELIM)
1196 {
1197 *s = EOS;
1198 break;
1199 }
1200 else
1201 *s = lowcase (*s);
1202 return start;
1203 }
1204 #endif /* VMS */
1205
1206 \f
1207 int
1208 main (argc, argv)
1209 int argc;
1210 char *argv[];
1211 {
1212 int i;
1213 unsigned int nincluded_files;
1214 char **included_files;
1215 argument *argbuffer;
1216 int current_arg, file_count;
1217 linebuffer filename_lb;
1218 bool help_asked = FALSE;
1219 #ifdef VMS
1220 bool got_err;
1221 #endif
1222 char *optstring;
1223 int opt;
1224
1225
1226 #ifdef DOS_NT
1227 _fmode = O_BINARY; /* all of files are treated as binary files */
1228 #endif /* DOS_NT */
1229
1230 progname = argv[0];
1231 nincluded_files = 0;
1232 included_files = xnew (argc, char *);
1233 current_arg = 0;
1234 file_count = 0;
1235
1236 /* Allocate enough no matter what happens. Overkill, but each one
1237 is small. */
1238 argbuffer = xnew (argc, argument);
1239
1240 /*
1241 * If etags, always find typedefs and structure tags. Why not?
1242 * Also default to find macro constants, enum constants, struct
1243 * members and global variables.
1244 */
1245 if (!CTAGS)
1246 {
1247 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1248 globals = members = TRUE;
1249 }
1250
1251 /* When the optstring begins with a '-' getopt_long does not rearrange the
1252 non-options arguments to be at the end, but leaves them alone. */
1253 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1254 "ac:Cf:Il:o:r:RSVhH",
1255 (CTAGS) ? "BxdtTuvw" : "Di:");
1256
1257 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1258 switch (opt)
1259 {
1260 case 0:
1261 /* If getopt returns 0, then it has already processed a
1262 long-named option. We should do nothing. */
1263 break;
1264
1265 case 1:
1266 /* This means that a file name has been seen. Record it. */
1267 argbuffer[current_arg].arg_type = at_filename;
1268 argbuffer[current_arg].what = optarg;
1269 ++current_arg;
1270 ++file_count;
1271 break;
1272
1273 case STDIN:
1274 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1275 argbuffer[current_arg].arg_type = at_stdin;
1276 argbuffer[current_arg].what = optarg;
1277 ++current_arg;
1278 ++file_count;
1279 if (parsing_stdin)
1280 fatal ("cannot parse standard input more than once", (char *)NULL);
1281 parsing_stdin = TRUE;
1282 break;
1283
1284 /* Common options. */
1285 case 'a': append_to_tagfile = TRUE; break;
1286 case 'C': cplusplus = TRUE; break;
1287 case 'f': /* for compatibility with old makefiles */
1288 case 'o':
1289 if (tagfile)
1290 {
1291 error ("-o option may only be given once.", (char *)NULL);
1292 suggest_asking_for_help ();
1293 /* NOTREACHED */
1294 }
1295 tagfile = optarg;
1296 break;
1297 case 'I':
1298 case 'S': /* for backward compatibility */
1299 ignoreindent = TRUE;
1300 break;
1301 case 'l':
1302 {
1303 language *lang = get_language_from_langname (optarg);
1304 if (lang != NULL)
1305 {
1306 argbuffer[current_arg].lang = lang;
1307 argbuffer[current_arg].arg_type = at_language;
1308 ++current_arg;
1309 }
1310 }
1311 break;
1312 case 'c':
1313 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1314 optarg = concat (optarg, "i", ""); /* memory leak here */
1315 /* FALLTHRU */
1316 case 'r':
1317 argbuffer[current_arg].arg_type = at_regexp;
1318 argbuffer[current_arg].what = optarg;
1319 ++current_arg;
1320 break;
1321 case 'R':
1322 argbuffer[current_arg].arg_type = at_regexp;
1323 argbuffer[current_arg].what = NULL;
1324 ++current_arg;
1325 break;
1326 case 'V':
1327 print_version ();
1328 break;
1329 case 'h':
1330 case 'H':
1331 help_asked = TRUE;
1332 break;
1333
1334 /* Etags options */
1335 case 'D': constantypedefs = FALSE; break;
1336 case 'i': included_files[nincluded_files++] = optarg; break;
1337
1338 /* Ctags options. */
1339 case 'B': searchar = '?'; break;
1340 case 'd': constantypedefs = TRUE; break;
1341 case 't': typedefs = TRUE; break;
1342 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1343 case 'u': update = TRUE; break;
1344 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1345 case 'x': cxref_style = TRUE; break;
1346 case 'w': no_warnings = TRUE; break;
1347 default:
1348 suggest_asking_for_help ();
1349 /* NOTREACHED */
1350 }
1351
1352 /* No more options. Store the rest of arguments. */
1353 for (; optind < argc; optind++)
1354 {
1355 argbuffer[current_arg].arg_type = at_filename;
1356 argbuffer[current_arg].what = argv[optind];
1357 ++current_arg;
1358 ++file_count;
1359 }
1360
1361 argbuffer[current_arg].arg_type = at_end;
1362
1363 if (help_asked)
1364 print_help (argbuffer);
1365 /* NOTREACHED */
1366
1367 if (nincluded_files == 0 && file_count == 0)
1368 {
1369 error ("no input files specified.", (char *)NULL);
1370 suggest_asking_for_help ();
1371 /* NOTREACHED */
1372 }
1373
1374 if (tagfile == NULL)
1375 tagfile = CTAGS ? "tags" : "TAGS";
1376 cwd = etags_getcwd (); /* the current working directory */
1377 if (cwd[strlen (cwd) - 1] != '/')
1378 {
1379 char *oldcwd = cwd;
1380 cwd = concat (oldcwd, "/", "");
1381 free (oldcwd);
1382 }
1383 /* Relative file names are made relative to the current directory. */
1384 if (streq (tagfile, "-")
1385 || strneq (tagfile, "/dev/", 5))
1386 tagfiledir = cwd;
1387 else
1388 tagfiledir = absolute_dirname (tagfile, cwd);
1389
1390 init (); /* set up boolean "functions" */
1391
1392 linebuffer_init (&lb);
1393 linebuffer_init (&filename_lb);
1394 linebuffer_init (&filebuf);
1395 linebuffer_init (&token_name);
1396
1397 if (!CTAGS)
1398 {
1399 if (streq (tagfile, "-"))
1400 {
1401 tagf = stdout;
1402 #ifdef DOS_NT
1403 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1404 doesn't take effect until after `stdout' is already open). */
1405 if (!isatty (fileno (stdout)))
1406 setmode (fileno (stdout), O_BINARY);
1407 #endif /* DOS_NT */
1408 }
1409 else
1410 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1411 if (tagf == NULL)
1412 pfatal (tagfile);
1413 }
1414
1415 /*
1416 * Loop through files finding functions.
1417 */
1418 for (i = 0; i < current_arg; i++)
1419 {
1420 static language *lang; /* non-NULL if language is forced */
1421 char *this_file;
1422
1423 switch (argbuffer[i].arg_type)
1424 {
1425 case at_language:
1426 lang = argbuffer[i].lang;
1427 break;
1428 case at_regexp:
1429 analyse_regex (argbuffer[i].what);
1430 break;
1431 case at_filename:
1432 #ifdef VMS
1433 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1434 {
1435 if (got_err)
1436 {
1437 error ("can't find file %s\n", this_file);
1438 argc--, argv++;
1439 }
1440 else
1441 {
1442 this_file = massage_name (this_file);
1443 }
1444 #else
1445 this_file = argbuffer[i].what;
1446 #endif
1447 /* Input file named "-" means read file names from stdin
1448 (one per line) and use them. */
1449 if (streq (this_file, "-"))
1450 {
1451 if (parsing_stdin)
1452 fatal ("cannot parse standard input AND read file names from it",
1453 (char *)NULL);
1454 while (readline_internal (&filename_lb, stdin) > 0)
1455 process_file_name (filename_lb.buffer, lang);
1456 }
1457 else
1458 process_file_name (this_file, lang);
1459 #ifdef VMS
1460 }
1461 #endif
1462 break;
1463 case at_stdin:
1464 this_file = argbuffer[i].what;
1465 process_file (stdin, this_file, lang);
1466 break;
1467 }
1468 }
1469
1470 free_regexps ();
1471 free (lb.buffer);
1472 free (filebuf.buffer);
1473 free (token_name.buffer);
1474
1475 if (!CTAGS || cxref_style)
1476 {
1477 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1478 put_entries (nodehead);
1479 free_tree (nodehead);
1480 nodehead = NULL;
1481 if (!CTAGS)
1482 {
1483 fdesc *fdp;
1484
1485 /* Output file entries that have no tags. */
1486 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1487 if (!fdp->written)
1488 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1489
1490 while (nincluded_files-- > 0)
1491 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1492
1493 if (fclose (tagf) == EOF)
1494 pfatal (tagfile);
1495 }
1496
1497 exit (EXIT_SUCCESS);
1498 }
1499
1500 if (update)
1501 {
1502 char cmd[BUFSIZ];
1503 for (i = 0; i < current_arg; ++i)
1504 {
1505 switch (argbuffer[i].arg_type)
1506 {
1507 case at_filename:
1508 case at_stdin:
1509 break;
1510 default:
1511 continue; /* the for loop */
1512 }
1513 sprintf (cmd,
1514 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1515 tagfile, argbuffer[i].what, tagfile);
1516 if (system (cmd) != EXIT_SUCCESS)
1517 fatal ("failed to execute shell command", (char *)NULL);
1518 }
1519 append_to_tagfile = TRUE;
1520 }
1521
1522 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1523 if (tagf == NULL)
1524 pfatal (tagfile);
1525 put_entries (nodehead); /* write all the tags (CTAGS) */
1526 free_tree (nodehead);
1527 nodehead = NULL;
1528 if (fclose (tagf) == EOF)
1529 pfatal (tagfile);
1530
1531 if (CTAGS)
1532 if (append_to_tagfile || update)
1533 {
1534 char cmd[2*BUFSIZ+20];
1535 /* Maybe these should be used:
1536 setenv ("LC_COLLATE", "C", 1);
1537 setenv ("LC_ALL", "C", 1); */
1538 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1539 exit (system (cmd));
1540 }
1541 return EXIT_SUCCESS;
1542 }
1543
1544
1545 /*
1546 * Return a compressor given the file name. If EXTPTR is non-zero,
1547 * return a pointer into FILE where the compressor-specific
1548 * extension begins. If no compressor is found, NULL is returned
1549 * and EXTPTR is not significant.
1550 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1551 */
1552 static compressor *
1553 get_compressor_from_suffix (file, extptr)
1554 char *file;
1555 char **extptr;
1556 {
1557 compressor *compr;
1558 char *slash, *suffix;
1559
1560 /* This relies on FN to be after canonicalize_filename,
1561 so we don't need to consider backslashes on DOS_NT. */
1562 slash = etags_strrchr (file, '/');
1563 suffix = etags_strrchr (file, '.');
1564 if (suffix == NULL || suffix < slash)
1565 return NULL;
1566 if (extptr != NULL)
1567 *extptr = suffix;
1568 suffix += 1;
1569 /* Let those poor souls who live with DOS 8+3 file name limits get
1570 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1571 Only the first do loop is run if not MSDOS */
1572 do
1573 {
1574 for (compr = compressors; compr->suffix != NULL; compr++)
1575 if (streq (compr->suffix, suffix))
1576 return compr;
1577 if (!MSDOS)
1578 break; /* do it only once: not really a loop */
1579 if (extptr != NULL)
1580 *extptr = ++suffix;
1581 } while (*suffix != '\0');
1582 return NULL;
1583 }
1584
1585
1586
1587 /*
1588 * Return a language given the name.
1589 */
1590 static language *
1591 get_language_from_langname (name)
1592 const char *name;
1593 {
1594 language *lang;
1595
1596 if (name == NULL)
1597 error ("empty language name", (char *)NULL);
1598 else
1599 {
1600 for (lang = lang_names; lang->name != NULL; lang++)
1601 if (streq (name, lang->name))
1602 return lang;
1603 error ("unknown language \"%s\"", name);
1604 }
1605
1606 return NULL;
1607 }
1608
1609
1610 /*
1611 * Return a language given the interpreter name.
1612 */
1613 static language *
1614 get_language_from_interpreter (interpreter)
1615 char *interpreter;
1616 {
1617 language *lang;
1618 char **iname;
1619
1620 if (interpreter == NULL)
1621 return NULL;
1622 for (lang = lang_names; lang->name != NULL; lang++)
1623 if (lang->interpreters != NULL)
1624 for (iname = lang->interpreters; *iname != NULL; iname++)
1625 if (streq (*iname, interpreter))
1626 return lang;
1627
1628 return NULL;
1629 }
1630
1631
1632
1633 /*
1634 * Return a language given the file name.
1635 */
1636 static language *
1637 get_language_from_filename (file, case_sensitive)
1638 char *file;
1639 bool case_sensitive;
1640 {
1641 language *lang;
1642 char **name, **ext, *suffix;
1643
1644 /* Try whole file name first. */
1645 for (lang = lang_names; lang->name != NULL; lang++)
1646 if (lang->filenames != NULL)
1647 for (name = lang->filenames; *name != NULL; name++)
1648 if ((case_sensitive)
1649 ? streq (*name, file)
1650 : strcaseeq (*name, file))
1651 return lang;
1652
1653 /* If not found, try suffix after last dot. */
1654 suffix = etags_strrchr (file, '.');
1655 if (suffix == NULL)
1656 return NULL;
1657 suffix += 1;
1658 for (lang = lang_names; lang->name != NULL; lang++)
1659 if (lang->suffixes != NULL)
1660 for (ext = lang->suffixes; *ext != NULL; ext++)
1661 if ((case_sensitive)
1662 ? streq (*ext, suffix)
1663 : strcaseeq (*ext, suffix))
1664 return lang;
1665 return NULL;
1666 }
1667
1668 \f
/*
 * This routine is called on each file argument.
 *
 * FILE is canonicalized in place.  If it is the tags file itself it
 * is skipped with an error message, and if its uncompressed name was
 * already processed it is skipped silently.  Otherwise the file is
 * located, trying the name without its compressor suffix or with
 * each known compressor suffix when the given name does not exist.
 * It is then opened, through the compressor's command if it is
 * compressed, and handed to process_file together with LANG.
 * NOTE(review): the file name is appended unquoted to the command
 * passed to popen.
 */
static void
process_file_name (file, lang)
     char *file;
     language *lang;
{
  struct stat stat_buf;
  FILE *inf;
  fdesc *fdp;
  compressor *compr;
  char *compressed_name, *uncompressed_name;
  char *ext, *real_name;
  int retval;

  canonicalize_filename (file);
  if (streq (file, tagfile) && !streq (tagfile, "-"))
    {
      error ("skipping inclusion of %s in self.", file);
      return;
    }
  /* REAL_NAME is the name actually opened.  It always points to the
     same string as either COMPRESSED_NAME or UNCOMPRESSED_NAME, and
     the code below tells the two cases apart by comparing pointers. */
  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
    {
      compressed_name = NULL;
      real_name = uncompressed_name = savestr (file);
    }
  else
    {
      real_name = compressed_name = savestr (file);
      uncompressed_name = savenstr (file, ext - file);
    }

  /* If the canonicalized uncompressed name
     has already been dealt with, skip it silently. */
  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
    {
      assert (fdp->infname != NULL);
      if (streq (uncompressed_name, fdp->infname))
        goto cleanup;
    }

  if (stat (real_name, &stat_buf) != 0)
    {
      /* Reset real_name and try with a different name. */
      real_name = NULL;
      if (compressed_name != NULL) /* try with the given suffix */
        {
          if (stat (uncompressed_name, &stat_buf) == 0)
            real_name = uncompressed_name;
        }
      else /* try all possible suffixes */
        {
          for (compr = compressors; compr->suffix != NULL; compr++)
            {
              compressed_name = concat (file, ".", compr->suffix);
              if (stat (compressed_name, &stat_buf) != 0)
                {
                  if (MSDOS)
                    {
                      /* Shorten the name one character at a time,
                         starting with the dot that was just added,
                         and retry after each removal. */
                      char *suf = compressed_name + strlen (file);
                      size_t suflen = strlen (compr->suffix) + 1;
                      for ( ; suf[1]; suf++, suflen--)
                        {
                          memmove (suf, suf + 1, suflen);
                          if (stat (compressed_name, &stat_buf) == 0)
                            {
                              real_name = compressed_name;
                              break;
                            }
                        }
                      if (real_name != NULL)
                        break;
                    } /* MSDOS */
                  free (compressed_name);
                  compressed_name = NULL;
                }
              else
                {
                  real_name = compressed_name;
                  break;
                }
            }
        }
      if (real_name == NULL)
        {
          perror (file);
          goto cleanup;
        }
    } /* try with a different name */

  if (!S_ISREG (stat_buf.st_mode))
    {
      error ("skipping %s: it is not a regular file.", real_name);
      goto cleanup;
    }
  /* A compressed file is read through a pipe from the compressor's
     command, any other file is opened directly. */
  if (real_name == compressed_name)
    {
      char *cmd = concat (compr->command, " ", real_name);
      inf = (FILE *) popen (cmd, "r");
      free (cmd);
    }
  else
    inf = fopen (real_name, "r");
  if (inf == NULL)
    {
      perror (real_name);
      goto cleanup;
    }

  /* The tags are recorded under the uncompressed name. */
  process_file (inf, uncompressed_name, lang);

  if (real_name == compressed_name)
    retval = pclose (inf);
  else
    retval = fclose (inf);
  if (retval < 0)
    pfatal (file);

 cleanup:
  /* Every path that reaches this label releases both names and
     resets the per-file state. */
  if (compressed_name) free (compressed_name);
  if (uncompressed_name) free (uncompressed_name);
  last_node = NULL;
  curfdp = NULL;
  return;
}
1795
1796 static void
1797 process_file (fh, fn, lang)
1798 FILE *fh;
1799 char *fn;
1800 language *lang;
1801 {
1802 static const fdesc emptyfdesc;
1803 fdesc *fdp;
1804
1805 /* Create a new input file description entry. */
1806 fdp = xnew (1, fdesc);
1807 *fdp = emptyfdesc;
1808 fdp->next = fdhead;
1809 fdp->infname = savestr (fn);
1810 fdp->lang = lang;
1811 fdp->infabsname = absolute_filename (fn, cwd);
1812 fdp->infabsdir = absolute_dirname (fn, cwd);
1813 if (filename_is_absolute (fn))
1814 {
1815 /* An absolute file name. Canonicalize it. */
1816 fdp->taggedfname = absolute_filename (fn, NULL);
1817 }
1818 else
1819 {
1820 /* A file name relative to cwd. Make it relative
1821 to the directory of the tags file. */
1822 fdp->taggedfname = relative_filename (fn, tagfiledir);
1823 }
1824 fdp->usecharno = TRUE; /* use char position when making tags */
1825 fdp->prop = NULL;
1826 fdp->written = FALSE; /* not written on tags file yet */
1827
1828 fdhead = fdp;
1829 curfdp = fdhead; /* the current file description */
1830
1831 find_entries (fh);
1832
1833 /* If not Ctags, and if this is not metasource and if it contained no #line
1834 directives, we can write the tags and free all nodes pointing to
1835 curfdp. */
1836 if (!CTAGS
1837 && curfdp->usecharno /* no #line directives in this file */
1838 && !curfdp->lang->metasource)
1839 {
1840 node *np, *prev;
1841
1842 /* Look for the head of the sublist relative to this file. See add_node
1843 for the structure of the node tree. */
1844 prev = NULL;
1845 for (np = nodehead; np != NULL; prev = np, np = np->left)
1846 if (np->fdp == curfdp)
1847 break;
1848
1849 /* If we generated tags for this file, write and delete them. */
1850 if (np != NULL)
1851 {
1852 /* This is the head of the last sublist, if any. The following
1853 instructions depend on this being true. */
1854 assert (np->left == NULL);
1855
1856 assert (fdhead == curfdp);
1857 assert (last_node->fdp == curfdp);
1858 put_entries (np); /* write tags for file curfdp->taggedfname */
1859 free_tree (np); /* remove the written nodes */
1860 if (prev == NULL)
1861 nodehead = NULL; /* no nodes left */
1862 else
1863 prev->left = NULL; /* delete the pointer to the sublist */
1864 }
1865 }
1866 }
1867
1868 /*
1869 * This routine sets up the boolean pseudo-functions which work
1870 * by setting boolean flags dependent upon the corresponding character.
1871 * Every char which is NOT in that string is not a white char. Therefore,
1872 * all of the array "_wht" is set to FALSE, and then the elements
1873 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1874 * of a char is TRUE if it is the string "white", else FALSE.
1875 */
1876 static void
1877 init ()
1878 {
1879 register char *sp;
1880 register int i;
1881
1882 for (i = 0; i < CHARS; i++)
1883 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1884 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1885 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1886 notinname('\0') = notinname('\n');
1887 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1888 begtoken('\0') = begtoken('\n');
1889 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1890 intoken('\0') = intoken('\n');
1891 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1892 endtoken('\0') = endtoken('\n');
1893 }
1894
1895 /*
1896 * This routine opens the specified file and calls the function
1897 * which finds the function and type definitions.
1898 */
1899 static void
1900 find_entries (inf)
1901 FILE *inf;
1902 {
1903 char *cp;
1904 language *lang = curfdp->lang;
1905 Lang_function *parser = NULL;
1906
1907 /* If user specified a language, use it. */
1908 if (lang != NULL && lang->function != NULL)
1909 {
1910 parser = lang->function;
1911 }
1912
1913 /* Else try to guess the language given the file name. */
1914 if (parser == NULL)
1915 {
1916 lang = get_language_from_filename (curfdp->infname, TRUE);
1917 if (lang != NULL && lang->function != NULL)
1918 {
1919 curfdp->lang = lang;
1920 parser = lang->function;
1921 }
1922 }
1923
1924 /* Else look for sharp-bang as the first two characters. */
1925 if (parser == NULL
1926 && readline_internal (&lb, inf) > 0
1927 && lb.len >= 2
1928 && lb.buffer[0] == '#'
1929 && lb.buffer[1] == '!')
1930 {
1931 char *lp;
1932
1933 /* Set lp to point at the first char after the last slash in the
1934 line or, if no slashes, at the first nonblank. Then set cp to
1935 the first successive blank and terminate the string. */
1936 lp = etags_strrchr (lb.buffer+2, '/');
1937 if (lp != NULL)
1938 lp += 1;
1939 else
1940 lp = skip_spaces (lb.buffer + 2);
1941 cp = skip_non_spaces (lp);
1942 *cp = '\0';
1943
1944 if (strlen (lp) > 0)
1945 {
1946 lang = get_language_from_interpreter (lp);
1947 if (lang != NULL && lang->function != NULL)
1948 {
1949 curfdp->lang = lang;
1950 parser = lang->function;
1951 }
1952 }
1953 }
1954
1955 /* We rewind here, even if inf may be a pipe. We fail if the
1956 length of the first line is longer than the pipe block size,
1957 which is unlikely. */
1958 rewind (inf);
1959
1960 /* Else try to guess the language given the case insensitive file name. */
1961 if (parser == NULL)
1962 {
1963 lang = get_language_from_filename (curfdp->infname, FALSE);
1964 if (lang != NULL && lang->function != NULL)
1965 {
1966 curfdp->lang = lang;
1967 parser = lang->function;
1968 }
1969 }
1970
1971 /* Else try Fortran or C. */
1972 if (parser == NULL)
1973 {
1974 node *old_last_node = last_node;
1975
1976 curfdp->lang = get_language_from_langname ("fortran");
1977 find_entries (inf);
1978
1979 if (old_last_node == last_node)
1980 /* No Fortran entries found. Try C. */
1981 {
1982 /* We do not tag if rewind fails.
1983 Only the file name will be recorded in the tags file. */
1984 rewind (inf);
1985 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1986 find_entries (inf);
1987 }
1988 return;
1989 }
1990
1991 if (!no_line_directive
1992 && curfdp->lang != NULL && curfdp->lang->metasource)
1993 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1994 file, or anyway we parsed a file that is automatically generated from
1995 this one. If this is the case, the bingo.c file contained #line
1996 directives that generated tags pointing to this file. Let's delete
1997 them all before parsing this file, which is the real source. */
1998 {
1999 fdesc **fdpp = &fdhead;
2000 while (*fdpp != NULL)
2001 if (*fdpp != curfdp
2002 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2003 /* We found one of those! We must delete both the file description
2004 and all tags referring to it. */
2005 {
2006 fdesc *badfdp = *fdpp;
2007
2008 /* Delete the tags referring to badfdp->taggedfname
2009 that were obtained from badfdp->infname. */
2010 invalidate_nodes (badfdp, &nodehead);
2011
2012 *fdpp = badfdp->next; /* remove the bad description from the list */
2013 free_fdesc (badfdp);
2014 }
2015 else
2016 fdpp = &(*fdpp)->next; /* advance the list pointer */
2017 }
2018
2019 assert (parser != NULL);
2020
2021 /* Generic initialisations before reading from file. */
2022 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2023
2024 /* Generic initialisations before parsing file with readline. */
2025 lineno = 0; /* reset global line number */
2026 charno = 0; /* reset global char number */
2027 linecharno = 0; /* reset global char number of line start */
2028
2029 parser (inf);
2030
2031 regex_tag_multiline ();
2032 }
2033
2034 \f
2035 /*
2036 * Check whether an implicitly named tag should be created,
2037 * then call `pfnote'.
2038 * NAME is a string that is internally copied by this function.
2039 *
2040 * TAGS format specification
2041 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2042 * The following is explained in some more detail in etc/ETAGS.EBNF.
2043 *
2044 * make_tag creates tags with "implicit tag names" (unnamed tags)
2045 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2046 * 1. NAME does not contain any of the characters in NONAM;
2047 * 2. LINESTART contains name as either a rightmost, or rightmost but
2048 * one character, substring;
2049 * 3. the character, if any, immediately before NAME in LINESTART must
2050 * be a character in NONAM;
2051 * 4. the character, if any, immediately after NAME in LINESTART must
2052 * also be a character in NONAM.
2053 *
2054 * The implementation uses the notinname() macro, which recognises the
2055 * characters stored in the string `nonam'.
2056 * etags.el needs to use the same characters that are in NONAM.
2057 */
2058 static void
2059 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2060 char *name; /* tag name, or NULL if unnamed */
2061 int namelen; /* tag length */
2062 bool is_func; /* tag is a function */
2063 char *linestart; /* start of the line where tag is */
2064 int linelen; /* length of the line where tag is */
2065 int lno; /* line number */
2066 long cno; /* character number */
2067 {
2068 bool named = (name != NULL && namelen > 0);
2069
2070 if (!CTAGS && named) /* maybe set named to false */
2071 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2072 such that etags.el can guess a name from it. */
2073 {
2074 int i;
2075 register char *cp = name;
2076
2077 for (i = 0; i < namelen; i++)
2078 if (notinname (*cp++))
2079 break;
2080 if (i == namelen) /* rule #1 */
2081 {
2082 cp = linestart + linelen - namelen;
2083 if (notinname (linestart[linelen-1]))
2084 cp -= 1; /* rule #4 */
2085 if (cp >= linestart /* rule #2 */
2086 && (cp == linestart
2087 || notinname (cp[-1])) /* rule #3 */
2088 && strneq (name, cp, namelen)) /* rule #2 */
2089 named = FALSE; /* use implicit tag name */
2090 }
2091 }
2092
2093 if (named)
2094 name = savenstr (name, namelen);
2095 else
2096 name = NULL;
2097 pfnote (name, is_func, linestart, linelen, lno, cno);
2098 }
2099
2100 /* Record a tag. */
2101 static void
2102 pfnote (name, is_func, linestart, linelen, lno, cno)
2103 char *name; /* tag name, or NULL if unnamed */
2104 bool is_func; /* tag is a function */
2105 char *linestart; /* start of the line where tag is */
2106 int linelen; /* length of the line where tag is */
2107 int lno; /* line number */
2108 long cno; /* character number */
2109 {
2110 register node *np;
2111
2112 assert (name == NULL || name[0] != '\0');
2113 if (CTAGS && name == NULL)
2114 return;
2115
2116 np = xnew (1, node);
2117
2118 /* If ctags mode, change name "main" to M<thisfilename>. */
2119 if (CTAGS && !cxref_style && streq (name, "main"))
2120 {
2121 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2122 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2123 fp = etags_strrchr (np->name, '.');
2124 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2125 fp[0] = '\0';
2126 }
2127 else
2128 np->name = name;
2129 np->valid = TRUE;
2130 np->been_warned = FALSE;
2131 np->fdp = curfdp;
2132 np->is_func = is_func;
2133 np->lno = lno;
2134 if (np->fdp->usecharno)
2135 /* Our char numbers are 0-base, because of C language tradition?
2136 ctags compatibility? old versions compatibility? I don't know.
2137 Anyway, since emacs's are 1-base we expect etags.el to take care
2138 of the difference. If we wanted to have 1-based numbers, we would
2139 uncomment the +1 below. */
2140 np->cno = cno /* + 1 */ ;
2141 else
2142 np->cno = invalidcharno;
2143 np->left = np->right = NULL;
2144 if (CTAGS && !cxref_style)
2145 {
2146 if (strlen (linestart) < 50)
2147 np->regex = concat (linestart, "$", "");
2148 else
2149 np->regex = savenstr (linestart, 50);
2150 }
2151 else
2152 np->regex = savenstr (linestart, linelen);
2153
2154 add_node (np, &nodehead);
2155 }
2156
2157 /*
2158 * free_tree ()
2159 * recurse on left children, iterate on right children.
2160 */
2161 static void
2162 free_tree (np)
2163 register node *np;
2164 {
2165 while (np)
2166 {
2167 register node *node_right = np->right;
2168 free_tree (np->left);
2169 if (np->name != NULL)
2170 free (np->name);
2171 free (np->regex);
2172 free (np);
2173 np = node_right;
2174 }
2175 }
2176
2177 /*
2178 * free_fdesc ()
2179 * delete a file description
2180 */
2181 static void
2182 free_fdesc (fdp)
2183 register fdesc *fdp;
2184 {
2185 if (fdp->infname != NULL) free (fdp->infname);
2186 if (fdp->infabsname != NULL) free (fdp->infabsname);
2187 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2188 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2189 if (fdp->prop != NULL) free (fdp->prop);
2190 free (fdp);
2191 }
2192
2193 /*
2194 * add_node ()
2195 * Adds a node to the tree of nodes. In etags mode, sort by file
2196 * name. In ctags mode, sort by tag name. Make no attempt at
2197 * balancing.
2198 *
2199 * add_node is the only function allowed to add nodes, so it can
2200 * maintain state.
2201 */
2202 static void
2203 add_node (np, cur_node_p)
2204 node *np, **cur_node_p;
2205 {
2206 register int dif;
2207 register node *cur_node = *cur_node_p;
2208
2209 if (cur_node == NULL)
2210 {
2211 *cur_node_p = np;
2212 last_node = np;
2213 return;
2214 }
2215
2216 if (!CTAGS)
2217 /* Etags Mode */
2218 {
2219 /* For each file name, tags are in a linked sublist on the right
2220 pointer. The first tags of different files are a linked list
2221 on the left pointer. last_node points to the end of the last
2222 used sublist. */
2223 if (last_node != NULL && last_node->fdp == np->fdp)
2224 {
2225 /* Let's use the same sublist as the last added node. */
2226 assert (last_node->right == NULL);
2227 last_node->right = np;
2228 last_node = np;
2229 }
2230 else if (cur_node->fdp == np->fdp)
2231 {
2232 /* Scanning the list we found the head of a sublist which is
2233 good for us. Let's scan this sublist. */
2234 add_node (np, &cur_node->right);
2235 }
2236 else
2237 /* The head of this sublist is not good for us. Let's try the
2238 next one. */
2239 add_node (np, &cur_node->left);
2240 } /* if ETAGS mode */
2241
2242 else
2243 {
2244 /* Ctags Mode */
2245 dif = strcmp (np->name, cur_node->name);
2246
2247 /*
2248 * If this tag name matches an existing one, then
2249 * do not add the node, but maybe print a warning.
2250 */
2251 if (no_duplicates && !dif)
2252 {
2253 if (np->fdp == cur_node->fdp)
2254 {
2255 if (!no_warnings)
2256 {
2257 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2258 np->fdp->infname, lineno, np->name);
2259 fprintf (stderr, "Second entry ignored\n");
2260 }
2261 }
2262 else if (!cur_node->been_warned && !no_warnings)
2263 {
2264 fprintf
2265 (stderr,
2266 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2267 np->fdp->infname, cur_node->fdp->infname, np->name);
2268 cur_node->been_warned = TRUE;
2269 }
2270 return;
2271 }
2272
2273 /* Actually add the node */
2274 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2275 } /* if CTAGS mode */
2276 }
2277
2278 /*
2279 * invalidate_nodes ()
2280 * Scan the node tree and invalidate all nodes pointing to the
2281 * given file description (CTAGS case) or free them (ETAGS case).
2282 */
2283 static void
2284 invalidate_nodes (badfdp, npp)
2285 fdesc *badfdp;
2286 node **npp;
2287 {
2288 node *np = *npp;
2289
2290 if (np == NULL)
2291 return;
2292
2293 if (CTAGS)
2294 {
2295 if (np->left != NULL)
2296 invalidate_nodes (badfdp, &np->left);
2297 if (np->fdp == badfdp)
2298 np->valid = FALSE;
2299 if (np->right != NULL)
2300 invalidate_nodes (badfdp, &np->right);
2301 }
2302 else
2303 {
2304 assert (np->fdp != NULL);
2305 if (np->fdp == badfdp)
2306 {
2307 *npp = np->left; /* detach the sublist from the list */
2308 np->left = NULL; /* isolate it */
2309 free_tree (np); /* free it */
2310 invalidate_nodes (badfdp, npp);
2311 }
2312 else
2313 invalidate_nodes (badfdp, &np->left);
2314 }
2315 }
2316
2317 \f
2318 static int total_size_of_entries __P((node *));
2319 static int number_len __P((long));
2320
/* Return the number of decimal digits needed to print the non-negative
   number NUM.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits = 1;

  /* One more digit for each further division by ten that leaves
     something. */
  for (num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2331
2332 /*
2333 * Return total number of characters that put_entries will output for
2334 * the nodes in the linked list at the right of the specified node.
2335 * This count is irrelevant with etags.el since emacs 19.34 at least,
2336 * but is still supplied for backward compatibility.
2337 */
2338 static int
2339 total_size_of_entries (np)
2340 register node *np;
2341 {
2342 register int total = 0;
2343
2344 for (; np != NULL; np = np->right)
2345 if (np->valid)
2346 {
2347 total += strlen (np->regex) + 1; /* pat\177 */
2348 if (np->name != NULL)
2349 total += strlen (np->name) + 1; /* name\001 */
2350 total += number_len ((long) np->lno) + 1; /* lno, */
2351 if (np->cno != invalidcharno) /* cno */
2352 total += number_len (np->cno);
2353 total += 1; /* newline */
2354 }
2355
2356 return total;
2357 }
2358
2359 static void
2360 put_entries (np)
2361 register node *np;
2362 {
2363 register char *sp;
2364 static fdesc *fdp = NULL;
2365
2366 if (np == NULL)
2367 return;
2368
2369 /* Output subentries that precede this one */
2370 if (CTAGS)
2371 put_entries (np->left);
2372
2373 /* Output this entry */
2374 if (np->valid)
2375 {
2376 if (!CTAGS)
2377 {
2378 /* Etags mode */
2379 if (fdp != np->fdp)
2380 {
2381 fdp = np->fdp;
2382 fprintf (tagf, "\f\n%s,%d\n",
2383 fdp->taggedfname, total_size_of_entries (np));
2384 fdp->written = TRUE;
2385 }
2386 fputs (np->regex, tagf);
2387 fputc ('\177', tagf);
2388 if (np->name != NULL)
2389 {
2390 fputs (np->name, tagf);
2391 fputc ('\001', tagf);
2392 }
2393 fprintf (tagf, "%d,", np->lno);
2394 if (np->cno != invalidcharno)
2395 fprintf (tagf, "%ld", np->cno);
2396 fputs ("\n", tagf);
2397 }
2398 else
2399 {
2400 /* Ctags mode */
2401 if (np->name == NULL)
2402 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2403
2404 if (cxref_style)
2405 {
2406 if (vgrind_style)
2407 fprintf (stdout, "%s %s %d\n",
2408 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2409 else
2410 fprintf (stdout, "%-16s %3d %-16s %s\n",
2411 np->name, np->lno, np->fdp->taggedfname, np->regex);
2412 }
2413 else
2414 {
2415 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2416
2417 if (np->is_func)
2418 { /* function or #define macro with args */
2419 putc (searchar, tagf);
2420 putc ('^', tagf);
2421
2422 for (sp = np->regex; *sp; sp++)
2423 {
2424 if (*sp == '\\' || *sp == searchar)
2425 putc ('\\', tagf);
2426 putc (*sp, tagf);
2427 }
2428 putc (searchar, tagf);
2429 }
2430 else
2431 { /* anything else; text pattern inadequate */
2432 fprintf (tagf, "%d", np->lno);
2433 }
2434 putc ('\n', tagf);
2435 }
2436 }
2437 } /* if this node contains a valid tag */
2438
2439 /* Output subentries that follow this one */
2440 put_entries (np->right);
2441 if (!CTAGS)
2442 put_entries (np->left);
2443 }
2444
2445 \f
/* C extensions.
   Values describing the C dialect being parsed.  The keyword table
   entries (struct C_stab_entry) carry such a value in their c_ext
   member, and C_symtype tests it against the current dialect mask. */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */

/*
 * The C symbol tables.
 * sym_type is the classification that the keyword table attaches to
 * each keyword; C_symtype returns st_none for a token that is not a
 * keyword of the current dialect.  The keywords listed below are those
 * of the gperf input that follows.
 */
enum sym_type
{
  st_none,			/* not a keyword */
  st_C_objprot,			/* @interface, @protocol */
  st_C_objimpl,			/* @implementation */
  st_C_objend,			/* @end */
  st_C_gnumacro,		/* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,			/* if, for, while, switch, return,
				   import, package, friend */
  st_C_attribute,		/* __attribute__ */
  st_C_javastruct,		/* extends, implements */
  st_C_operator,		/* operator */
  st_C_class,			/* class */
  st_C_template,		/* template */
  st_C_struct,			/* struct, union, namespace, domain,
				   interface */
  st_C_extern,			/* extern */
  st_C_enum,			/* enum */
  st_C_define,			/* define, undef */
  st_C_typedef			/* typedef */
};
2469
2470 static unsigned int hash __P((const char *, unsigned int));
2471 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2472 static enum sym_type C_symtype __P((char *, int, int));
2473
2474 /* Feed stuff between (but not including) %[ and %] lines to:
2475 gperf -m 5
2476 %[
2477 %compare-strncmp
2478 %enum
2479 %struct-type
2480 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2481 %%
2482 if, 0, st_C_ignore
2483 for, 0, st_C_ignore
2484 while, 0, st_C_ignore
2485 switch, 0, st_C_ignore
2486 return, 0, st_C_ignore
2487 __attribute__, 0, st_C_attribute
2488 @interface, 0, st_C_objprot
2489 @protocol, 0, st_C_objprot
2490 @implementation,0, st_C_objimpl
2491 @end, 0, st_C_objend
2492 import, (C_JAVA & ~C_PLPL), st_C_ignore
2493 package, (C_JAVA & ~C_PLPL), st_C_ignore
2494 friend, C_PLPL, st_C_ignore
2495 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2496 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2497 interface, (C_JAVA & ~C_PLPL), st_C_struct
2498 class, 0, st_C_class
2499 namespace, C_PLPL, st_C_struct
2500 domain, C_STAR, st_C_struct
2501 union, 0, st_C_struct
2502 struct, 0, st_C_struct
2503 extern, 0, st_C_extern
2504 enum, 0, st_C_enum
2505 typedef, 0, st_C_typedef
2506 define, 0, st_C_define
2507 undef, 0, st_C_define
2508 operator, C_PLPL, st_C_operator
2509 template, 0, st_C_template
2510 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2511 DEFUN, 0, st_C_gnumacro
2512 SYSCALL, 0, st_C_gnumacro
2513 ENTRY, 0, st_C_gnumacro
2514 PSEUDO, 0, st_C_gnumacro
2515 # These are defined inside C functions, so currently they are not met.
2516 # EXFUN used in glibc, DEFVAR_* in emacs.
2517 #EXFUN, 0, st_C_gnumacro
2518 #DEFVAR_, 0, st_C_gnumacro
2519 %]
2520 and replace lines between %< and %> with its output, then:
2521 - remove the #if characterset check
2522 - make in_word_set static and not inline. */
2523 /*%<*/
2524 /* C code produced by gperf version 3.0.1 */
2525 /* Command-line: gperf -m 5 */
2526 /* Computed positions: -k'2-3' */
2527
2528 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2529 /* maximum key range = 33, duplicates = 0 */
2530
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function of the gperf generated keyword table: the length LEN
   of STR, plus the association value of its second character, plus,
   unless LEN is 2, the association value of its third character.
   STR must hold at least two characters. */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* The third character takes part for every length but 2. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];
  return hval;
}
2585
2586 static struct C_stab_entry *
2587 in_word_set (str, len)
2588 register const char *str;
2589 register unsigned int len;
2590 {
2591 enum
2592 {
2593 TOTAL_KEYWORDS = 32,
2594 MIN_WORD_LENGTH = 2,
2595 MAX_WORD_LENGTH = 15,
2596 MIN_HASH_VALUE = 2,
2597 MAX_HASH_VALUE = 34
2598 };
2599
2600 static struct C_stab_entry wordlist[] =
2601 {
2602 {""}, {""},
2603 {"if", 0, st_C_ignore},
2604 {""},
2605 {"@end", 0, st_C_objend},
2606 {"union", 0, st_C_struct},
2607 {"define", 0, st_C_define},
2608 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2609 {"template", 0, st_C_template},
2610 {"operator", C_PLPL, st_C_operator},
2611 {"@interface", 0, st_C_objprot},
2612 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2613 {"friend", C_PLPL, st_C_ignore},
2614 {"typedef", 0, st_C_typedef},
2615 {"return", 0, st_C_ignore},
2616 {"@implementation",0, st_C_objimpl},
2617 {"@protocol", 0, st_C_objprot},
2618 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2619 {"extern", 0, st_C_extern},
2620 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2621 {"struct", 0, st_C_struct},
2622 {"domain", C_STAR, st_C_struct},
2623 {"switch", 0, st_C_ignore},
2624 {"enum", 0, st_C_enum},
2625 {"for", 0, st_C_ignore},
2626 {"namespace", C_PLPL, st_C_struct},
2627 {"class", 0, st_C_class},
2628 {"while", 0, st_C_ignore},
2629 {"undef", 0, st_C_define},
2630 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2631 {"__attribute__", 0, st_C_attribute},
2632 {"SYSCALL", 0, st_C_gnumacro},
2633 {"ENTRY", 0, st_C_gnumacro},
2634 {"PSEUDO", 0, st_C_gnumacro},
2635 {"DEFUN", 0, st_C_gnumacro}
2636 };
2637
2638 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2639 {
2640 register int key = hash (str, len);
2641
2642 if (key <= MAX_HASH_VALUE && key >= 0)
2643 {
2644 register const char *s = wordlist[key].name;
2645
2646 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2647 return &wordlist[key];
2648 }
2649 }
2650 return 0;
2651 }
2652 /*%>*/
2653
2654 static enum sym_type
2655 C_symtype (str, len, c_ext)
2656 char *str;
2657 int len;
2658 int c_ext;
2659 {
2660 register struct C_stab_entry *se = in_word_set (str, len);
2661
2662 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2663 return st_none;
2664 return se->type;
2665 }
2666
2667 \f
/*
 * Ignoring __attribute__ ((list))
 * consider_token sets this flag to TRUE when it meets the
 * __attribute__ keyword.
 */
static bool inattribute;	/* looking at an __attribute__ construct */
2672
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable; it is read and
 * updated by consider_token.
 */
static enum
{
  fvnone,			/* nothing seen */
  fdefunkey,			/* Emacs DEFUN keyword seen */
  fdefunname,			/* Emacs DEFUN name seen */
  foperator,			/* func: operator keyword seen (cplpl) */
  fvnameseen,			/* function or variable name seen */
  fstartlist,			/* func: just after open parenthesis */
  finlist,			/* func: in parameter list */
  flistseen,			/* func: after parameter list */
  fignore,			/* func: before open brace */
  vignore			/* var-like: ignore until ';' */
} fvdef;

/* Cleared by consider_token when a typedef keyword is met. */
static bool fvextern;		/* func or var: extern keyword seen; */
2692
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable; it is read and updated by
 * consider_token.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
2706
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable; it is read and updated by consider_token.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
2720
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a freshly allocated copy of the class name here
 * when it meets the token following @interface, @protocol or
 * @implementation.
 */
static char *objtag = "<uninited>";
2725
/*
 * Yet another little state machine to deal with preprocessor lines.
 * definedef is its state variable; consider_token advances it.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
2736
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 * objdef is its state variable.
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2756
2757
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 * The token text is not stored here: it is the LENGTH characters found
 * at OFFSET inside LINE.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;			/* do not create a tag; the token should be
				   invalidated whenever a state machine is
				   reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2780
2781 /*
2782 * Variables and functions for dealing with nested structures.
2783 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2784 */
2785 static void pushclass_above __P((int, char *, int));
2786 static void popclass_above __P((int));
2787 static void write_classname __P((linebuffer *, char *qualifier));
2788
/* Stack of the enclosing named declarations.  cname and bracelev are
   parallel arrays with room for `size' elements, of which the first
   `nl' are in use.  Entries are pushed by pushclass_above and popped by
   popclass_above. */
static struct {
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function. */
/* Note that this macro reads a variable named bracelev from the scope
   where it is expanded. */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2800
2801 static void
2802 pushclass_above (bracelev, str, len)
2803 int bracelev;
2804 char *str;
2805 int len;
2806 {
2807 int nl;
2808
2809 popclass_above (bracelev);
2810 nl = cstack.nl;
2811 if (nl >= cstack.size)
2812 {
2813 int size = cstack.size *= 2;
2814 xrnew (cstack.cname, size, char *);
2815 xrnew (cstack.bracelev, size, int);
2816 }
2817 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2818 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2819 cstack.bracelev[nl] = bracelev;
2820 cstack.nl = nl + 1;
2821 }
2822
2823 static void
2824 popclass_above (bracelev)
2825 int bracelev;
2826 {
2827 int nl;
2828
2829 for (nl = cstack.nl - 1;
2830 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2831 nl--)
2832 {
2833 if (cstack.cname[nl] != NULL)
2834 free (cstack.cname[nl]);
2835 cstack.nl = nl;
2836 }
2837 }
2838
2839 static void
2840 write_classname (cn, qualifier)
2841 linebuffer *cn;
2842 char *qualifier;
2843 {
2844 int i, len;
2845 int qlen = strlen (qualifier);
2846
2847 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2848 {
2849 len = 0;
2850 cn->len = 0;
2851 cn->buffer[0] = '\0';
2852 }
2853 else
2854 {
2855 len = strlen (cstack.cname[0]);
2856 linebuffer_setlen (cn, len);
2857 strcpy (cn->buffer, cstack.cname[0]);
2858 }
2859 for (i = 1; i < cstack.nl; i++)
2860 {
2861 char *s;
2862 int slen;
2863
2864 s = cstack.cname[i];
2865 if (s == NULL)
2866 continue;
2867 slen = strlen (s);
2868 len += slen + qlen;
2869 linebuffer_setlen (cn, len);
2870 strncat (cn->buffer, qualifier, qlen);
2871 strncat (cn->buffer, s, slen);
2872 }
2873 }
2874
2875 \f
2876 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2877 static void make_C_tag __P((bool));
2878
2879 /*
2880 * consider_token ()
2881 * checks to see if the current token is at the start of a
2882 * function or variable, or corresponds to a typedef, or
2883 * is a struct/union/enum tag, or #define, or an enum constant.
2884 *
2885 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2886 * with args. C_EXTP points to which language we are looking at.
2887 *
2888 * Globals
2889 * fvdef IN OUT
2890 * structdef IN OUT
2891 * definedef IN OUT
2892 * typdef IN OUT
2893 * objdef IN OUT
2894 */
2895
2896 static bool
2897 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2898 register char *str; /* IN: token pointer */
2899 register int len; /* IN: token length */
2900 register int c; /* IN: first char after the token */
2901 int *c_extp; /* IN, OUT: C extensions mask */
2902 int bracelev; /* IN: brace level */
2903 int parlev; /* IN: parenthesis level */
2904 bool *is_func_or_var; /* OUT: function or variable found */
2905 {
2906 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2907 structtype is the type of the preceding struct-like keyword, and
2908 structbracelev is the brace level where it has been seen. */
2909 static enum sym_type structtype;
2910 static int structbracelev;
2911 static enum sym_type toktype;
2912
2913
2914 toktype = C_symtype (str, len, *c_extp);
2915
2916 /*
2917 * Skip __attribute__
2918 */
2919 if (toktype == st_C_attribute)
2920 {
2921 inattribute = TRUE;
2922 return FALSE;
2923 }
2924
2925 /*
2926 * Advance the definedef state machine.
2927 */
2928 switch (definedef)
2929 {
2930 case dnone:
2931 /* We're not on a preprocessor line. */
2932 if (toktype == st_C_gnumacro)
2933 {
2934 fvdef = fdefunkey;
2935 return FALSE;
2936 }
2937 break;
2938 case dsharpseen:
2939 if (toktype == st_C_define)
2940 {
2941 definedef = ddefineseen;
2942 }
2943 else
2944 {
2945 definedef = dignorerest;
2946 }
2947 return FALSE;
2948 case ddefineseen:
2949 /*
2950 * Make a tag for any macro, unless it is a constant
2951 * and constantypedefs is FALSE.
2952 */
2953 definedef = dignorerest;
2954 *is_func_or_var = (c == '(');
2955 if (!*is_func_or_var && !constantypedefs)
2956 return FALSE;
2957 else
2958 return TRUE;
2959 case dignorerest:
2960 return FALSE;
2961 default:
2962 error ("internal error: definedef value.", (char *)NULL);
2963 }
2964
2965 /*
2966 * Now typedefs
2967 */
2968 switch (typdef)
2969 {
2970 case tnone:
2971 if (toktype == st_C_typedef)
2972 {
2973 if (typedefs)
2974 typdef = tkeyseen;
2975 fvextern = FALSE;
2976 fvdef = fvnone;
2977 return FALSE;
2978 }
2979 break;
2980 case tkeyseen:
2981 switch (toktype)
2982 {
2983 case st_none:
2984 case st_C_class:
2985 case st_C_struct:
2986 case st_C_enum:
2987 typdef = ttypeseen;
2988 }
2989 break;
2990 case ttypeseen:
2991 if (structdef == snone && fvdef == fvnone)
2992 {
2993 fvdef = fvnameseen;
2994 return TRUE;
2995 }
2996 break;
2997 case tend:
2998 switch (toktype)
2999 {
3000 case st_C_class:
3001 case st_C_struct:
3002 case st_C_enum:
3003 return FALSE;
3004 }
3005 return TRUE;
3006 }
3007
3008 /*
3009 * This structdef business is NOT invoked when we are ctags and the
3010 * file is plain C. This is because a struct tag may have the same
3011 * name as another tag, and this loses with ctags.
3012 */
3013 switch (toktype)
3014 {
3015 case st_C_javastruct:
3016 if (structdef == stagseen)
3017 structdef = scolonseen;
3018 return FALSE;
3019 case st_C_template:
3020 case st_C_class:
3021 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
3022 && bracelev == 0
3023 && definedef == dnone && structdef == snone
3024 && typdef == tnone && fvdef == fvnone)
3025 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3026 if (toktype == st_C_template)
3027 break;
3028 /* FALLTHRU */
3029 case st_C_struct:
3030 case st_C_enum:
3031 if (parlev == 0
3032 && fvdef != vignore
3033 && (typdef == tkeyseen
3034 || (typedefs_or_cplusplus && structdef == snone)))
3035 {
3036 structdef = skeyseen;
3037 structtype = toktype;
3038 structbracelev = bracelev;
3039 if (fvdef == fvnameseen)
3040 fvdef = fvnone;
3041 }
3042 return FALSE;
3043 }
3044
3045 if (structdef == skeyseen)
3046 {
3047 structdef = stagseen;
3048 return TRUE;
3049 }
3050
3051 if (typdef != tnone)
3052 definedef = dnone;
3053
3054 /* Detect Objective C constructs. */
3055 switch (objdef)
3056 {
3057 case onone:
3058 switch (toktype)
3059 {
3060 case st_C_objprot:
3061 objdef = oprotocol;
3062 return FALSE;
3063 case st_C_objimpl:
3064 objdef = oimplementation;
3065 return FALSE;
3066 }
3067 break;
3068 case oimplementation:
3069 /* Save the class tag for functions or variables defined inside. */
3070 objtag = savenstr (str, len);
3071 objdef = oinbody;
3072 return FALSE;
3073 case oprotocol:
3074 /* Save the class tag for categories. */
3075 objtag = savenstr (str, len);
3076 objdef = otagseen;
3077 *is_func_or_var = TRUE;
3078 return TRUE;
3079 case oparenseen:
3080 objdef = ocatseen;
3081 *is_func_or_var = TRUE;
3082 return TRUE;
3083 case oinbody:
3084 break;
3085 case omethodsign:
3086 if (parlev == 0)
3087 {
3088 fvdef = fvnone;
3089 objdef = omethodtag;
3090 linebuffer_setlen (&token_name, len);
3091 strncpy (token_name.buffer, str, len);
3092 token_name.buffer[len] = '\0';
3093 return TRUE;
3094 }
3095 return FALSE;
3096 case omethodcolon:
3097 if (parlev == 0)
3098 objdef = omethodparm;
3099 return FALSE;
3100 case omethodparm:
3101 if (parlev == 0)
3102 {
3103 fvdef = fvnone;
3104 objdef = omethodtag;
3105 linebuffer_setlen (&token_name, token_name.len + len);
3106 strncat (token_name.buffer, str, len);
3107 return TRUE;
3108 }
3109 return FALSE;
3110 case oignore:
3111 if (toktype == st_C_objend)
3112 {
3113 /* Memory leakage here: the string pointed by objtag is
3114 never released, because many tests would be needed to
3115 avoid breaking on incorrect input code. The amount of
3116 memory leaked here is the sum of the lengths of the
3117 class tags.
3118 free (objtag); */
3119 objdef = onone;
3120 }
3121 return FALSE;
3122 }
3123
3124 /* A function, variable or enum constant? */
3125 switch (toktype)
3126 {
3127 case st_C_extern:
3128 fvextern = TRUE;
3129 switch (fvdef)
3130 {
3131 case finlist:
3132 case flistseen:
3133 case fignore:
3134 case vignore:
3135 break;
3136 default:
3137 fvdef = fvnone;
3138 }
3139 return FALSE;
3140 case st_C_ignore:
3141 fvextern = FALSE;
3142 fvdef = vignore;
3143 return FALSE;
3144 case st_C_operator:
3145 fvdef = foperator;
3146 *is_func_or_var = TRUE;
3147 return TRUE;
3148 case st_none:
3149 if (constantypedefs
3150 && structdef == snone
3151 && structtype == st_C_enum && bracelev > structbracelev)
3152 return TRUE; /* enum constant */
3153 switch (fvdef)
3154 {
3155 case fdefunkey:
3156 if (bracelev > 0)
3157 break;
3158 fvdef = fdefunname; /* GNU macro */
3159 *is_func_or_var = TRUE;
3160 return TRUE;
3161 case fvnone:
3162 switch (typdef)
3163 {
3164 case ttypeseen:
3165 return FALSE;
3166 case tnone:
3167 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3168 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3169 {
3170 fvdef = vignore;
3171 return FALSE;
3172 }
3173 break;
3174 }
3175 /* FALLTHRU */
3176 case fvnameseen:
3177 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3178 {
3179 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3180 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3181 fvdef = foperator;
3182 *is_func_or_var = TRUE;
3183 return TRUE;
3184 }
3185 if (bracelev > 0 && !instruct)
3186 break;
3187 fvdef = fvnameseen; /* function or variable */
3188 *is_func_or_var = TRUE;
3189 return TRUE;
3190 }
3191 break;
3192 }
3193
3194 return FALSE;
3195 }
3196
3197 \f
3198 /*
3199 * C_entries often keeps pointers to tokens or lines which are older than
3200 * the line currently read. By keeping two line buffers, and switching
3201 * them at end of line, it is possible to use those pointers.
3202 */
/* Each of the two entries pairs a line buffer with `linepos'; the
   CNL_SAVE_DEFINEDEF macro stores charno there just before it reads a
   line into the matching buffer.  The entries are selected through the
   curndx / newndx indices by the curlb, newlb, curlinepos and newlinepos
   macros.  */
3203 static struct
3204 {
3205 long linepos;
3206 linebuffer lb;
3207 } lbs[2];
3208
/* Helpers for the two line buffers in lbs[].  They expand to uses of
   the variables curndx and newndx, so they only make sense where those
   are in scope.  current_lb_is_new is true when both indices name the
   same buffer; switch_line_buffers flips curndx between 0 and 1.  */
3209 #define current_lb_is_new (newndx == curndx)
3210 #define switch_line_buffers() (curndx = 1 - curndx)
3211
/* Shorthands for the buffer and line position selected by each index.  */
3212 #define curlb (lbs[curndx].lb)
3213 #define newlb (lbs[newndx].lb)
3214 #define curlinepos (lbs[curndx].linepos)
3215 #define newlinepos (lbs[newndx].linepos)
3216
/* Tests on the language flags held in the variable c_ext.  */
3217 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3218 #define cplpl (c_ext & C_PLPL)
3219 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3220
/* Read the next input line into the current buffer: record charno as
   the line position, call readline on inf, point lp at the start of the
   new text, clear quotednl and make newndx equal to curndx.  The
   definedef state is left untouched, hence the name.  Expands to uses of
   charno, inf, lp, quotednl, curndx and newndx.  */
3221 #define CNL_SAVE_DEFINEDEF() \
3222 do { \
3223 curlinepos = charno; \
3224 readline (&curlb, inf); \
3225 lp = curlb.buffer; \
3226 quotednl = FALSE; \
3227 newndx = curndx; \
3228 } while (0)
3229
/* Like CNL_SAVE_DEFINEDEF, but additionally restore `token' from
   savetoken when the latter is valid (invalidating savetoken), and reset
   definedef to dnone.  */
3230 #define CNL() \
3231 do { \
3232 CNL_SAVE_DEFINEDEF(); \
3233 if (savetoken.valid) \
3234 { \
3235 token = savetoken; \
3236 savetoken.valid = FALSE; \
3237 } \
3238 definedef = dnone; \
3239 } while (0)
3240
3241
3242 static void
3243 make_C_tag (isfun)
3244 bool isfun;
3245 {
3246 /* This function is never called when token.valid is FALSE, but
3247 we must protect against invalid input or internal errors. */
3248 if (!DEBUG && !token.valid)
3249 return;
3250
3251 if (token.valid)
3252 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3253 token.offset+token.length+1, token.lineno, token.linepos);
3254 else /* this case is optimised away if !DEBUG */
3255 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3256 token_name.len + 17, isfun, token.line,
3257 token.offset+token.length+1, token.lineno, token.linepos);
3258
3259 token.valid = FALSE;
3260 }
3261
3262
3263 /*
3264 * C_entries ()
3265 * This routine finds functions, variables, typedefs,
3266 * #define's, enum constants and struct/union/enum definitions in
3267 * C syntax and adds them to the list.
3268 */
/*
 * The input is read one line at a time through the CNL and
 * CNL_SAVE_DEFINEDEF macros and scanned character by character.  Each
 * pass through the main loop
 *   1. deals with backslashes, comments, string and character literals
 *      and square brackets;
 *   2. collects identifier tokens and hands each finished one to
 *      consider_token (or accepts it outright in yacc rules);
 *   3. reacts to the punctuation character that ended the token,
 *      calling make_C_tag wherever a tag is complete.
 * C_EXT carries the language flags tested by the plainc, cplpl and cjava
 * macros; when C_AUTO is set, meeting `::' inside an identifier turns
 * C_PLPL on and C_AUTO off.
 */
3269 static void
3270 C_entries (c_ext, inf)
3271 int c_ext; /* extension of C */
3272 FILE *inf; /* input file */
3273 {
3274 register char c; /* latest char read; '\0' for end of line */
3275 register char *lp; /* pointer one beyond the character `c' */
3276 int curndx, newndx; /* indices for current and new lb */
3277 register int tokoff; /* offset in line of start of current token */
3278 register int toklen; /* length of current token */
3279 char *qualifier; /* string used to qualify names */
3280 int qlen; /* length of qualifier */
3281 int bracelev; /* current brace level */
3282 int bracketlev; /* current bracket level */
3283 int parlev; /* current parenthesis level */
3284 int attrparlev; /* __attribute__ parenthesis level */
3285 int templatelev; /* current template level */
3286 int typdefbracelev; /* bracelev where a typedef struct body begun */
3287 bool incomm, inquote, inchar, quotednl, midtoken;
3288 bool yacc_rules; /* in the rules part of a yacc file */
3289 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3290
3291
3292 linebuffer_init (&lbs[0].lb);
3293 linebuffer_init (&lbs[1].lb);
/* Set up the class stack the first time through; its size is kept
   minimal under DEBUG.  */
3294 if (cstack.size == 0)
3295 {
3296 cstack.size = (DEBUG) ? 1 : 4;
3297 cstack.nl = 0;
3298 cstack.cname = xnew (cstack.size, char *);
3299 cstack.bracelev = xnew (cstack.size, int);
3300 }
3301
3302 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3303 curndx = newndx = 0;
/* Start with an empty current line, so that the first character seen is
   the '\0' end-of-line marker.  */
3304 lp = curlb.buffer;
3305 *lp = 0;
3306
3307 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3308 structdef = snone; definedef = dnone; objdef = onone;
3309 yacc_rules = FALSE;
3310 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3311 token.valid = savetoken.valid = FALSE;
3312 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Names of members are qualified with "." in Java and "::" otherwise.  */
3313 if (cjava)
3314 { qualifier = "."; qlen = 1; }
3315 else
3316 { qualifier = "::"; qlen = 2; }
3317
3318
/* Main scanning loop: one character per iteration.  */
3319 while (!feof (inf))
3320 {
3321 c = *lp++;
3322 if (c == '\\')
3323 {
3324 /* If we are at the end of the line, the next character is a
3325 '\0'; do not skip it, because it is what tells us
3326 to read the next line. */
3327 if (*lp == '\0')
3328 {
3329 quotednl = TRUE;
3330 continue;
3331 }
3332 lp++;
3333 c = ' ';
3334 }
3335 else if (incomm)
3336 {
3337 switch (c)
3338 {
3339 case '*':
3340 if (*lp == '/')
3341 {
3342 c = *lp++;
3343 incomm = FALSE;
3344 }
3345 break;
3346 case '\0':
3347 /* Newlines inside comments do not end macro definitions in
3348 traditional cpp. */
3349 CNL_SAVE_DEFINEDEF ();
3350 break;
3351 }
3352 continue;
3353 }
3354 else if (inquote)
3355 {
3356 switch (c)
3357 {
3358 case '"':
3359 inquote = FALSE;
3360 break;
3361 case '\0':
3362 /* Newlines inside strings do not end macro definitions
3363 in traditional cpp, even though compilers don't
3364 usually accept them. */
3365 CNL_SAVE_DEFINEDEF ();
3366 break;
3367 }
3368 continue;
3369 }
3370 else if (inchar)
3371 {
3372 switch (c)
3373 {
3374 case '\0':
3375 /* Hmmm, something went wrong. */
3376 CNL ();
3377 /* FALLTHRU */
3378 case '\'':
3379 inchar = FALSE;
3380 break;
3381 }
3382 continue;
3383 }
3384 else if (bracketlev > 0)
3385 {
/* Inside square brackets everything is skipped up to the matching
   closing bracket.  */
3386 switch (c)
3387 {
3388 case ']':
3389 if (--bracketlev > 0)
3390 continue;
3391 break;
3392 case '\0':
3393 CNL_SAVE_DEFINEDEF ();
3394 break;
3395 }
3396 continue;
3397 }
3398 else switch (c)
3399 {
3400 case '"':
3401 inquote = TRUE;
3402 if (inattribute)
3403 break;
3404 switch (fvdef)
3405 {
3406 case fdefunkey:
3407 case fstartlist:
3408 case finlist:
3409 case fignore:
3410 case vignore:
3411 break;
3412 default:
3413 fvextern = FALSE;
3414 fvdef = fvnone;
3415 }
3416 continue;
3417 case '\'':
3418 inchar = TRUE;
3419 if (inattribute)
3420 break;
3421 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3422 {
3423 fvextern = FALSE;
3424 fvdef = fvnone;
3425 }
3426 continue;
3427 case '/':
3428 if (*lp == '*')
3429 {
3430 incomm = TRUE;
3431 lp++;
3432 c = ' ';
3433 }
3434 else if (/* cplpl && */ *lp == '/')
3435 {
/* A `//' comment: pretend the line ends here.  */
3436 c = '\0';
3437 }
3438 break;
3439 case '%':
3440 if ((c_ext & YACC) && *lp == '%')
3441 {
3442 /* Entering or exiting rules section in yacc file. */
3443 lp++;
3444 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3445 typdef = tnone; structdef = snone;
3446 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3447 bracelev = 0;
3448 yacc_rules = !yacc_rules;
3449 continue;
3450 }
3451 else
3452 break;
3453 case '#':
3454 if (definedef == dnone)
3455 {
3456 char *cp;
3457 bool cpptoken = TRUE;
3458
3459 /* Look back on this line. If all blanks, or nonblanks
3460 followed by an end of comment, this is a preprocessor
3461 token. */
3462 for (cp = newlb.buffer; cp < lp-1; cp++)
3463 if (!iswhite (*cp))
3464 {
3465 if (*cp == '*' && *(cp+1) == '/')
3466 {
3467 cp++;
3468 cpptoken = TRUE;
3469 }
3470 else
3471 cpptoken = FALSE;
3472 }
3473 if (cpptoken)
3474 definedef = dsharpseen;
3475 } /* if (definedef == dnone) */
3476 continue;
3477 case '[':
3478 bracketlev++;
3479 continue;
3480 } /* switch (c) */
3481
3482
3483 /* Consider token only if some involved conditions are satisfied. */
3484 if (typdef != tignore
3485 && definedef != dignorerest
3486 && fvdef != finlist
3487 && templatelev == 0
3488 && (definedef != dnone
3489 || structdef != scolonseen)
3490 && !inattribute)
3491 {
3492 if (midtoken)
3493 {
3494 if (endtoken (c))
3495 {
3496 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3497 /* This handles :: in the middle,
3498 but not at the beginning of an identifier.
3499 Also, space-separated :: is not recognised. */
3500 {
3501 if (c_ext & C_AUTO) /* automatic detection of C++ */
3502 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3503 lp += 2;
3504 toklen += 2;
3505 c = lp[-1];
3506 goto still_in_token;
3507 }
3508 else
3509 {
3510 bool funorvar = FALSE;
3511
3512 if (yacc_rules
3513 || consider_token (newlb.buffer + tokoff, toklen, c,
3514 &c_ext, bracelev, parlev,
3515 &funorvar))
3516 {
3517 if (fvdef == foperator)
3518 {
/* Extend the token over the operator symbol that follows the
   `operator' keyword, up to the next blank or open parenthesis.  */
3519 char *oldlp = lp;
3520 lp = skip_spaces (lp-1);
3521 if (*lp != '\0')
3522 lp += 1;
3523 while (*lp != '\0'
3524 && !iswhite (*lp) && *lp != '(')
3525 lp += 1;
3526 c = *lp++;
3527 toklen += lp - oldlp;
3528 }
3529 token.named = FALSE;
3530 if (!plainc
3531 && nestlev > 0 && definedef == dnone)
3532 /* in struct body */
3533 {
3534 write_classname (&token_name, qualifier);
3535 linebuffer_setlen (&token_name,
3536 token_name.len+qlen+toklen);
3537 strcat (token_name.buffer, qualifier);
3538 strncat (token_name.buffer,
3539 newlb.buffer + tokoff, toklen);
3540 token.named = TRUE;
3541 }
3542 else if (objdef == ocatseen)
3543 /* Objective C category */
3544 {
3545 int len = strlen (objtag) + 2 + toklen;
3546 linebuffer_setlen (&token_name, len);
3547 strcpy (token_name.buffer, objtag);
3548 strcat (token_name.buffer, "(");
3549 strncat (token_name.buffer,
3550 newlb.buffer + tokoff, toklen);
3551 strcat (token_name.buffer, ")");
3552 token.named = TRUE;
3553 }
3554 else if (objdef == omethodtag
3555 || objdef == omethodparm)
3556 /* Objective C method */
3557 {
3558 token.named = TRUE;
3559 }
3560 else if (fvdef == fdefunname)
3561 /* GNU DEFUN and similar macros */
3562 {
3563 bool defun = (newlb.buffer[tokoff] == 'F');
3564 int off = tokoff;
3565 int len = toklen;
3566
3567 /* Rewrite the tag so that emacs lisp DEFUNs
3568 can be found by their elisp name */
3569 if (defun)
3570 {
3571 off += 1;
3572 len -= 1;
3573 }
3574 linebuffer_setlen (&token_name, len);
3575 strncpy (token_name.buffer,
3576 newlb.buffer + off, len);
3577 token_name.buffer[len] = '\0';
3578 if (defun)
3579 while (--len >= 0)
3580 if (token_name.buffer[len] == '_')
3581 token_name.buffer[len] = '-';
3582 token.named = defun;
3583 }
3584 else
3585 {
3586 linebuffer_setlen (&token_name, toklen);
3587 strncpy (token_name.buffer,
3588 newlb.buffer + tokoff, toklen);
3589 token_name.buffer[toklen] = '\0';
3590 /* Name macros and members. */
3591 token.named = (structdef == stagseen
3592 || typdef == ttypeseen
3593 || typdef == tend
3594 || (funorvar
3595 && definedef == dignorerest)
3596 || (funorvar
3597 && definedef == dnone
3598 && structdef == snone
3599 && bracelev > 0));
3600 }
/* Record where the token was found.  */
3601 token.lineno = lineno;
3602 token.offset = tokoff;
3603 token.length = toklen;
3604 token.line = newlb.buffer;
3605 token.linepos = newlinepos;
3606 token.valid = TRUE;
3607
/* Either keep the token for later, switching line buffers so that the
   line it points into is not overwritten by the next read, or make the
   tag right away.  */
3608 if (definedef == dnone
3609 && (fvdef == fvnameseen
3610 || fvdef == foperator
3611 || structdef == stagseen
3612 || typdef == tend
3613 || typdef == ttypeseen
3614 || objdef != onone))
3615 {
3616 if (current_lb_is_new)
3617 switch_line_buffers ();
3618 }
3619 else if (definedef != dnone
3620 || fvdef == fdefunname
3621 || instruct)
3622 make_C_tag (funorvar);
3623 }
3624 else /* not yacc and consider_token failed */
3625 {
3626 if (inattribute && fvdef == fignore)
3627 {
3628 /* We have just met __attribute__ after a
3629 function parameter list: do not tag the
3630 function again. */
3631 fvdef = fvnone;
3632 }
3633 }
3634 midtoken = FALSE;
3635 }
3636 } /* if (endtoken (c)) */
3637 else if (intoken (c))
3638 still_in_token:
3639 {
3640 toklen++;
3641 continue;
3642 }
3643 } /* if (midtoken) */
3644 else if (begtoken (c))
3645 {
3646 switch (definedef)
3647 {
3648 case dnone:
3649 switch (fvdef)
3650 {
3651 case fstartlist:
3652 /* This prevents tagging fb in
3653 void (__attribute__((noreturn)) *fb) (void);
3654 Fixing this is not easy and not very important. */
3655 fvdef = finlist;
3656 continue;
3657 case flistseen:
3658 if (plainc || declarations)
3659 {
3660 make_C_tag (TRUE); /* a function */
3661 fvdef = fignore;
3662 }
3663 break;
3664 }
3665 if (structdef == stagseen && !cjava)
3666 {
3667 popclass_above (bracelev);
3668 structdef = snone;
3669 }
3670 break;
3671 case dsharpseen:
3672 savetoken = token;
3673 break;
3674 }
/* Start a new token here; in yacc rules only a token that begins in the
   first column is started.  */
3675 if (!yacc_rules || lp == newlb.buffer + 1)
3676 {
3677 tokoff = lp - 1 - newlb.buffer;
3678 toklen = 1;
3679 midtoken = TRUE;
3680 }
3681 continue;
3682 } /* if (begtoken) */
3683 } /* if must look at token */
3684
3685
3686 /* Detect end of line, colon, comma, semicolon and various braces
3687 after having handled a token.*/
3688 switch (c)
3689 {
3690 case ':':
3691 if (inattribute)
3692 break;
3693 if (yacc_rules && token.offset == 0 && token.valid)
3694 {
3695 make_C_tag (FALSE); /* a yacc function */
3696 break;
3697 }
3698 if (definedef != dnone)
3699 break;
3700 switch (objdef)
3701 {
3702 case otagseen:
3703 objdef = oignore;
3704 make_C_tag (TRUE); /* an Objective C class */
3705 break;
3706 case omethodtag:
3707 case omethodparm:
3708 objdef = omethodcolon;
3709 linebuffer_setlen (&token_name, token_name.len + 1);
3710 strcat (token_name.buffer, ":");
3711 break;
3712 }
3713 if (structdef == stagseen)
3714 {
3715 structdef = scolonseen;
3716 break;
3717 }
3718 /* Should be useless, but may work as a safety net. */
3719 if (cplpl && fvdef == flistseen)
3720 {
3721 make_C_tag (TRUE); /* a function */
3722 fvdef = fignore;
3723 break;
3724 }
3725 break;
3726 case ';':
3727 if (definedef != dnone || inattribute)
3728 break;
3729 switch (typdef)
3730 {
3731 case tend:
3732 case ttypeseen:
3733 make_C_tag (FALSE); /* a typedef */
3734 typdef = tnone;
3735 fvdef = fvnone;
3736 break;
3737 case tnone:
3738 case tinbody:
3739 case tignore:
3740 switch (fvdef)
3741 {
3742 case fignore:
3743 if (typdef == tignore || cplpl)
3744 fvdef = fvnone;
3745 break;
3746 case fvnameseen:
3747 if ((globals && bracelev == 0 && (!fvextern || declarations))
3748 || (members && instruct))
3749 make_C_tag (FALSE); /* a variable */
3750 fvextern = FALSE;
3751 fvdef = fvnone;
3752 token.valid = FALSE;
3753 break;
3754 case flistseen:
3755 if ((declarations
3756 && (cplpl || !instruct)
3757 && (typdef == tnone || (typdef != tignore && instruct)))
3758 || (members
3759 && plainc && instruct))
3760 make_C_tag (TRUE); /* a function */
3761 /* FALLTHRU */
3762 default:
3763 fvextern = FALSE;
3764 fvdef = fvnone;
3765 if (declarations
3766 && cplpl && structdef == stagseen)
3767 make_C_tag (FALSE); /* forward declaration */
3768 else
3769 token.valid = FALSE;
3770 } /* switch (fvdef) */
3771 /* FALLTHRU */
3772 default:
3773 if (!instruct)
3774 typdef = tnone;
3775 }
3776 if (structdef == stagseen)
3777 structdef = snone;
3778 break;
3779 case ',':
3780 if (definedef != dnone || inattribute)
3781 break;
3782 switch (objdef)
3783 {
3784 case omethodtag:
3785 case omethodparm:
3786 make_C_tag (TRUE); /* an Objective C method */
3787 objdef = oinbody;
3788 break;
3789 }
3790 switch (fvdef)
3791 {
3792 case fdefunkey:
3793 case foperator:
3794 case fstartlist:
3795 case finlist:
3796 case fignore:
3797 case vignore:
3798 break;
3799 case fdefunname:
3800 fvdef = fignore;
3801 break;
3802 case fvnameseen:
3803 if (parlev == 0
3804 && ((globals
3805 && bracelev == 0
3806 && templatelev == 0
3807 && (!fvextern || declarations))
3808 || (members && instruct)))
3809 make_C_tag (FALSE); /* a variable */
3810 break;
3811 case flistseen:
3812 if ((declarations && typdef == tnone && !instruct)
3813 || (members && typdef != tignore && instruct))
3814 {
3815 make_C_tag (TRUE); /* a function */
3816 fvdef = fvnameseen;
3817 }
3818 else if (!declarations)
3819 fvdef = fvnone;
3820 token.valid = FALSE;
3821 break;
3822 default:
3823 fvdef = fvnone;
3824 }
3825 if (structdef == stagseen)
3826 structdef = snone;
3827 break;
3828 case ']':
3829 if (definedef != dnone || inattribute)
3830 break;
3831 if (structdef == stagseen)
3832 structdef = snone;
3833 switch (typdef)
3834 {
3835 case ttypeseen:
3836 case tend:
3837 typdef = tignore;
3838 make_C_tag (FALSE); /* a typedef */
3839 break;
3840 case tnone:
3841 case tinbody:
3842 switch (fvdef)
3843 {
3844 case foperator:
3845 case finlist:
3846 case fignore:
3847 case vignore:
3848 break;
3849 case fvnameseen:
3850 if ((members && bracelev == 1)
3851 || (globals && bracelev == 0
3852 && (!fvextern || declarations)))
3853 make_C_tag (FALSE); /* a variable */
3854 /* FALLTHRU */
3855 default:
3856 fvdef = fvnone;
3857 }
3858 break;
3859 }
3860 break;
3861 case '(':
3862 if (inattribute)
3863 {
3864 attrparlev++;
3865 break;
3866 }
3867 if (definedef != dnone)
3868 break;
3869 if (objdef == otagseen && parlev == 0)
3870 objdef = oparenseen;
3871 switch (fvdef)
3872 {
3873 case fvnameseen:
3874 if (typdef == ttypeseen
3875 && *lp != '*'
3876 && !instruct)
3877 {
3878 /* This handles constructs like:
3879 typedef void OperatorFun (int fun); */
3880 make_C_tag (FALSE);
3881 typdef = tignore;
3882 fvdef = fignore;
3883 break;
3884 }
3885 /* FALLTHRU */
3886 case foperator:
3887 fvdef = fstartlist;
3888 break;
3889 case flistseen:
3890 fvdef = finlist;
3891 break;
3892 }
3893 parlev++;
3894 break;
3895 case ')':
3896 if (inattribute)
3897 {
3898 if (--attrparlev == 0)
3899 inattribute = FALSE;
3900 break;
3901 }
3902 if (definedef != dnone)
3903 break;
3904 if (objdef == ocatseen && parlev == 1)
3905 {
3906 make_C_tag (TRUE); /* an Objective C category */
3907 objdef = oignore;
3908 }
3909 if (--parlev == 0)
3910 {
3911 switch (fvdef)
3912 {
3913 case fstartlist:
3914 case finlist:
3915 fvdef = flistseen;
3916 break;
3917 }
3918 if (!instruct
3919 && (typdef == tend
3920 || typdef == ttypeseen))
3921 {
3922 typdef = tignore;
3923 make_C_tag (FALSE); /* a typedef */
3924 }
3925 }
3926 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3927 parlev = 0;
3928 break;
3929 case '{':
3930 if (definedef != dnone)
3931 break;
3932 if (typdef == ttypeseen)
3933 {
3934 /* Whenever typdef is set to tinbody (currently only
3935 here), typdefbracelev should be set to bracelev. */
3936 typdef = tinbody;
3937 typdefbracelev = bracelev;
3938 }
3939 switch (fvdef)
3940 {
3941 case flistseen:
3942 make_C_tag (TRUE); /* a function */
3943 /* FALLTHRU */
3944 case fignore:
3945 fvdef = fvnone;
3946 break;
3947 case fvnone:
3948 switch (objdef)
3949 {
3950 case otagseen:
3951 make_C_tag (TRUE); /* an Objective C class */
3952 objdef = oignore;
3953 break;
3954 case omethodtag:
3955 case omethodparm:
3956 make_C_tag (TRUE); /* an Objective C method */
3957 objdef = oinbody;
3958 break;
3959 default:
3960 /* Neutralize `extern "C" {' grot. */
3961 if (bracelev == 0 && structdef == snone && nestlev == 0
3962 && typdef == tnone)
3963 bracelev = -1;
3964 }
3965 break;
3966 }
3967 switch (structdef)
3968 {
3969 case skeyseen: /* unnamed struct */
3970 pushclass_above (bracelev, NULL, 0);
3971 structdef = snone;
3972 break;
3973 case stagseen: /* named struct or enum */
3974 case scolonseen: /* a class */
3975 pushclass_above (bracelev,token.line+token.offset, token.length);
3976 structdef = snone;
3977 make_C_tag (FALSE); /* a struct or enum */
3978 break;
3979 }
3980 bracelev++;
3981 break;
3982 case '*':
3983 if (definedef != dnone)
3984 break;
3985 if (fvdef == fstartlist)
3986 {
3987 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3988 token.valid = FALSE;
3989 }
3990 break;
3991 case '}':
3992 if (definedef != dnone)
3993 break;
3994 if (!ignoreindent && lp == newlb.buffer + 1)
3995 {
3996 if (bracelev != 0)
3997 token.valid = FALSE;
3998 bracelev = 0; /* reset brace level if first column */
3999 parlev = 0; /* also reset paren level, just in case... */
4000 }
4001 else
4002 {
4003 if (--bracelev < 0)
4004 {
4005 bracelev = 0;
4006 token.valid = FALSE; /* something gone amiss, token unreliable */
4007 }
4008 if (bracelev == 0 && fvdef == vignore)
4009 fvdef = fvnone; /* end of function */
4010 }
4011 popclass_above (bracelev);
4012 structdef = snone;
4013 /* Only if typdef == tinbody is typdefbracelev significant. */
4014 if (typdef == tinbody && bracelev <= typdefbracelev)
4015 {
4016 assert (bracelev == typdefbracelev);
4017 typdef = tend;
4018 }
4019 break;
4020 case '=':
4021 if (definedef != dnone)
4022 break;
4023 switch (fvdef)
4024 {
4025 case foperator:
4026 case finlist:
4027 case fignore:
4028 case vignore:
4029 break;
4030 case fvnameseen:
4031 if ((members && bracelev == 1)
4032 || (globals && bracelev == 0 && (!fvextern || declarations)))
4033 make_C_tag (FALSE); /* a variable */
4034 /* FALLTHRU */
4035 default:
4036 fvdef = vignore;
4037 }
4038 break;
4039 case '<':
4040 if (cplpl
4041 && (structdef == stagseen || fvdef == fvnameseen))
4042 {
4043 templatelev++;
4044 break;
4045 }
4046 goto resetfvdef;
4047 case '>':
4048 if (templatelev > 0)
4049 {
4050 templatelev--;
4051 break;
4052 }
4053 goto resetfvdef;
4054 case '+':
4055 case '-':
4056 if (objdef == oinbody && bracelev == 0)
4057 {
4058 objdef = omethodsign;
4059 break;
4060 }
4061 /* FALLTHRU */
4062 resetfvdef:
4063 case '#': case '~': case '&': case '%': case '/':
4064 case '|': case '^': case '!': case '.': case '?':
4065 if (definedef != dnone)
4066 break;
4067 /* These surely cannot follow a function tag in C. */
4068 switch (fvdef)
4069 {
4070 case foperator:
4071 case finlist:
4072 case fignore:
4073 case vignore:
4074 break;
4075 default:
4076 fvdef = fvnone;
4077 }
4078 break;
4079 case '\0':
4080 if (objdef == otagseen)
4081 {
4082 make_C_tag (TRUE); /* an Objective C class */
4083 objdef = oignore;
4084 }
4085 /* If a macro spans multiple lines don't reset its state. */
4086 if (quotednl)
4087 CNL_SAVE_DEFINEDEF ();
4088 else
4089 CNL ();
4090 break;
4091 } /* switch (c) */
4092
4093 } /* while not eof */
4094
/* Release the storage of both line buffers.  */
4095 free (lbs[0].lb.buffer);
4096 free (lbs[1].lb.buffer);
4097 }
4098
4099 /*
4100 * Process either a C++ file or a C file depending on the setting
4101 * of a global flag.
4102 */
4103 static void
4104 default_C_entries (inf)
4105 FILE *inf;
4106 {
4107 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4108 }
4109
/* Scan INF as plain C: C_entries is called with no language flag set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_flags = 0;

  C_entries (no_flags, inf);
}
4117
4118 /* Always do C++. */
4119 static void
4120 Cplusplus_entries (inf)
4121 FILE *inf;
4122 {
4123 C_entries (C_PLPL, inf);
4124 }
4125
4126 /* Always do Java. */
4127 static void
4128 Cjava_entries (inf)
4129 FILE *inf;
4130 {
4131 C_entries (C_JAVA, inf);
4132 }
4133
4134 /* Always do C*. */
4135 static void
4136 Cstar_entries (inf)
4137 FILE *inf;
4138 {
4139 C_entries (C_STAR, inf);
4140 }
4141
4142 /* Always do Yacc. */
4143 static void
4144 Yacc_entries (inf)
4145 FILE *inf;
4146 {
4147 C_entries (YACC, inf);
4148 }
4149
4150 \f
4151 /* Useful macros. */
/* LOOP_ON_INPUT_LINES expands to a `for' header, so it must be followed
   by the statement to run for each line.  As long as feof is false for
   file_pointer, each iteration first calls readline to fill line_buffer
   and then points char_pointer at the start of that buffer.  */
4152 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4153 for (; /* loop initialization */ \
4154 !feof (file_pointer) /* loop test */ \
4155 && /* instructions at start of loop */ \
4156 (readline (&line_buffer, file_pointer), \
4157 char_pointer = line_buffer.buffer, \
4158 TRUE); \
4159 )
4160
/* LOOKING_AT is true when cp points at the keyword kw and the character
   right after it satisfies notinname.  In that case, as a side effect,
   cp is moved past the keyword and past the spaces that follow it.  cp
   is evaluated several times and assigned to, so it must be a plain
   lvalue.  */
4161 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4162 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4163 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4164 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4165 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4166
4167 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use
   notinname, and on success moves cp just past the keyword without
   skipping any following spaces. */
4168 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4169 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4170 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4171 && ((cp) += sizeof(kw)-1)) /* skip kw */
4172
4173 /*
4174 * Read a file, but do no processing. This is used to do regexp
4175 * matching on files that have no language defined.
4176 */
4177 static void
4178 just_read_file (inf)
4179 FILE *inf;
4180 {
4181 register char *dummy;
4182
4183 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4184 continue;
4185 }
4186
4187 \f
4188 /* Fortran parsing */
4189
/* Both helpers work on the global scanning pointer dbp.  F_takeprec
   steps over an optional `*' size suffix after a type keyword; F_getit
   makes a tag for the name found at dbp, reading one more line from the
   given file when the current one is exhausted.  */
4190 static void F_takeprec __P((void));
4191 static void F_getit __P((FILE *));
4192
4193 static void
4194 F_takeprec ()
4195 {
4196 dbp = skip_spaces (dbp);
4197 if (*dbp != '*')
4198 return;
4199 dbp++;
4200 dbp = skip_spaces (dbp);
4201 if (strneq (dbp, "(*)", 3))
4202 {
4203 dbp += 3;
4204 return;
4205 }
4206 if (!ISDIGIT (*dbp))
4207 {
4208 --dbp; /* force failure */
4209 return;
4210 }
4211 do
4212 dbp++;
4213 while (ISDIGIT (*dbp));
4214 }
4215
4216 static void
4217 F_getit (inf)
4218 FILE *inf;
4219 {
4220 register char *cp;
4221
4222 dbp = skip_spaces (dbp);
4223 if (*dbp == '\0')
4224 {
4225 readline (&lb, inf);
4226 dbp = lb.buffer;
4227 if (dbp[5] != '&')
4228 return;
4229 dbp += 6;
4230 dbp = skip_spaces (dbp);
4231 }
4232 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4233 return;
4234 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4235 continue;
4236 make_tag (dbp, cp-dbp, TRUE,
4237 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4238 }
4239
4240
4241 static void
4242 Fortran_functions (inf)
4243 FILE *inf;
4244 {
4245 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4246 {
4247 if (*dbp == '%')
4248 dbp++; /* Ratfor escape to fortran */
4249 dbp = skip_spaces (dbp);
4250 if (*dbp == '\0')
4251 continue;
4252 switch (lowcase (*dbp))
4253 {
4254 case 'i':
4255 if (nocase_tail ("integer"))
4256 F_takeprec ();
4257 break;
4258 case 'r':
4259 if (nocase_tail ("real"))
4260 F_takeprec ();
4261 break;
4262 case 'l':
4263 if (nocase_tail ("logical"))
4264 F_takeprec ();
4265 break;
4266 case 'c':
4267 if (nocase_tail ("complex") || nocase_tail ("character"))
4268 F_takeprec ();
4269 break;
4270 case 'd':
4271 if (nocase_tail ("double"))
4272 {
4273 dbp = skip_spaces (dbp);
4274 if (*dbp == '\0')
4275 continue;
4276 if (nocase_tail ("precision"))
4277 break;
4278 continue;
4279 }
4280 break;
4281 }
4282 dbp = skip_spaces (dbp);
4283 if (*dbp == '\0')
4284 continue;
4285 switch (lowcase (*dbp))
4286 {
4287 case 'f':
4288 if (nocase_tail ("function"))
4289 F_getit (inf);
4290 continue;
4291 case 's':
4292 if (nocase_tail ("subroutine"))
4293 F_getit (inf);
4294 continue;
4295 case 'e':
4296 if (nocase_tail ("entry"))
4297 F_getit (inf);
4298 continue;
4299 case 'b':
4300 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4301 {
4302 dbp = skip_spaces (dbp);
4303 if (*dbp == '\0') /* assume un-named */
4304 make_tag ("blockdata", 9, TRUE,
4305 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4306 else
4307 F_getit (inf); /* look for name */
4308 }
4309 continue;
4310 }
4311 }
4312 }
4313
4314 \f
4315 /*
4316 * Ada parsing
4317 * Original code by
4318 * Philippe Waroquiers (1998)
4319 */
4320
/* Ada_getit takes the input file and the qualifier string (such as "/f"
   or "/p") that it appends to the name it tags.  */
4321 static void Ada_getit __P((FILE *, char *));
4322
4323 /* Once we are positioned after an "interesting" keyword, let's get
4324 the real tag value necessary. */
4325 static void
4326 Ada_getit (inf, name_qualifier)
4327 FILE *inf;
4328 char *name_qualifier;
4329 {
4330 register char *cp;
4331 char *name;
4332 char c;
4333
4334 while (!feof (inf))
4335 {
4336 dbp = skip_spaces (dbp);
4337 if (*dbp == '\0'
4338 || (dbp[0] == '-' && dbp[1] == '-'))
4339 {
4340 readline (&lb, inf);
4341 dbp = lb.buffer;
4342 }
4343 switch (lowcase(*dbp))
4344 {
4345 case 'b':
4346 if (nocase_tail ("body"))
4347 {
4348 /* Skipping body of procedure body or package body or ....
4349 resetting qualifier to body instead of spec. */
4350 name_qualifier = "/b";
4351 continue;
4352 }
4353 break;
4354 case 't':
4355 /* Skipping type of task type or protected type ... */
4356 if (nocase_tail ("type"))
4357 continue;
4358 break;
4359 }
4360 if (*dbp == '"')
4361 {
4362 dbp += 1;
4363 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4364 continue;
4365 }
4366 else
4367 {
4368 dbp = skip_spaces (dbp);
4369 for (cp = dbp;
4370 (*cp != '\0'
4371 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4372 cp++)
4373 continue;
4374 if (cp == dbp)
4375 return;
4376 }
4377 c = *cp;
4378 *cp = '\0';
4379 name = concat (dbp, name_qualifier, "");
4380 *cp = c;
4381 make_tag (name, strlen (name), TRUE,
4382 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4383 free (name);
4384 if (c == '"')
4385 dbp = cp + 1;
4386 return;
4387 }
4388 }
4389
4390 static void
4391 Ada_funcs (inf)
4392 FILE *inf;
4393 {
4394 bool inquote = FALSE;
4395 bool skip_till_semicolumn = FALSE;
4396
4397 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4398 {
4399 while (*dbp != '\0')
4400 {
4401 /* Skip a string i.e. "abcd". */
4402 if (inquote || (*dbp == '"'))
4403 {
4404 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4405 if (dbp != NULL)
4406 {
4407 inquote = FALSE;
4408 dbp += 1;
4409 continue; /* advance char */
4410 }
4411 else
4412 {
4413 inquote = TRUE;
4414 break; /* advance line */
4415 }
4416 }
4417
4418 /* Skip comments. */
4419 if (dbp[0] == '-' && dbp[1] == '-')
4420 break; /* advance line */
4421
4422 /* Skip character enclosed in single quote i.e. 'a'
4423 and skip single quote starting an attribute i.e. 'Image. */
4424 if (*dbp == '\'')
4425 {
4426 dbp++ ;
4427 if (*dbp != '\0')
4428 dbp++;
4429 continue;
4430 }
4431
4432 if (skip_till_semicolumn)
4433 {
4434 if (*dbp == ';')
4435 skip_till_semicolumn = FALSE;
4436 dbp++;
4437 continue; /* advance char */
4438 }
4439
4440 /* Search for beginning of a token. */
4441 if (!begtoken (*dbp))
4442 {
4443 dbp++;
4444 continue; /* advance char */
4445 }
4446
4447 /* We are at the beginning of a token. */
4448 switch (lowcase(*dbp))
4449 {
4450 case 'f':
4451 if (!packages_only && nocase_tail ("function"))
4452 Ada_getit (inf, "/f");
4453 else
4454 break; /* from switch */
4455 continue; /* advance char */
4456 case 'p':
4457 if (!packages_only && nocase_tail ("procedure"))
4458 Ada_getit (inf, "/p");
4459 else if (nocase_tail ("package"))
4460 Ada_getit (inf, "/s");
4461 else if (nocase_tail ("protected")) /* protected type */
4462 Ada_getit (inf, "/t");
4463 else
4464 break; /* from switch */
4465 continue; /* advance char */
4466
4467 case 'u':
4468 if (typedefs && !packages_only && nocase_tail ("use"))
4469 {
4470 /* when tagging types, avoid tagging use type Pack.Typename;
4471 for this, we will skip everything till a ; */
4472 skip_till_semicolumn = TRUE;
4473 continue; /* advance char */
4474 }
4475
4476 case 't':
4477 if (!packages_only && nocase_tail ("task"))
4478 Ada_getit (inf, "/k");
4479 else if (typedefs && !packages_only && nocase_tail ("type"))
4480 {
4481 Ada_getit (inf, "/t");
4482 while (*dbp != '\0')
4483 dbp += 1;
4484 }
4485 else
4486 break; /* from switch */
4487 continue; /* advance char */
4488 }
4489
4490 /* Look for the end of the token. */
4491 while (!endtoken (*dbp))
4492 dbp++;
4493
4494 } /* advance char */
4495 } /* advance line */
4496 }
4497
4498 \f
4499 /*
4500 * Unix and microcontroller assembly tag handling
4501 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4502 * Idea by Bob Weiner, Motorola Inc. (1994)
4503 */
4504 static void
4505 Asm_labels (inf)
4506 FILE *inf;
4507 {
4508 register char *cp;
4509
4510 LOOP_ON_INPUT_LINES (inf, lb, cp)
4511 {
4512 /* If first char is alphabetic or one of [_.$], test for colon
4513 following identifier. */
4514 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4515 {
4516 /* Read past label. */
4517 cp++;
4518 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4519 cp++;
4520 if (*cp == ':' || iswhite (*cp))
4521 /* Found end of label, so copy it and add it to the table. */
4522 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4523 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4524 }
4525 }
4526 }
4527
4528 \f
4529 /*
4530 * Perl support
4531 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4532 * Perl variable names: /^(my|local).../
4533 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4534 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4535 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4536 */
4537 static void
4538 Perl_functions (inf)
4539 FILE *inf;
4540 {
4541 char *package = savestr ("main"); /* current package name */
4542 register char *cp;
4543
4544 LOOP_ON_INPUT_LINES (inf, lb, cp)
4545 {
4546 skip_spaces(cp);
4547
4548 if (LOOKING_AT (cp, "package"))
4549 {
4550 free (package);
4551 get_tag (cp, &package);
4552 }
4553 else if (LOOKING_AT (cp, "sub"))
4554 {
4555 char *pos;
4556 char *sp = cp;
4557
4558 while (!notinname (*cp))
4559 cp++;
4560 if (cp == sp)
4561 continue; /* nothing found */
4562 if ((pos = etags_strchr (sp, ':')) != NULL
4563 && pos < cp && pos[1] == ':')
4564 /* The name is already qualified. */
4565 make_tag (sp, cp - sp, TRUE,
4566 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4567 else
4568 /* Qualify it. */
4569 {
4570 char savechar, *name;
4571
4572 savechar = *cp;
4573 *cp = '\0';
4574 name = concat (package, "::", sp);
4575 *cp = savechar;
4576 make_tag (name, strlen(name), TRUE,
4577 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4578 free (name);
4579 }
4580 }
4581 else if (globals) /* only if we are tagging global vars */
4582 {
4583 /* Skip a qualifier, if any. */
4584 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4585 /* After "my" or "local", but before any following paren or space. */
4586 char *varstart = cp;
4587
4588 if (qual /* should this be removed? If yes, how? */
4589 && (*cp == '$' || *cp == '@' || *cp == '%'))
4590 {
4591 varstart += 1;
4592 do
4593 cp++;
4594 while (ISALNUM (*cp) || *cp == '_');
4595 }
4596 else if (qual)
4597 {
4598 /* Should be examining a variable list at this point;
4599 could insist on seeing an open parenthesis. */
4600 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4601 cp++;
4602 }
4603 else
4604 continue;
4605
4606 make_tag (varstart, cp - varstart, FALSE,
4607 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4608 }
4609 }
4610 free (package);
4611 }
4612
4613
4614 /*
4615 * Python support
4616 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4617 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4618 * More ideas by seb bacon <seb@jamkit.com> (2002)
4619 */
4620 static void
4621 Python_functions (inf)
4622 FILE *inf;
4623 {
4624 register char *cp;
4625
4626 LOOP_ON_INPUT_LINES (inf, lb, cp)
4627 {
4628 cp = skip_spaces (cp);
4629 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4630 {
4631 char *name = cp;
4632 while (!notinname (*cp) && *cp != ':')
4633 cp++;
4634 make_tag (name, cp - name, TRUE,
4635 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4636 }
4637 }
4638 }
4639
4640 \f
4641 /*
4642 * PHP support
4643 * Look for:
4644 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4645 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4646 * - /^[ \t]*define\(\"[^\"]+/
4647 * Only with --members:
4648 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4649 * Idea by Diez B. Roggisch (2001)
4650 */
4651 static void
4652 PHP_functions (inf)
4653 FILE *inf;
4654 {
4655 register char *cp, *name;
4656 bool search_identifier = FALSE;
4657
4658 LOOP_ON_INPUT_LINES (inf, lb, cp)
4659 {
4660 cp = skip_spaces (cp);
4661 name = cp;
4662 if (search_identifier
4663 && *cp != '\0')
4664 {
4665 while (!notinname (*cp))
4666 cp++;
4667 make_tag (name, cp - name, TRUE,
4668 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4669 search_identifier = FALSE;
4670 }
4671 else if (LOOKING_AT (cp, "function"))
4672 {
4673 if(*cp == '&')
4674 cp = skip_spaces (cp+1);
4675 if(*cp != '\0')
4676 {
4677 name = cp;
4678 while (!notinname (*cp))
4679 cp++;
4680 make_tag (name, cp - name, TRUE,
4681 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4682 }
4683 else
4684 search_identifier = TRUE;
4685 }
4686 else if (LOOKING_AT (cp, "class"))
4687 {
4688 if (*cp != '\0')
4689 {
4690 name = cp;
4691 while (*cp != '\0' && !iswhite (*cp))
4692 cp++;
4693 make_tag (name, cp - name, FALSE,
4694 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4695 }
4696 else
4697 search_identifier = TRUE;
4698 }
4699 else if (strneq (cp, "define", 6)
4700 && (cp = skip_spaces (cp+6))
4701 && *cp++ == '('
4702 && (*cp == '"' || *cp == '\''))
4703 {
4704 char quote = *cp++;
4705 name = cp;
4706 while (*cp != quote && *cp != '\0')
4707 cp++;
4708 make_tag (name, cp - name, FALSE,
4709 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4710 }
4711 else if (members
4712 && LOOKING_AT (cp, "var")
4713 && *cp == '$')
4714 {
4715 name = cp;
4716 while (!notinname(*cp))
4717 cp++;
4718 make_tag (name, cp - name, FALSE,
4719 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4720 }
4721 }
4722 }
4723
4724 \f
4725 /*
4726 * Cobol tag functions
4727 * We could look for anything that could be a paragraph name.
4728 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4729 * Idea by Corny de Souza (1993)
4730 */
4731 static void
4732 Cobol_paragraphs (inf)
4733 FILE *inf;
4734 {
4735 register char *bp, *ep;
4736
4737 LOOP_ON_INPUT_LINES (inf, lb, bp)
4738 {
4739 if (lb.len < 9)
4740 continue;
4741 bp += 8;
4742
4743 /* If eoln, compiler option or comment ignore whole line. */
4744 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4745 continue;
4746
4747 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4748 continue;
4749 if (*ep++ == '.')
4750 make_tag (bp, ep - bp, TRUE,
4751 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4752 }
4753 }
4754
4755 \f
4756 /*
4757 * Makefile support
4758 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4759 */
4760 static void
4761 Makefile_targets (inf)
4762 FILE *inf;
4763 {
4764 register char *bp;
4765
4766 LOOP_ON_INPUT_LINES (inf, lb, bp)
4767 {
4768 if (*bp == '\t' || *bp == '#')
4769 continue;
4770 while (*bp != '\0' && *bp != '=' && *bp != ':')
4771 bp++;
4772 if (*bp == ':' || (globals && *bp == '='))
4773 {
4774 /* We should detect if there is more than one tag, but we do not.
4775 We just skip initial and final spaces. */
4776 char * namestart = skip_spaces (lb.buffer);
4777 while (--bp > namestart)
4778 if (!notinname (*bp))
4779 break;
4780 make_tag (namestart, bp - namestart + 1, TRUE,
4781 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4782 }
4783 }
4784 }
4785
4786 \f
4787 /*
4788 * Pascal parsing
4789 * Original code by Mosur K. Mohan (1989)
4790 *
4791 * Locates tags for procedures & functions. Doesn't do any type- or
4792 * var-definitions. It does look for the keyword "extern" or
4793 * "forward" immediately following the procedure statement; if found,
4794 * the tag is skipped.
4795 */
4796 static void
4797 Pascal_functions (inf)
4798 FILE *inf;
4799 {
4800 linebuffer tline; /* mostly copied from C_entries */
4801 long save_lcno;
4802 int save_lineno, namelen, taglen;
4803 char c, *name;
4804
4805 bool /* each of these flags is TRUE iff: */
4806 incomment, /* point is inside a comment */
4807 inquote, /* point is inside '..' string */
4808 get_tagname, /* point is after PROCEDURE/FUNCTION
4809 keyword, so next item = potential tag */
4810 found_tag, /* point is after a potential tag */
4811 inparms, /* point is within parameter-list */
4812 verify_tag; /* point has passed the parm-list, so the
4813 next token will determine whether this
4814 is a FORWARD/EXTERN to be ignored, or
4815 whether it is a real tag */
4816
4817 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4818 name = NULL; /* keep compiler quiet */
4819 dbp = lb.buffer;
4820 *dbp = '\0';
4821 linebuffer_init (&tline);
4822
4823 incomment = inquote = FALSE;
4824 found_tag = FALSE; /* have a proc name; check if extern */
4825 get_tagname = FALSE; /* found "procedure" keyword */
4826 inparms = FALSE; /* found '(' after "proc" */
4827 verify_tag = FALSE; /* check if "extern" is ahead */
4828
4829
4830 while (!feof (inf)) /* long main loop to get next char */
4831 {
4832 c = *dbp++;
4833 if (c == '\0') /* if end of line */
4834 {
4835 readline (&lb, inf);
4836 dbp = lb.buffer;
4837 if (*dbp == '\0')
4838 continue;
4839 if (!((found_tag && verify_tag)
4840 || get_tagname))
4841 c = *dbp++; /* only if don't need *dbp pointing
4842 to the beginning of the name of
4843 the procedure or function */
4844 }
4845 if (incomment)
4846 {
4847 if (c == '}') /* within { } comments */
4848 incomment = FALSE;
4849 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4850 {
4851 dbp++;
4852 incomment = FALSE;
4853 }
4854 continue;
4855 }
4856 else if (inquote)
4857 {
4858 if (c == '\'')
4859 inquote = FALSE;
4860 continue;
4861 }
4862 else
4863 switch (c)
4864 {
4865 case '\'':
4866 inquote = TRUE; /* found first quote */
4867 continue;
4868 case '{': /* found open { comment */
4869 incomment = TRUE;
4870 continue;
4871 case '(':
4872 if (*dbp == '*') /* found open (* comment */
4873 {
4874 incomment = TRUE;
4875 dbp++;
4876 }
4877 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4878 inparms = TRUE;
4879 continue;
4880 case ')': /* end of parms list */
4881 if (inparms)
4882 inparms = FALSE;
4883 continue;
4884 case ';':
4885 if (found_tag && !inparms) /* end of proc or fn stmt */
4886 {
4887 verify_tag = TRUE;
4888 break;
4889 }
4890 continue;
4891 }
4892 if (found_tag && verify_tag && (*dbp != ' '))
4893 {
4894 /* Check if this is an "extern" declaration. */
4895 if (*dbp == '\0')
4896 continue;
4897 if (lowcase (*dbp == 'e'))
4898 {
4899 if (nocase_tail ("extern")) /* superfluous, really! */
4900 {
4901 found_tag = FALSE;
4902 verify_tag = FALSE;
4903 }
4904 }
4905 else if (lowcase (*dbp) == 'f')
4906 {
4907 if (nocase_tail ("forward")) /* check for forward reference */
4908 {
4909 found_tag = FALSE;
4910 verify_tag = FALSE;
4911 }
4912 }
4913 if (found_tag && verify_tag) /* not external proc, so make tag */
4914 {
4915 found_tag = FALSE;
4916 verify_tag = FALSE;
4917 make_tag (name, namelen, TRUE,
4918 tline.buffer, taglen, save_lineno, save_lcno);
4919 continue;
4920 }
4921 }
4922 if (get_tagname) /* grab name of proc or fn */
4923 {
4924 char *cp;
4925
4926 if (*dbp == '\0')
4927 continue;
4928
4929 /* Find block name. */
4930 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4931 continue;
4932
4933 /* Save all values for later tagging. */
4934 linebuffer_setlen (&tline, lb.len);
4935 strcpy (tline.buffer, lb.buffer);
4936 save_lineno = lineno;
4937 save_lcno = linecharno;
4938 name = tline.buffer + (dbp - lb.buffer);
4939 namelen = cp - dbp;
4940 taglen = cp - lb.buffer + 1;
4941
4942 dbp = cp; /* set dbp to e-o-token */
4943 get_tagname = FALSE;
4944 found_tag = TRUE;
4945 continue;
4946
4947 /* And proceed to check for "extern". */
4948 }
4949 else if (!incomment && !inquote && !found_tag)
4950 {
4951 /* Check for proc/fn keywords. */
4952 switch (lowcase (c))
4953 {
4954 case 'p':
4955 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4956 get_tagname = TRUE;
4957 continue;
4958 case 'f':
4959 if (nocase_tail ("unction"))
4960 get_tagname = TRUE;
4961 continue;
4962 }
4963 }
4964 } /* while not eof */
4965
4966 free (tline.buffer);
4967 }
4968
4969 \f
4970 /*
4971 * Lisp tag functions
4972 * look for (def or (DEF, quote or QUOTE
4973 */
4974
4975 static void L_getit __P((void));
4976
4977 static void
4978 L_getit ()
4979 {
4980 if (*dbp == '\'') /* Skip prefix quote */
4981 dbp++;
4982 else if (*dbp == '(')
4983 {
4984 dbp++;
4985 /* Try to skip "(quote " */
4986 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4987 /* Ok, then skip "(" before name in (defstruct (foo)) */
4988 dbp = skip_spaces (dbp);
4989 }
4990 get_tag (dbp, NULL);
4991 }
4992
4993 static void
4994 Lisp_functions (inf)
4995 FILE *inf;
4996 {
4997 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4998 {
4999 if (dbp[0] != '(')
5000 continue;
5001
5002 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5003 {
5004 dbp = skip_non_spaces (dbp);
5005 dbp = skip_spaces (dbp);
5006 L_getit ();
5007 }
5008 else
5009 {
5010 /* Check for (foo::defmumble name-defined ... */
5011 do
5012 dbp++;
5013 while (!notinname (*dbp) && *dbp != ':');
5014 if (*dbp == ':')
5015 {
5016 do
5017 dbp++;
5018 while (*dbp == ':');
5019
5020 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5021 {
5022 dbp = skip_non_spaces (dbp);
5023 dbp = skip_spaces (dbp);
5024 L_getit ();
5025 }
5026 }
5027 }
5028 }
5029 }
5030
5031 \f
5032 /*
5033 * Lua script language parsing
5034 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5035 *
5036 * "function" and "local function" are tags if they start at column 1.
5037 */
5038 static void
5039 Lua_functions (inf)
5040 FILE *inf;
5041 {
5042 register char *bp;
5043
5044 LOOP_ON_INPUT_LINES (inf, lb, bp)
5045 {
5046 if (bp[0] != 'f' && bp[0] != 'l')
5047 continue;
5048
5049 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5050
5051 if (LOOKING_AT (bp, "function"))
5052 get_tag (bp, NULL);
5053 }
5054 }
5055
5056 \f
5057 /*
5058 * Postscript tags
5059 * Just look for lines where the first character is '/'
5060 * Also look at "defineps" for PSWrap
5061 * Ideas by:
5062 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5063 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5064 */
5065 static void
5066 PS_functions (inf)
5067 FILE *inf;
5068 {
5069 register char *bp, *ep;
5070
5071 LOOP_ON_INPUT_LINES (inf, lb, bp)
5072 {
5073 if (bp[0] == '/')
5074 {
5075 for (ep = bp+1;
5076 *ep != '\0' && *ep != ' ' && *ep != '{';
5077 ep++)
5078 continue;
5079 make_tag (bp, ep - bp, TRUE,
5080 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5081 }
5082 else if (LOOKING_AT (bp, "defineps"))
5083 get_tag (bp, NULL);
5084 }
5085 }
5086
5087 \f
5088 /*
5089 * Forth tags
5090 * Ignore anything after \ followed by space or in ( )
5091 * Look for words defined by :
5092 * Look for constant, code, create, defer, value, and variable
5093 * OBP extensions: Look for buffer:, field,
5094 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5095 */
5096 static void
5097 Forth_words (inf)
5098 FILE *inf;
5099 {
5100 register char *bp;
5101
5102 LOOP_ON_INPUT_LINES (inf, lb, bp)
5103 while ((bp = skip_spaces (bp))[0] != '\0')
5104 if (bp[0] == '\\' && iswhite(bp[1]))
5105 break; /* read next line */
5106 else if (bp[0] == '(' && iswhite(bp[1]))
5107 do /* skip to ) or eol */
5108 bp++;
5109 while (*bp != ')' && *bp != '\0');
5110 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5111 || LOOKING_AT_NOCASE (bp, "constant")
5112 || LOOKING_AT_NOCASE (bp, "code")
5113 || LOOKING_AT_NOCASE (bp, "create")
5114 || LOOKING_AT_NOCASE (bp, "defer")
5115 || LOOKING_AT_NOCASE (bp, "value")
5116 || LOOKING_AT_NOCASE (bp, "variable")
5117 || LOOKING_AT_NOCASE (bp, "buffer:")
5118 || LOOKING_AT_NOCASE (bp, "field"))
5119 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5120 else
5121 bp = skip_non_spaces (bp);
5122 }
5123
5124 \f
5125 /*
5126 * Scheme tag functions
5127 * look for (def... xyzzy
5128 * (def... (xyzzy
5129 * (def ... ((...(xyzzy ....
5130 * (set! xyzzy
5131 * Original code by Ken Haase (1985?)
5132 */
5133 static void
5134 Scheme_functions (inf)
5135 FILE *inf;
5136 {
5137 register char *bp;
5138
5139 LOOP_ON_INPUT_LINES (inf, lb, bp)
5140 {
5141 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5142 {
5143 bp = skip_non_spaces (bp+4);
5144 /* Skip over open parens and white space */
5145 while (notinname (*bp))
5146 bp++;
5147 get_tag (bp, NULL);
5148 }
5149 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5150 get_tag (bp, NULL);
5151 }
5152 }
5153
5154 \f
5155 /* Find tags in TeX and LaTeX input files. */
5156
5157 /* TEX_toktab is a table of TeX control sequences that define tags.
5158 * Each entry records one such control sequence.
5159 *
5160 * Original code from who knows whom.
5161 * Ideas by:
5162 * Stefan Monnier (2002)
5163 */
5164
5165 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5166
5167 /* Default set of control sequences to put into TEX_toktab.
5168 The value of environment var TEXTAGS is prepended to this. */
5169 static char *TEX_defenv = "\
5170 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5171 :part:appendix:entry:index:def\
5172 :newcommand:renewcommand:newenvironment:renewenvironment";
5173
5174 static void TEX_mode __P((FILE *));
5175 static void TEX_decode_env __P((char *, char *));
5176
5177 static char TEX_esc = '\\';
5178 static char TEX_opgrp = '{';
5179 static char TEX_clgrp = '}';
5180
5181 /*
5182 * TeX/LaTeX scanning loop.
5183 */
5184 static void
5185 TeX_commands (inf)
5186 FILE *inf;
5187 {
5188 char *cp;
5189 linebuffer *key;
5190
5191 /* Select either \ or ! as escape character. */
5192 TEX_mode (inf);
5193
5194 /* Initialize token table once from environment. */
5195 if (TEX_toktab == NULL)
5196 TEX_decode_env ("TEXTAGS", TEX_defenv);
5197
5198 LOOP_ON_INPUT_LINES (inf, lb, cp)
5199 {
5200 /* Look at each TEX keyword in line. */
5201 for (;;)
5202 {
5203 /* Look for a TEX escape. */
5204 while (*cp++ != TEX_esc)
5205 if (cp[-1] == '\0' || cp[-1] == '%')
5206 goto tex_next_line;
5207
5208 for (key = TEX_toktab; key->buffer != NULL; key++)
5209 if (strneq (cp, key->buffer, key->len))
5210 {
5211 register char *p;
5212 int namelen, linelen;
5213 bool opgrp = FALSE;
5214
5215 cp = skip_spaces (cp + key->len);
5216 if (*cp == TEX_opgrp)
5217 {
5218 opgrp = TRUE;
5219 cp++;
5220 }
5221 for (p = cp;
5222 (!iswhite (*p) && *p != '#' &&
5223 *p != TEX_opgrp && *p != TEX_clgrp);
5224 p++)
5225 continue;
5226 namelen = p - cp;
5227 linelen = lb.len;
5228 if (!opgrp || *p == TEX_clgrp)
5229 {
5230 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5231 p++;
5232 linelen = p - lb.buffer + 1;
5233 }
5234 make_tag (cp, namelen, TRUE,
5235 lb.buffer, linelen, lineno, linecharno);
5236 goto tex_next_line; /* We only tag a line once */
5237 }
5238 }
5239 tex_next_line:
5240 ;
5241 }
5242 }
5243
5244 #define TEX_LESC '\\'
5245 #define TEX_SESC '!'
5246
5247 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5248 chars accordingly. */
5249 static void
5250 TEX_mode (inf)
5251 FILE *inf;
5252 {
5253 int c;
5254
5255 while ((c = getc (inf)) != EOF)
5256 {
5257 /* Skip to next line if we hit the TeX comment char. */
5258 if (c == '%')
5259 while (c != '\n' && c != EOF)
5260 c = getc (inf);
5261 else if (c == TEX_LESC || c == TEX_SESC )
5262 break;
5263 }
5264
5265 if (c == TEX_LESC)
5266 {
5267 TEX_esc = TEX_LESC;
5268 TEX_opgrp = '{';
5269 TEX_clgrp = '}';
5270 }
5271 else
5272 {
5273 TEX_esc = TEX_SESC;
5274 TEX_opgrp = '<';
5275 TEX_clgrp = '>';
5276 }
5277 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5278 No attempt is made to correct the situation. */
5279 rewind (inf);
5280 }
5281
5282 /* Read environment and prepend it to the default string.
5283 Build token table. */
5284 static void
5285 TEX_decode_env (evarname, defenv)
5286 char *evarname;
5287 char *defenv;
5288 {
5289 register char *env, *p;
5290 int i, len;
5291
5292 /* Append default string to environment. */
5293 env = getenv (evarname);
5294 if (!env)
5295 env = defenv;
5296 else
5297 {
5298 char *oldenv = env;
5299 env = concat (oldenv, defenv, "");
5300 }
5301
5302 /* Allocate a token table */
5303 for (len = 1, p = env; p;)
5304 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5305 len++;
5306 TEX_toktab = xnew (len, linebuffer);
5307
5308 /* Unpack environment string into token table. Be careful about */
5309 /* zero-length strings (leading ':', "::" and trailing ':') */
5310 for (i = 0; *env != '\0';)
5311 {
5312 p = etags_strchr (env, ':');
5313 if (!p) /* End of environment string. */
5314 p = env + strlen (env);
5315 if (p - env > 0)
5316 { /* Only non-zero strings. */
5317 TEX_toktab[i].buffer = savenstr (env, p - env);
5318 TEX_toktab[i].len = p - env;
5319 i++;
5320 }
5321 if (*p)
5322 env = p + 1;
5323 else
5324 {
5325 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5326 TEX_toktab[i].len = 0;
5327 break;
5328 }
5329 }
5330 }
5331
5332 \f
5333 /* Texinfo support. Dave Love, Mar. 2000. */
5334 static void
5335 Texinfo_nodes (inf)
5336 FILE * inf;
5337 {
5338 char *cp, *start;
5339 LOOP_ON_INPUT_LINES (inf, lb, cp)
5340 if (LOOKING_AT (cp, "@node"))
5341 {
5342 start = cp;
5343 while (*cp != '\0' && *cp != ',')
5344 cp++;
5345 make_tag (start, cp - start, TRUE,
5346 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5347 }
5348 }
5349
5350 \f
5351 /*
5352 * HTML support.
5353 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5354 * Contents of <a name=xxx> are tags with name xxx.
5355 *
5356 * Francesco Potortì, 2002.
5357 */
5358 static void
5359 HTML_labels (inf)
5360 FILE * inf;
5361 {
5362 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5363 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5364 bool intag = FALSE; /* inside an html tag, looking for ID= */
5365 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5366 char *end;
5367
5368
5369 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5370
5371 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5372 for (;;) /* loop on the same line */
5373 {
5374 if (skiptag) /* skip HTML tag */
5375 {
5376 while (*dbp != '\0' && *dbp != '>')
5377 dbp++;
5378 if (*dbp == '>')
5379 {
5380 dbp += 1;
5381 skiptag = FALSE;
5382 continue; /* look on the same line */
5383 }
5384 break; /* go to next line */
5385 }
5386
5387 else if (intag) /* look for "name=" or "id=" */
5388 {
5389 while (*dbp != '\0' && *dbp != '>'
5390 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5391 dbp++;
5392 if (*dbp == '\0')
5393 break; /* go to next line */
5394 if (*dbp == '>')
5395 {
5396 dbp += 1;
5397 intag = FALSE;
5398 continue; /* look on the same line */
5399 }
5400 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5401 || LOOKING_AT_NOCASE (dbp, "id="))
5402 {
5403 bool quoted = (dbp[0] == '"');
5404
5405 if (quoted)
5406 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5407 continue;
5408 else
5409 for (end = dbp; *end != '\0' && intoken (*end); end++)
5410 continue;
5411 linebuffer_setlen (&token_name, end - dbp);
5412 strncpy (token_name.buffer, dbp, end - dbp);
5413 token_name.buffer[end - dbp] = '\0';
5414
5415 dbp = end;
5416 intag = FALSE; /* we found what we looked for */
5417 skiptag = TRUE; /* skip to the end of the tag */
5418 getnext = TRUE; /* then grab the text */
5419 continue; /* look on the same line */
5420 }
5421 dbp += 1;
5422 }
5423
5424 else if (getnext) /* grab next tokens and tag them */
5425 {
5426 dbp = skip_spaces (dbp);
5427 if (*dbp == '\0')
5428 break; /* go to next line */
5429 if (*dbp == '<')
5430 {
5431 intag = TRUE;
5432 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5433 continue; /* look on the same line */
5434 }
5435
5436 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5437 continue;
5438 make_tag (token_name.buffer, token_name.len, TRUE,
5439 dbp, end - dbp, lineno, linecharno);
5440 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5441 getnext = FALSE;
5442 break; /* go to next line */
5443 }
5444
5445 else /* look for an interesting HTML tag */
5446 {
5447 while (*dbp != '\0' && *dbp != '<')
5448 dbp++;
5449 if (*dbp == '\0')
5450 break; /* go to next line */
5451 intag = TRUE;
5452 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5453 {
5454 inanchor = TRUE;
5455 continue; /* look on the same line */
5456 }
5457 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5458 || LOOKING_AT_NOCASE (dbp, "<h1>")
5459 || LOOKING_AT_NOCASE (dbp, "<h2>")
5460 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5461 {
5462 intag = FALSE;
5463 getnext = TRUE;
5464 continue; /* look on the same line */
5465 }
5466 dbp += 1;
5467 }
5468 }
5469 }
5470
5471 \f
5472 /*
5473 * Prolog support
5474 *
5475 * Assumes that the predicate or rule starts at column 0.
5476 * Only the first clause of a predicate or rule is added.
5477 * Original code by Sunichirou Sugou (1989)
5478 * Rewritten by Anders Lindgren (1996)
5479 */
5480 static int prolog_pr __P((char *, char *));
5481 static void prolog_skip_comment __P((linebuffer *, FILE *));
5482 static int prolog_atom __P((char *, int));
5483
5484 static void
5485 Prolog_functions (inf)
5486 FILE *inf;
5487 {
5488 char *cp, *last;
5489 int len;
5490 int allocated;
5491
5492 allocated = 0;
5493 len = 0;
5494 last = NULL;
5495
5496 LOOP_ON_INPUT_LINES (inf, lb, cp)
5497 {
5498 if (cp[0] == '\0') /* Empty line */
5499 continue;
5500 else if (iswhite (cp[0])) /* Not a predicate */
5501 continue;
5502 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5503 prolog_skip_comment (&lb, inf);
5504 else if ((len = prolog_pr (cp, last)) > 0)
5505 {
5506 /* Predicate or rule. Store the function name so that we
5507 only generate a tag for the first clause. */
5508 if (last == NULL)
5509 last = xnew(len + 1, char);
5510 else if (len + 1 > allocated)
5511 xrnew (last, len + 1, char);
5512 allocated = len + 1;
5513 strncpy (last, cp, len);
5514 last[len] = '\0';
5515 }
5516 }
5517 if (last != NULL)
5518 free (last);
5519 }
5520
5521
5522 static void
5523 prolog_skip_comment (plb, inf)
5524 linebuffer *plb;
5525 FILE *inf;
5526 {
5527 char *cp;
5528
5529 do
5530 {
5531 for (cp = plb->buffer; *cp != '\0'; cp++)
5532 if (cp[0] == '*' && cp[1] == '/')
5533 return;
5534 readline (plb, inf);
5535 }
5536 while (!feof(inf));
5537 }
5538
5539 /*
5540 * A predicate or rule definition is added if it matches:
5541 * <beginning of line><Prolog Atom><whitespace>(
5542 * or <beginning of line><Prolog Atom><whitespace>:-
5543 *
5544 * It is added to the tags database if it doesn't match the
5545 * name of the previous clause header.
5546 *
5547 * Return the size of the name of the predicate or rule, or 0 if no
5548 * header was found.
5549 */
5550 static int
5551 prolog_pr (s, last)
5552 char *s;
5553 char *last; /* Name of last clause. */
5554 {
5555 int pos;
5556 int len;
5557
5558 pos = prolog_atom (s, 0);
5559 if (pos < 1)
5560 return 0;
5561
5562 len = pos;
5563 pos = skip_spaces (s + pos) - s;
5564
5565 if ((s[pos] == '.'
5566 || (s[pos] == '(' && (pos += 1))
5567 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5568 && (last == NULL /* save only the first clause */
5569 || len != (int)strlen (last)
5570 || !strneq (s, last, len)))
5571 {
5572 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5573 return len;
5574 }
5575 else
5576 return 0;
5577 }
5578
/*
 * Measure the Prolog atom that starts at S[POS].
 * Return its length in bytes, or -1 if there is no atom there.
 *
 * Two kinds of atom are recognized:
 * - a lower case letter or underscore followed by alphanumerics and
 *   underscores;
 * - a string in single quotes, in which a doubled quote stands for a
 *   quote and a backslash protects the character after it.  The
 *   length includes both quotes.  A quoted atom that is not closed on
 *   the same line is rejected.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* a doubled quote */
        break;

      default:
        p++;
        break;
      }
}
5637
5638 \f
5639 /*
5640 * Support for Erlang
5641 *
5642 * Generates tags for functions, defines, and records.
5643 * Assumes that Erlang functions start at column 0.
5644 * Original code by Anders Lindgren (1996)
5645 */
5646 static int erlang_func __P((char *, char *));
5647 static void erlang_attribute __P((char *));
5648 static int erlang_atom __P((char *));
5649
5650 static void
5651 Erlang_functions (inf)
5652 FILE *inf;
5653 {
5654 char *cp, *last;
5655 int len;
5656 int allocated;
5657
5658 allocated = 0;
5659 len = 0;
5660 last = NULL;
5661
5662 LOOP_ON_INPUT_LINES (inf, lb, cp)
5663 {
5664 if (cp[0] == '\0') /* Empty line */
5665 continue;
5666 else if (iswhite (cp[0])) /* Not function nor attribute */
5667 continue;
5668 else if (cp[0] == '%') /* comment */
5669 continue;
5670 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5671 continue;
5672 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5673 {
5674 erlang_attribute (cp);
5675 if (last != NULL)
5676 {
5677 free (last);
5678 last = NULL;
5679 }
5680 }
5681 else if ((len = erlang_func (cp, last)) > 0)
5682 {
5683 /*
5684 * Function. Store the function name so that we only
5685 * generates a tag for the first clause.
5686 */
5687 if (last == NULL)
5688 last = xnew (len + 1, char);
5689 else if (len + 1 > allocated)
5690 xrnew (last, len + 1, char);
5691 allocated = len + 1;
5692 strncpy (last, cp, len);
5693 last[len] = '\0';
5694 }
5695 }
5696 if (last != NULL)
5697 free (last);
5698 }
5699
5700
5701 /*
5702 * A function definition is added if it matches:
5703 * <beginning of line><Erlang Atom><whitespace>(
5704 *
5705 * It is added to the tags database if it doesn't match the
5706 * name of the previous clause header.
5707 *
5708 * Return the size of the name of the function, or 0 if no function
5709 * was found.
5710 */
5711 static int
5712 erlang_func (s, last)
5713 char *s;
5714 char *last; /* Name of last clause. */
5715 {
5716 int pos;
5717 int len;
5718
5719 pos = erlang_atom (s);
5720 if (pos < 1)
5721 return 0;
5722
5723 len = pos;
5724 pos = skip_spaces (s + pos) - s;
5725
5726 /* Save only the first clause. */
5727 if (s[pos++] == '('
5728 && (last == NULL
5729 || len != (int)strlen (last)
5730 || !strneq (s, last, len)))
5731 {
5732 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5733 return len;
5734 }
5735
5736 return 0;
5737 }
5738
5739
5740 /*
5741 * Handle attributes. Currently, tags are generated for defines
5742 * and records.
5743 *
5744 * They are on the form:
5745 * -define(foo, bar).
5746 * -define(Foo(M, N), M+N).
5747 * -record(graph, {vtab = notable, cyclic = true}).
5748 */
5749 static void
5750 erlang_attribute (s)
5751 char *s;
5752 {
5753 char *cp = s;
5754
5755 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5756 && *cp++ == '(')
5757 {
5758 int len = erlang_atom (skip_spaces (cp));
5759 if (len > 0)
5760 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5761 }
5762 return;
5763 }
5764
5765
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom or when a quoted atom is not closed on
 * this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote. */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')
            return 0;           /* multiline quoted atoms are ignored */
          if (s[pos] == '\\')
            {
              /* A backslash takes the following character with it. */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote as well */
    }

  /* The atom is unquoted. */
  if (ISALPHA (s[0]) || s[0] == '_')
    for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
      continue;

  return pos;
}
5794
5795 \f
5796 static char *scan_separators __P((char *));
5797 static void add_regex __P((char *, language *));
5798 static char *substitute __P((char *, char *, struct re_registers *));
5799
5800 /*
5801 * Take a string like "/blah/" and turn it into "blah", verifying
5802 * that the first and last characters are the same, and handling
5803 * quoted separator characters. Actually, stops on the occurrence of
5804 * an unquoted separator. Also process \t, \n, etc. and turn into
5805 * appropriate characters. Works in place. Null terminates name string.
5806 * Returns pointer to terminating separator, or NULL for
5807 * unterminated regexps.
5808 */
5809 static char *
5810 scan_separators (name)
5811 char *name;
5812 {
5813 char sep = name[0];
5814 char *copyto = name;
5815 bool quoted = FALSE;
5816
5817 for (++name; *name != '\0'; ++name)
5818 {
5819 if (quoted)
5820 {
5821 switch (*name)
5822 {
5823 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5824 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5825 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5826 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5827 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5828 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5829 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5830 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5831 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5832 default:
5833 if (*name == sep)
5834 *copyto++ = sep;
5835 else
5836 {
5837 /* Something else is quoted, so preserve the quote. */
5838 *copyto++ = '\\';
5839 *copyto++ = *name;
5840 }
5841 break;
5842 }
5843 quoted = FALSE;
5844 }
5845 else if (*name == '\\')
5846 quoted = TRUE;
5847 else if (*name == sep)
5848 break;
5849 else
5850 *copyto++ = *name;
5851 }
5852 if (*name != sep)
5853 name = NULL; /* signal unterminated regexp */
5854
5855 /* Terminate copied string. */
5856 *copyto = '\0';
5857 return name;
5858 }
5859
5860 /* Look at the argument of --regex or --no-regex and do the right
5861 thing. Same for each line of a regexp file. */
5862 static void
5863 analyse_regex (regex_arg)
5864 char *regex_arg;
5865 {
5866 if (regex_arg == NULL)
5867 {
5868 free_regexps (); /* --no-regex: remove existing regexps */
5869 return;
5870 }
5871
5872 /* A real --regexp option or a line in a regexp file. */
5873 switch (regex_arg[0])
5874 {
5875 /* Comments in regexp file or null arg to --regex. */
5876 case '\0':
5877 case ' ':
5878 case '\t':
5879 break;
5880
5881 /* Read a regex file. This is recursive and may result in a
5882 loop, which will stop when the file descriptors are exhausted. */
5883 case '@':
5884 {
5885 FILE *regexfp;
5886 linebuffer regexbuf;
5887 char *regexfile = regex_arg + 1;
5888
5889 /* regexfile is a file containing regexps, one per line. */
5890 regexfp = fopen (regexfile, "r");
5891 if (regexfp == NULL)
5892 {
5893 pfatal (regexfile);
5894 return;
5895 }
5896 linebuffer_init (&regexbuf);
5897 while (readline_internal (&regexbuf, regexfp) > 0)
5898 analyse_regex (regexbuf.buffer);
5899 free (regexbuf.buffer);
5900 fclose (regexfp);
5901 }
5902 break;
5903
5904 /* Regexp to be used for a specific language only. */
5905 case '{':
5906 {
5907 language *lang;
5908 char *lang_name = regex_arg + 1;
5909 char *cp;
5910
5911 for (cp = lang_name; *cp != '}'; cp++)
5912 if (*cp == '\0')
5913 {
5914 error ("unterminated language name in regex: %s", regex_arg);
5915 return;
5916 }
5917 *cp++ = '\0';
5918 lang = get_language_from_langname (lang_name);
5919 if (lang == NULL)
5920 return;
5921 add_regex (cp, lang);
5922 }
5923 break;
5924
5925 /* Regexp to be used for any language. */
5926 default:
5927 add_regex (regex_arg, NULL);
5928 break;
5929 }
5930 }
5931
5932 /* Separate the regexp pattern, compile it,
5933 and care for optional name and modifiers. */
5934 static void
5935 add_regex (regexp_pattern, lang)
5936 char *regexp_pattern;
5937 language *lang;
5938 {
5939 static struct re_pattern_buffer zeropattern;
5940 char sep, *pat, *name, *modifiers;
5941 const char *err;
5942 struct re_pattern_buffer *patbuf;
5943 regexp *rp;
5944 bool
5945 force_explicit_name = TRUE, /* do not use implicit tag names */
5946 ignore_case = FALSE, /* case is significant */
5947 multi_line = FALSE, /* matches are done one line at a time */
5948 single_line = FALSE; /* dot does not match newline */
5949
5950
5951 if (strlen(regexp_pattern) < 3)
5952 {
5953 error ("null regexp", (char *)NULL);
5954 return;
5955 }
5956 sep = regexp_pattern[0];
5957 name = scan_separators (regexp_pattern);
5958 if (name == NULL)
5959 {
5960 error ("%s: unterminated regexp", regexp_pattern);
5961 return;
5962 }
5963 if (name[1] == sep)
5964 {
5965 error ("null name for regexp \"%s\"", regexp_pattern);
5966 return;
5967 }
5968 modifiers = scan_separators (name);
5969 if (modifiers == NULL) /* no terminating separator --> no name */
5970 {
5971 modifiers = name;
5972 name = "";
5973 }
5974 else
5975 modifiers += 1; /* skip separator */
5976
5977 /* Parse regex modifiers. */
5978 for (; modifiers[0] != '\0'; modifiers++)
5979 switch (modifiers[0])
5980 {
5981 case 'N':
5982 if (modifiers == name)
5983 error ("forcing explicit tag name but no name, ignoring", NULL);
5984 force_explicit_name = TRUE;
5985 break;
5986 case 'i':
5987 ignore_case = TRUE;
5988 break;
5989 case 's':
5990 single_line = TRUE;
5991 /* FALLTHRU */
5992 case 'm':
5993 multi_line = TRUE;
5994 need_filebuf = TRUE;
5995 break;
5996 default:
5997 {
5998 char wrongmod [2];
5999 wrongmod[0] = modifiers[0];
6000 wrongmod[1] = '\0';
6001 error ("invalid regexp modifier `%s', ignoring", wrongmod);
6002 }
6003 break;
6004 }
6005
6006 patbuf = xnew (1, struct re_pattern_buffer);
6007 *patbuf = zeropattern;
6008 if (ignore_case)
6009 {
6010 static char lc_trans[CHARS];
6011 int i;
6012 for (i = 0; i < CHARS; i++)
6013 lc_trans[i] = lowcase (i);
6014 patbuf->translate = lc_trans; /* translation table to fold case */
6015 }
6016
6017 if (multi_line)
6018 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6019 else
6020 pat = regexp_pattern;
6021
6022 if (single_line)
6023 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6024 else
6025 re_set_syntax (RE_SYNTAX_EMACS);
6026
6027 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6028 if (multi_line)
6029 free (pat);
6030 if (err != NULL)
6031 {
6032 error ("%s while compiling pattern", err);
6033 return;
6034 }
6035
6036 rp = p_head;
6037 p_head = xnew (1, regexp);
6038 p_head->pattern = savestr (regexp_pattern);
6039 p_head->p_next = rp;
6040 p_head->lang = lang;
6041 p_head->pat = patbuf;
6042 p_head->name = savestr (name);
6043 p_head->error_signaled = FALSE;
6044 p_head->force_explicit_name = force_explicit_name;
6045 p_head->ignore_case = ignore_case;
6046 p_head->multi_line = multi_line;
6047 }
6048
6049 /*
6050 * Do the substitutions indicated by the regular expression and
6051 * arguments.
6052 */
6053 static char *
6054 substitute (in, out, regs)
6055 char *in, *out;
6056 struct re_registers *regs;
6057 {
6058 char *result, *t;
6059 int size, dig, diglen;
6060
6061 result = NULL;
6062 size = strlen (out);
6063
6064 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6065 if (out[size - 1] == '\\')
6066 fatal ("pattern error in \"%s\"", out);
6067 for (t = etags_strchr (out, '\\');
6068 t != NULL;
6069 t = etags_strchr (t + 2, '\\'))
6070 if (ISDIGIT (t[1]))
6071 {
6072 dig = t[1] - '0';
6073 diglen = regs->end[dig] - regs->start[dig];
6074 size += diglen - 2;
6075 }
6076 else
6077 size -= 1;
6078
6079 /* Allocate space and do the substitutions. */
6080 assert (size >= 0);
6081 result = xnew (size + 1, char);
6082
6083 for (t = result; *out != '\0'; out++)
6084 if (*out == '\\' && ISDIGIT (*++out))
6085 {
6086 dig = *out - '0';
6087 diglen = regs->end[dig] - regs->start[dig];
6088 strncpy (t, in + regs->start[dig], diglen);
6089 t += diglen;
6090 }
6091 else
6092 *t++ = *out;
6093 *t = '\0';
6094
6095 assert (t <= result + size);
6096 assert (t - result == (int)strlen (result));
6097
6098 return result;
6099 }
6100
6101 /* Deallocate all regexps. */
6102 static void
6103 free_regexps ()
6104 {
6105 regexp *rp;
6106 while (p_head != NULL)
6107 {
6108 rp = p_head->p_next;
6109 free (p_head->pattern);
6110 free (p_head->name);
6111 free (p_head);
6112 p_head = rp;
6113 }
6114 return;
6115 }
6116
6117 /*
6118 * Reads the whole file as a single string from `filebuf' and looks for
6119 * multi-line regular expressions, creating tags on matches.
6120 * readline already dealt with normal regexps.
6121 *
6122 * Idea by Ben Wing <ben@666.com> (2002).
6123 */
6124 static void
6125 regex_tag_multiline ()
6126 {
6127 char *buffer = filebuf.buffer;
6128 regexp *rp;
6129 char *name;
6130
6131 for (rp = p_head; rp != NULL; rp = rp->p_next)
6132 {
6133 int match = 0;
6134
6135 if (!rp->multi_line)
6136 continue; /* skip normal regexps */
6137
6138 /* Generic initialisations before parsing file from memory. */
6139 lineno = 1; /* reset global line number */
6140 charno = 0; /* reset global char number */
6141 linecharno = 0; /* reset global char number of line start */
6142
6143 /* Only use generic regexps or those for the current language. */
6144 if (rp->lang != NULL && rp->lang != curfdp->lang)
6145 continue;
6146
6147 while (match >= 0 && match < filebuf.len)
6148 {
6149 match = re_search (rp->pat, buffer, filebuf.len, charno,
6150 filebuf.len - match, &rp->regs);
6151 switch (match)
6152 {
6153 case -2:
6154 /* Some error. */
6155 if (!rp->error_signaled)
6156 {
6157 error ("regexp stack overflow while matching \"%s\"",
6158 rp->pattern);
6159 rp->error_signaled = TRUE;
6160 }
6161 break;
6162 case -1:
6163 /* No match. */
6164 break;
6165 default:
6166 if (match == rp->regs.end[0])
6167 {
6168 if (!rp->error_signaled)
6169 {
6170 error ("regexp matches the empty string: \"%s\"",
6171 rp->pattern);
6172 rp->error_signaled = TRUE;
6173 }
6174 match = -3; /* exit from while loop */
6175 break;
6176 }
6177
6178 /* Match occurred. Construct a tag. */
6179 while (charno < rp->regs.end[0])
6180 if (buffer[charno++] == '\n')
6181 lineno++, linecharno = charno;
6182 name = rp->name;
6183 if (name[0] == '\0')
6184 name = NULL;
6185 else /* make a named tag */
6186 name = substitute (buffer, rp->name, &rp->regs);
6187 if (rp->force_explicit_name)
6188 /* Force explicit tag name, if a name is there. */
6189 pfnote (name, TRUE, buffer + linecharno,
6190 charno - linecharno + 1, lineno, linecharno);
6191 else
6192 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6193 charno - linecharno + 1, lineno, linecharno);
6194 break;
6195 }
6196 }
6197 }
6198 }
6199
6200 \f
6201 static bool
6202 nocase_tail (cp)
6203 char *cp;
6204 {
6205 register int len = 0;
6206
6207 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6208 cp++, len++;
6209 if (*cp == '\0' && !intoken (dbp[len]))
6210 {
6211 dbp += len;
6212 return TRUE;
6213 }
6214 return FALSE;
6215 }
6216
6217 static void
6218 get_tag (bp, namepp)
6219 register char *bp;
6220 char **namepp;
6221 {
6222 register char *cp = bp;
6223
6224 if (*bp != '\0')
6225 {
6226 /* Go till you get to white space or a syntactic break */
6227 for (cp = bp + 1; !notinname (*cp); cp++)
6228 continue;
6229 make_tag (bp, cp - bp, TRUE,
6230 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6231 }
6232
6233 if (namepp != NULL)
6234 *namepp = savenstr (bp, cp - bp);
6235 }
6236
6237 /*
6238 * Read a line of text from `stream' into `lbp', excluding the
6239 * newline or CR-NL, if any. Return the number of characters read from
6240 * `stream', which is the length of the line including the newline.
6241 *
6242 * On DOS or Windows we do not count the CR character, if any before the
6243 * NL, in the returned length; this mirrors the behavior of Emacs on those
6244 * platforms (for text files, it translates CR-NL to NL as it reads in the
6245 * file).
6246 *
6247 * If multi-line regular expressions are requested, each line read is
6248 * appended to `filebuf'.
6249 */
6250 static long
6251 readline_internal (lbp, stream)
6252 linebuffer *lbp;
6253 register FILE *stream;
6254 {
6255 char *buffer = lbp->buffer;
6256 register char *p = lbp->buffer;
6257 register char *pend;
6258 int chars_deleted;
6259
6260 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6261
6262 for (;;)
6263 {
6264 register int c = getc (stream);
6265 if (p == pend)
6266 {
6267 /* We're at the end of linebuffer: expand it. */
6268 lbp->size *= 2;
6269 xrnew (buffer, lbp->size, char);
6270 p += buffer - lbp->buffer;
6271 pend = buffer + lbp->size;
6272 lbp->buffer = buffer;
6273 }
6274 if (c == EOF)
6275 {
6276 *p = '\0';
6277 chars_deleted = 0;
6278 break;
6279 }
6280 if (c == '\n')
6281 {
6282 if (p > buffer && p[-1] == '\r')
6283 {
6284 p -= 1;
6285 #ifdef DOS_NT
6286 /* Assume CRLF->LF translation will be performed by Emacs
6287 when loading this file, so CRs won't appear in the buffer.
6288 It would be cleaner to compensate within Emacs;
6289 however, Emacs does not know how many CRs were deleted
6290 before any given point in the file. */
6291 chars_deleted = 1;
6292 #else
6293 chars_deleted = 2;
6294 #endif
6295 }
6296 else
6297 {
6298 chars_deleted = 1;
6299 }
6300 *p = '\0';
6301 break;
6302 }
6303 *p++ = c;
6304 }
6305 lbp->len = p - buffer;
6306
6307 if (need_filebuf /* we need filebuf for multi-line regexps */
6308 && chars_deleted > 0) /* not at EOF */
6309 {
6310 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6311 {
6312 /* Expand filebuf. */
6313 filebuf.size *= 2;
6314 xrnew (filebuf.buffer, filebuf.size, char);
6315 }
6316 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6317 filebuf.len += lbp->len;
6318 filebuf.buffer[filebuf.len++] = '\n';
6319 filebuf.buffer[filebuf.len] = '\0';
6320 }
6321
6322 return lbp->len + chars_deleted;
6323 }
6324
6325 /*
6326 * Like readline_internal, above, but in addition try to match the
6327 * input line against relevant regular expressions and manage #line
6328 * directives.
6329 */
6330 static void
6331 readline (lbp, stream)
6332 linebuffer *lbp;
6333 FILE *stream;
6334 {
6335 long result;
6336
6337 linecharno = charno; /* update global char number of line start */
6338 result = readline_internal (lbp, stream); /* read line */
6339 lineno += 1; /* increment global line number */
6340 charno += result; /* increment global char number */
6341
6342 /* Honour #line directives. */
6343 if (!no_line_directive)
6344 {
6345 static bool discard_until_line_directive;
6346
6347 /* Check whether this is a #line directive. */
6348 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6349 {
6350 unsigned int lno;
6351 int start = 0;
6352
6353 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6354 && start > 0) /* double quote character found */
6355 {
6356 char *endp = lbp->buffer + start;
6357
6358 while ((endp = etags_strchr (endp, '"')) != NULL
6359 && endp[-1] == '\\')
6360 endp++;
6361 if (endp != NULL)
6362 /* Ok, this is a real #line directive. Let's deal with it. */
6363 {
6364 char *taggedabsname; /* absolute name of original file */
6365 char *taggedfname; /* name of original file as given */
6366 char *name; /* temp var */
6367
6368 discard_until_line_directive = FALSE; /* found it */
6369 name = lbp->buffer + start;
6370 *endp = '\0';
6371 canonicalize_filename (name); /* for DOS */
6372 taggedabsname = absolute_filename (name, tagfiledir);
6373 if (filename_is_absolute (name)
6374 || filename_is_absolute (curfdp->infname))
6375 taggedfname = savestr (taggedabsname);
6376 else
6377 taggedfname = relative_filename (taggedabsname,tagfiledir);
6378
6379 if (streq (curfdp->taggedfname, taggedfname))
6380 /* The #line directive is only a line number change. We
6381 deal with this afterwards. */
6382 free (taggedfname);
6383 else
6384 /* The tags following this #line directive should be
6385 attributed to taggedfname. In order to do this, set
6386 curfdp accordingly. */
6387 {
6388 fdesc *fdp; /* file description pointer */
6389
6390 /* Go look for a file description already set up for the
6391 file indicated in the #line directive. If there is
6392 one, use it from now until the next #line
6393 directive. */
6394 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6395 if (streq (fdp->infname, curfdp->infname)
6396 && streq (fdp->taggedfname, taggedfname))
6397 /* If we remove the second test above (after the &&)
6398 then all entries pertaining to the same file are
6399 coalesced in the tags file. If we use it, then
6400 entries pertaining to the same file but generated
6401 from different files (via #line directives) will
6402 go into separate sections in the tags file. These
6403 alternatives look equivalent. The first one
6404 destroys some apparently useless information. */
6405 {
6406 curfdp = fdp;
6407 free (taggedfname);
6408 break;
6409 }
6410 /* Else, if we already tagged the real file, skip all
6411 input lines until the next #line directive. */
6412 if (fdp == NULL) /* not found */
6413 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6414 if (streq (fdp->infabsname, taggedabsname))
6415 {
6416 discard_until_line_directive = TRUE;
6417 free (taggedfname);
6418 break;
6419 }
6420 /* Else create a new file description and use that from
6421 now on, until the next #line directive. */
6422 if (fdp == NULL) /* not found */
6423 {
6424 fdp = fdhead;
6425 fdhead = xnew (1, fdesc);
6426 *fdhead = *curfdp; /* copy curr. file description */
6427 fdhead->next = fdp;
6428 fdhead->infname = savestr (curfdp->infname);
6429 fdhead->infabsname = savestr (curfdp->infabsname);
6430 fdhead->infabsdir = savestr (curfdp->infabsdir);
6431 fdhead->taggedfname = taggedfname;
6432 fdhead->usecharno = FALSE;
6433 fdhead->prop = NULL;
6434 fdhead->written = FALSE;
6435 curfdp = fdhead;
6436 }
6437 }
6438 free (taggedabsname);
6439 lineno = lno - 1;
6440 readline (lbp, stream);
6441 return;
6442 } /* if a real #line directive */
6443 } /* if #line is followed by a a number */
6444 } /* if line begins with "#line " */
6445
6446 /* If we are here, no #line directive was found. */
6447 if (discard_until_line_directive)
6448 {
6449 if (result > 0)
6450 {
6451 /* Do a tail recursion on ourselves, thus discarding the contents
6452 of the line buffer. */
6453 readline (lbp, stream);
6454 return;
6455 }
6456 /* End of file. */
6457 discard_until_line_directive = FALSE;
6458 return;
6459 }
6460 } /* if #line directives should be considered */
6461
6462 {
6463 int match;
6464 regexp *rp;
6465 char *name;
6466
6467 /* Match against relevant regexps. */
6468 if (lbp->len > 0)
6469 for (rp = p_head; rp != NULL; rp = rp->p_next)
6470 {
6471 /* Only use generic regexps or those for the current language.
6472 Also do not use multiline regexps, which is the job of
6473 regex_tag_multiline. */
6474 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6475 || rp->multi_line)
6476 continue;
6477
6478 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6479 switch (match)
6480 {
6481 case -2:
6482 /* Some error. */
6483 if (!rp->error_signaled)
6484 {
6485 error ("regexp stack overflow while matching \"%s\"",
6486 rp->pattern);
6487 rp->error_signaled = TRUE;
6488 }
6489 break;
6490 case -1:
6491 /* No match. */
6492 break;
6493 case 0:
6494 /* Empty string matched. */
6495 if (!rp->error_signaled)
6496 {
6497 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6498 rp->error_signaled = TRUE;
6499 }
6500 break;
6501 default:
6502 /* Match occurred. Construct a tag. */
6503 name = rp->name;
6504 if (name[0] == '\0')
6505 name = NULL;
6506 else /* make a named tag */
6507 name = substitute (lbp->buffer, rp->name, &rp->regs);
6508 if (rp->force_explicit_name)
6509 /* Force explicit tag name, if a name is there. */
6510 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6511 else
6512 make_tag (name, strlen (name), TRUE,
6513 lbp->buffer, match, lineno, linecharno);
6514 break;
6515 }
6516 }
6517 }
6518 }
6519
6520 \f
/*
 * Return a pointer to a newly allocated copy of the whole string CP,
 * terminator included.  The copy is made by savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6531
6532 /*
6533 * Return a pointer to a space of size LEN+1 allocated with xnew where
6534 * the string CP has been copied for at most the first LEN characters.
6535 */
6536 static char *
6537 savenstr (cp, len)
6538 char *cp;
6539 int len;
6540 {
6541 register char *dp;
6542
6543 dp = xnew (len + 1, char);
6544 strncpy (dp, cp, len);
6545 dp[len] = '\0';
6546 return dp;
6547 }
6548
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null character is
 * part of the search, so looking for '\0' finds the end of SP.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;     /* latest occurrence seen so far */

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6570
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null character is
 * part of the search, so looking for '\0' finds the end of SP.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6589
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * The result is zero when the strings are equal, otherwise it is the
 * difference between the first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      /* Case is folded only when both characters are alphabetic. */
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      if (*s1 == '\0' || diff != 0)
        return diff;
    }
}
6610
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters.
 *
 * The result is zero when the first N characters are equal or when
 * both strings end, equal, before that; otherwise it is the
 * difference between the first pair of characters that differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Every character compared is counted against N, including the last
     one of S1, so that a string of exactly N characters compares equal
     to a longer one sharing those N characters, and so that nothing is
     compared at all when N is zero. */
  for (; n > 0; n--, s1++, s2++)
    {
      /* Case is folded only when both characters are alphabetic. */
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      if (diff != 0 || *s1 == '\0')
        return diff;
    }
  return 0;
}
6636
/* Skip spaces (end of string is not space), return new pointer:
   the address of the first character of CP for which iswhite fails.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6646
/* Skip non spaces, except end of string, return new pointer: the
   address of the first white character of CP, or of its terminating
   null character if there is none.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;; cp++)
    if (*cp == '\0' || iswhite (*cp))
      return cp;
}
6656
/* Print error message and exit.  `s1' and `s2' are handed to `error'
   unchanged; the exit status is EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);               /* report first ... */
  exit (EXIT_FAILURE);          /* ... then give up */
}
6665
/* Report the last system error with `perror', using `s1' as the
   prefix of the message, and exit with status EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6673
6674 static void
6675 suggest_asking_for_help ()
6676 {
6677 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6678 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6679 exit (EXIT_FAILURE);
6680 }
6681
6682 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6683 static void
6684 error (s1, s2)
6685 const char *s1, *s2;
6686 {
6687 fprintf (stderr, "%s: ", progname);
6688 fprintf (stderr, s1, s2);
6689 fprintf (stderr, "\n");
6690 }
6691
6692 /* Return a newly-allocated string whose contents
6693 concatenate those of s1, s2, s3. */
6694 static char *
6695 concat (s1, s2, s3)
6696 char *s1, *s2, *s3;
6697 {
6698 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6699 char *result = xnew (len1 + len2 + len3 + 1, char);
6700
6701 strcpy (result, s1);
6702 strcpy (result + len1, s2);
6703 strcpy (result + len1 + len2, s3);
6704 result[len1 + len2 + len3] = '\0';
6705
6706 return result;
6707 }
6708
6709 \f
6710 /* Does the same work as the system V getcwd, but does not need to
6711 guess the buffer size in advance. */
6712 static char *
6713 etags_getcwd ()
6714 {
6715 #ifdef HAVE_GETCWD
6716 int bufsize = 200;
6717 char *path = xnew (bufsize, char);
6718
6719 while (getcwd (path, bufsize) == NULL)
6720 {
6721 if (errno != ERANGE)
6722 pfatal ("getcwd");
6723 bufsize *= 2;
6724 free (path);
6725 path = xnew (bufsize, char);
6726 }
6727
6728 canonicalize_filename (path);
6729 return path;
6730
6731 #else /* not HAVE_GETCWD */
6732 #if MSDOS
6733
6734 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6735
6736 getwd (path);
6737
6738 for (p = path; *p != '\0'; p++)
6739 if (*p == '\\')
6740 *p = '/';
6741 else
6742 *p = lowcase (*p);
6743
6744 return strdup (path);
6745 #else /* not MSDOS */
6746 linebuffer path;
6747 FILE *pipe;
6748
6749 linebuffer_init (&path);
6750 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6751 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6752 pfatal ("pwd");
6753 pclose (pipe);
6754
6755 return path.buffer;
6756 #endif /* not MSDOS */
6757 #endif /* not HAVE_GETCWD */
6758 }
6759
6760 /* Return a newly allocated string containing the file name of FILE
6761 relative to the absolute directory DIR (which should end with a slash). */
6762 static char *
6763 relative_filename (file, dir)
6764 char *file, *dir;
6765 {
6766 char *fp, *dp, *afn, *res;
6767 int i;
6768
6769 /* Find the common root of file and dir (with a trailing slash). */
6770 afn = absolute_filename (file, cwd);
6771 fp = afn;
6772 dp = dir;
6773 while (*fp++ == *dp++)
6774 continue;
6775 fp--, dp--; /* back to the first differing char */
6776 #ifdef DOS_NT
6777 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6778 return afn;
6779 #endif
6780 do /* look at the equal chars until '/' */
6781 fp--, dp--;
6782 while (*fp != '/');
6783
6784 /* Build a sequence of "../" strings for the resulting relative file name. */
6785 i = 0;
6786 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6787 i += 1;
6788 res = xnew (3*i + strlen (fp + 1) + 1, char);
6789 res[0] = '\0';
6790 while (i-- > 0)
6791 strcat (res, "../");
6792
6793 /* Add the file name relative to the common root of file and dir. */
6794 strcat (res, fp + 1);
6795 free (afn);
6796
6797 return res;
6798 }
6799
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  The "/dirname/.."
   and "/." components are removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the
     string is moved with memmove because source and destination
     overlap. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the beginning of the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6863
6864 /* Return a newly allocated string containing the absolute
6865 file name of dir where FILE resides given DIR (which should
6866 end with a slash). */
6867 static char *
6868 absolute_dirname (file, dir)
6869 char *file, *dir;
6870 {
6871 char *slashp, *res;
6872 char save;
6873
6874 canonicalize_filename (file);
6875 slashp = etags_strrchr (file, '/');
6876 if (slashp == NULL)
6877 return savestr (dir);
6878 save = slashp[1];
6879 slashp[1] = '\0';
6880 res = absolute_filename (file, dir);
6881 slashp[1] = save;
6882
6883 return res;
6884 }
6885
/* Whether the argument string is an absolute file name, that is a
   name starting with a slash or, on DOS_NT, with a drive letter, a
   colon and a slash.  The argument string must have been canonicalized
   with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6898
/* Canonicalize the file name FN in place.  When DOS_NT is defined, a
   lower-case first character that is followed by a colon is upcased
   and every backslash is turned into a slash.  Otherwise the string
   is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* An empty name has neither a drive letter nor backslashes.  */
  if (fn[0] == '\0')
    return;

  /* Canonicalize drive letter case. */
  if (fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes. */
  while (*fn != '\0')
    {
      if (*fn == '\\')
	*fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;			/* shut up the compiler */
#endif
}
6917
6918 \f
6919 /* Initialize a linebuffer for use */
6920 static void
6921 linebuffer_init (lbp)
6922 linebuffer *lbp;
6923 {
6924 lbp->size = (DEBUG) ? 3 : 200;
6925 lbp->buffer = xnew (lbp->size, char);
6926 lbp->buffer[0] = '\0';
6927 lbp->len = 0;
6928 }
6929
6930 /* Set the minimum size of a string contained in a linebuffer. */
6931 static void
6932 linebuffer_setlen (lbp, toksize)
6933 linebuffer *lbp;
6934 int toksize;
6935 {
6936 while (lbp->size <= toksize)
6937 {
6938 lbp->size *= 2;
6939 xrnew (lbp->buffer, lbp->size, char);
6940 }
6941 lbp->len = toksize;
6942 }
6943
6944 /* Like malloc but get fatal error if memory is exhausted. */
6945 static PTR
6946 xmalloc (size)
6947 unsigned int size;
6948 {
6949 PTR result = (PTR) malloc (size);
6950 if (result == NULL)
6951 fatal ("virtual memory exhausted", (char *)NULL);
6952 return result;
6953 }
6954
6955 static PTR
6956 xrealloc (ptr, size)
6957 char *ptr;
6958 unsigned int size;
6959 {
6960 PTR result = (PTR) realloc (ptr, size);
6961 if (result == NULL)
6962 fatal ("virtual memory exhausted", (char *)NULL);
6963 return result;
6964 }
6965
6966 /*
6967 * Local Variables:
6968 * indent-tabs-mode: t
6969 * tab-width: 8
6970 * fill-column: 79
6971 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6972 * c-file-style: "gnu"
6973 * End:
6974 */
6975
6976 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6977 (do not change this comment) */
6978
6979 /* etags.c ends here */