* bitmaps/README:
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
49
50
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
54
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
58
59
60 /*
61 * Authors:
62 * 1983 Ctags originally by Ken Arnold.
63 * 1984 Fortran added by Jim Kleckner.
64 * 1984 Ed Pelegri-Llopart added C typedefs.
65 * 1985 Emacs TAGS format by Richard Stallman.
66 * 1989 Sam Kendall added C++.
67 * 1992 Joseph B. Wells improved C and C++ parsing.
68 * 1993 Francesco Potortì reorganized C and C++.
69 * 1994 Line-by-line regexp tags by Tom Tromey.
70 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
71 * 2002 #line directives by Francesco Potortì.
72 *
73 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
74 */
75
76 /*
77 * If you want to add support for a new language, start by looking at the LUA
78 * language, which is the simplest. Alternatively, consider shipping a
79 * configuration file containing regexp definitions for etags.
80 */
81
/* Version identification string embedded in the program.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38";

#define TRUE  1
#define FALSE 0

/* Turn DEBUG into a macro that is always defined and always has a
   truth value, so that it can be tested with `#if DEBUG' as well as
   from ordinary C expressions.  Non-debug builds also switch off
   assert.  NDEBUG is only defined here when the builder has not
   already supplied it (e.g. with -DNDEBUG), since redefining it with a
   different replacement list would be an invalid macro redefinition.  */
#ifdef DEBUG
# undef DEBUG
# define DEBUG TRUE
#else
# define DEBUG FALSE
# ifndef NDEBUG
#  define NDEBUG		/* disable assert */
# endif
#endif
94
/* Provide PTR (the generic pointer type) and __P (the wrapper used
   around prototype argument lists), either on top of config.h or from
   scratch when compiling standalone.  */
#if defined (HAVE_CONFIG_H)
# include <config.h>
  /* On some systems Emacs defines `static' as nothing for the sake of
     unexec.  That is unwanted here, because this program does not use
     unexec.  */
# undef static
# if !defined (PTR)		/* for XEmacs */
#  define PTR void *
# endif
# if !defined (__P)		/* for XEmacs */
#  define __P(args) args
# endif
#else  /* no config.h */
# if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
#  define __P(args) args	/* use prototypes */
#  define PTR void *		/* for generic pointers */
# else /* not standard C */
#  define __P(args) ()		/* no prototypes */
#  define const			/* remove const for old compilers' sake */
#  define PTR long *		/* don't use void* */
# endif
#endif /* !HAVE_CONFIG_H */
116
/* Request the GNU feature set unless the builder already did so.  */
#if !defined (_GNU_SOURCE)
# define _GNU_SOURCE 1		/* enables some compiler checks on GNU */
#endif
120
/* Platform selection macros.  XEmacs uses WIN32_NATIVE, while Emacs
   uses MSDOS, WINDOWSNT and DOS_NT.  A WIN32_NATIVE build is mapped
   onto WINDOWSNT (and MSDOS is dropped in that case).  */
#if defined (WIN32_NATIVE)
# undef MSDOS
# undef WINDOWSNT
# define WINDOWSNT
#endif /* WIN32_NATIVE */

/* From here on MSDOS is always defined and holds a truth value.  */
#if defined (MSDOS)
# undef MSDOS
# define MSDOS TRUE
# include <fcntl.h>
# include <sys/param.h>
# include <io.h>
# if !defined (HAVE_CONFIG_H)
#  define DOS_NT
#  include <sys/config.h>
# endif
#else
# define MSDOS FALSE
#endif /* MSDOS */
142
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # define EXIT_SUCCESS 0
176 # define EXIT_FAILURE 1
177 # endif
178 #endif /* !WINDOWSNT */
179
/* getcwd comes from <unistd.h> when that header exists; otherwise it
   is declared by hand on non-Windows systems that have the function.  */
#if defined (HAVE_UNISTD_H)
# include <unistd.h>
#elif defined (HAVE_GETCWD) && !defined (WINDOWSNT)
extern char *getcwd (char *buf, size_t size);
#endif /* HAVE_UNISTD_H */

#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#if !defined (errno)
extern int errno;
#endif
#include <sys/types.h>
#include <sys/stat.h>

/* With NDEBUG set, replace whatever <assert.h> provided by a no-op,
   because some systems have a buggy assert.h.  */
#include <assert.h>
#if defined (NDEBUG)
# undef assert
# define assert(x) ((void) 0)
#endif

/* Supply S_ISREG on systems that only offer the S_IFREG mode bits.  */
#if defined (S_IFREG) && !defined (S_ISREG)
# define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
#endif
206
/* Define NO_LONG_OPTIONS when GNU getopt is not available.  In that
   case getopt_long is mapped onto plain getopt and the long-option
   arguments are discarded.  Either way NO_LONG_OPTIONS ends up defined
   with a truth value.  */
#if defined (NO_LONG_OPTIONS)
# define NO_LONG_OPTIONS TRUE
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
extern char *optarg;
extern int optind, opterr;
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
216
/* A standalone compilation (no config.h) on Cygwin is stopped on
   purpose: the regex.h distributed with Cygwin is not compatible with
   etags.  Whoever wants regular expression support there has to delete
   the #error below and arrange to use the GNU regex.h and regex.c.  */
#if !defined (HAVE_CONFIG_H) && defined (__CYGWIN__)
# error "The regex.h distributed with Cygwin is not compatible with etags.  For regular expression support, delete this #error and arrange to use the GNU regex.h and regex.c."
#endif
#include <regex.h>
226
/* Building with CTAGS defined yields a "ctags" program compatible with
   the usual one.  Without it the result is "etags", which writes
   emacs-style tag tables and tags typedefs, #defines and
   struct/union/enum by default.  After this point CTAGS is always
   defined and holds a truth value.  */
#if defined (CTAGS)
# undef CTAGS
# define CTAGS TRUE
#else
# define CTAGS FALSE
#endif
236
/* String equality predicates; each yields nonzero when the strings
   compare equal.  streq/strneq are case-sensitive (strcmp/strncmp),
   strcaseeq/strncaseeq go through the etags_str*casecmp helpers.
   All four assert that BOTH arguments are non-NULL, because the
   comparison functions dereference both of them.  */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
241
/* Number of entries in the character class tables, i.e. the number of
   values that survive the masking done by CHAR.  */
#define CHARS 256
/* Reduce X to an index in the range 0 .. CHARS-1.  */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))

/* Character class predicates, answered by table lookup.  The name in
   parentheses is the string that lists the members of each class.  */
#define iswhite(c)	(_wht[CHAR(c)])	/* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)])	/* c is not in a name (see nonam) */
#define begtoken(c)	(_btk[CHAR(c)])	/* c can start token (see begtk) */
#define intoken(c)	(_itk[CHAR(c)])	/* c can be in token (see midtk) */
#define endtoken(c)	(_etk[CHAR(c)])	/* c ends tokens (see endtk) */

/* <ctype.h> tests and case conversions, applied to the masked value.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
257
258
/*
 * xnew, xrnew -- typed allocation helpers
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew yields storage for N objects of type Type.  xrnew resizes the
 * storage OP points to so that it holds N objects of type Type, and
 * assigns the result back to OP.  Debug builds route both through the
 * tracing allocator of chkmalloc.h, passing the call site along.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type) \
    ((Type *) trace_malloc (__FILE__, __LINE__, (n) * sizeof (Type)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
				    (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type) \
    ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) xrealloc ((char *) (op), (n) * sizeof (Type)))
#endif
276
277 #define bool int
278
279 typedef void Lang_function __P((FILE *));
280
281 typedef struct
282 {
283 char *suffix; /* file name suffix for this compressor */
284 char *command; /* takes one arg and decompresses to stdout */
285 } compressor;
286
287 typedef struct
288 {
289 char *name; /* language name */
290 char *help; /* detailed help for the language */
291 Lang_function *function; /* parse function */
292 char **suffixes; /* name suffixes of this language's files */
293 char **filenames; /* names of this language's files */
294 char **interpreters; /* interpreters for this language */
295 bool metasource; /* source used to generate other sources */
296 } language;
297
298 typedef struct fdesc
299 {
300 struct fdesc *next; /* for the linked list */
301 char *infname; /* uncompressed input file name */
302 char *infabsname; /* absolute uncompressed input file name */
303 char *infabsdir; /* absolute dir of input file */
304 char *taggedfname; /* file name to write in tagfile */
305 language *lang; /* language of file */
306 char *prop; /* file properties to write in tagfile */
307 bool usecharno; /* etags tags shall contain char number */
308 bool written; /* entry written in the tags file */
309 } fdesc;
310
311 typedef struct node_st
312 { /* sorting structure */
313 struct node_st *left, *right; /* left and right sons */
314 fdesc *fdp; /* description of file to whom tag belongs */
315 char *name; /* tag name */
316 char *regex; /* search regexp */
317 bool valid; /* write this tag on the tag file */
318 bool is_func; /* function tag: use regexp in CTAGS mode */
319 bool been_warned; /* warning already given for duplicated tag */
320 int lno; /* line number tag is on */
321 long cno; /* character number line starts on */
322 } node;
323
/*
 * A `linebuffer' holds one line of text.  `readline_internal' reads a
 * line from a stream into a linebuffer, whatever the length of the
 * line.
 */
typedef struct
{
  long size;			/* size of BUFFER */
  int len;			/* length of the string in BUFFER after
				   readline reads it */
  char *buffer;			/* the text itself */
} linebuffer;
337
338 /* Used to support mixing of --lang and file names. */
339 typedef struct
340 {
341 enum {
342 at_language, /* a language specification */
343 at_regexp, /* a regular expression */
344 at_filename, /* a file name */
345 at_stdin, /* read from stdin here */
346 at_end /* stop parsing the list */
347 } arg_type; /* argument type */
348 language *lang; /* language associated with the argument */
349 char *what; /* the argument itself */
350 } argument;
351
352 /* Structure defining a regular expression. */
353 typedef struct regexp
354 {
355 struct regexp *p_next; /* pointer to next in list */
356 language *lang; /* if set, use only for this language */
357 char *pattern; /* the regexp pattern */
358 char *name; /* tag name */
359 struct re_pattern_buffer *pat; /* the compiled pattern */
360 struct re_registers regs; /* re registers */
361 bool error_signaled; /* already signaled for this regexp */
362 bool force_explicit_name; /* do not allow implict tag name */
363 bool ignore_case; /* ignore case when matching */
364 bool multi_line; /* do a multi-line match on the whole file */
365 } regexp;
366
367
368 /* Many compilers barf on this:
369 Lang_function Ada_funcs;
370 so let's write it this way */
371 static void Ada_funcs __P((FILE *));
372 static void Asm_labels __P((FILE *));
373 static void C_entries __P((int c_ext, FILE *));
374 static void default_C_entries __P((FILE *));
375 static void plain_C_entries __P((FILE *));
376 static void Cjava_entries __P((FILE *));
377 static void Cobol_paragraphs __P((FILE *));
378 static void Cplusplus_entries __P((FILE *));
379 static void Cstar_entries __P((FILE *));
380 static void Erlang_functions __P((FILE *));
381 static void Forth_words __P((FILE *));
382 static void Fortran_functions __P((FILE *));
383 static void HTML_labels __P((FILE *));
384 static void Lisp_functions __P((FILE *));
385 static void Lua_functions __P((FILE *));
386 static void Makefile_targets __P((FILE *));
387 static void Pascal_functions __P((FILE *));
388 static void Perl_functions __P((FILE *));
389 static void PHP_functions __P((FILE *));
390 static void PS_functions __P((FILE *));
391 static void Prolog_functions __P((FILE *));
392 static void Python_functions __P((FILE *));
393 static void Scheme_functions __P((FILE *));
394 static void TeX_commands __P((FILE *));
395 static void Texinfo_nodes __P((FILE *));
396 static void Yacc_entries __P((FILE *));
397 static void just_read_file __P((FILE *));
398
399 static void print_language_names __P((void));
400 static void print_version __P((void));
401 static void print_help __P((argument *));
402 int main __P((int, char **));
403
404 static compressor *get_compressor_from_suffix __P((char *, char **));
405 static language *get_language_from_langname __P((const char *));
406 static language *get_language_from_interpreter __P((char *));
407 static language *get_language_from_filename __P((char *, bool));
408 static void readline __P((linebuffer *, FILE *));
409 static long readline_internal __P((linebuffer *, FILE *));
410 static bool nocase_tail __P((char *));
411 static void get_tag __P((char *, char **));
412
413 static void analyse_regex __P((char *));
414 static void free_regexps __P((void));
415 static void regex_tag_multiline __P((void));
416 static void error __P((const char *, const char *));
417 static void suggest_asking_for_help __P((void));
418 void fatal __P((char *, char *));
419 static void pfatal __P((char *));
420 static void add_node __P((node *, node **));
421
422 static void init __P((void));
423 static void process_file_name __P((char *, language *));
424 static void process_file __P((FILE *, char *, language *));
425 static void find_entries __P((FILE *));
426 static void free_tree __P((node *));
427 static void free_fdesc __P((fdesc *));
428 static void pfnote __P((char *, bool, char *, int, int, long));
429 static void make_tag __P((char *, int, bool, char *, int, int, long));
430 static void invalidate_nodes __P((fdesc *, node **));
431 static void put_entries __P((node *));
432
433 static char *concat __P((char *, char *, char *));
434 static char *skip_spaces __P((char *));
435 static char *skip_non_spaces __P((char *));
436 static char *savenstr __P((char *, int));
437 static char *savestr __P((char *));
438 static char *etags_strchr __P((const char *, int));
439 static char *etags_strrchr __P((const char *, int));
440 static int etags_strcasecmp __P((const char *, const char *));
441 static int etags_strncasecmp __P((const char *, const char *, int));
442 static char *etags_getcwd __P((void));
443 static char *relative_filename __P((char *, char *));
444 static char *absolute_filename __P((char *, char *));
445 static char *absolute_dirname __P((char *, char *));
446 static bool filename_is_absolute __P((char *f));
447 static void canonicalize_filename __P((char *));
448 static void linebuffer_init __P((linebuffer *));
449 static void linebuffer_setlen __P((linebuffer *, int));
450 static PTR xmalloc __P((unsigned int));
451 static PTR xrealloc __P((char *, unsigned int));
452
453 \f
454 static char searchar = '/'; /* use /.../ searches */
455
456 static char *tagfile; /* output file */
457 static char *progname; /* name this program was invoked with */
458 static char *cwd; /* current working directory */
459 static char *tagfiledir; /* directory of tagfile */
460 static FILE *tagf; /* ioptr for tags file */
461
462 static fdesc *fdhead; /* head of file description list */
463 static fdesc *curfdp; /* current file description */
464 static int lineno; /* line number of current line */
465 static long charno; /* current character number */
466 static long linecharno; /* charno of start of current line */
467 static char *dbp; /* pointer to start of current tag */
468
469 static const int invalidcharno = -1;
470
471 static node *nodehead; /* the head of the binary tree of tags */
472 static node *last_node; /* the last node created */
473
474 static linebuffer lb; /* the current line */
475 static linebuffer filebuf; /* a buffer containing the whole file */
476 static linebuffer token_name; /* a buffer containing a tag name */
477
478 /* boolean "functions" (see init) */
479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* The members of each character class, one string per class.  */

/* white chars */
static char *white = " \f\t\n\r\v";

/* not in a name -- look at make_tag before modifying! */
static char *nonam = " \f\t\n\r()=,;";

/* token ending chars */
static char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?";

/* token starting chars */
static char *begtk =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";

/* valid in-token chars */
static char *midtk =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
491
/* State of the command line options.  */

static bool append_to_tagfile;	/* -a: append to tags */

/* The next five default to TRUE in C and derived languages.  */
static bool typedefs;		/* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus;
				/* -T: create tags for C typedefs, level
				   0 struct/enum/union decls, and C++
				   member functions. */
static bool constantypedefs;	/* -d: create tags for C #define, enum
				   constants and variables.
				   -D: opposite of -d.  Default under ctags. */
static bool globals;		/* create tags for global variables */
static bool members;		/* create tags for C member variables */

static bool declarations;	/* --declarations: tag them and extern in C&Co*/
static bool no_line_directive;	/* ignore #line directives (undocumented) */
static bool no_duplicates;	/* no duplicate tags for ctags (undocumented) */
static bool update;		/* -u: update tags */
static bool vgrind_style;	/* -v: create vgrind style index output */
static bool no_warnings;	/* -w: suppress warnings (undocumented) */
static bool cxref_style;	/* -x: create cxref style output */
static bool cplusplus;		/* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;	/* -I: ignore indentation in C */
static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
513
514 /* STDIN is defined in LynxOS system headers */
515 #ifdef STDIN
516 # undef STDIN
517 #endif
518
519 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
520 static bool parsing_stdin; /* --parse-stdin used */
521
522 static regexp *p_head; /* list of all regexps */
523 static bool need_filebuf; /* some regexes are multi-line */
524
525 static struct option longopts[] =
526 {
527 { "append", no_argument, NULL, 'a' },
528 { "packages-only", no_argument, &packages_only, TRUE },
529 { "c++", no_argument, NULL, 'C' },
530 { "declarations", no_argument, &declarations, TRUE },
531 { "no-line-directive", no_argument, &no_line_directive, TRUE },
532 { "no-duplicates", no_argument, &no_duplicates, TRUE },
533 { "help", no_argument, NULL, 'h' },
534 { "help", no_argument, NULL, 'H' },
535 { "ignore-indentation", no_argument, NULL, 'I' },
536 { "language", required_argument, NULL, 'l' },
537 { "members", no_argument, &members, TRUE },
538 { "no-members", no_argument, &members, FALSE },
539 { "output", required_argument, NULL, 'o' },
540 { "regex", required_argument, NULL, 'r' },
541 { "no-regex", no_argument, NULL, 'R' },
542 { "ignore-case-regex", required_argument, NULL, 'c' },
543 { "parse-stdin", required_argument, NULL, STDIN },
544 { "version", no_argument, NULL, 'V' },
545
546 #if CTAGS /* Ctags options */
547 { "backward-search", no_argument, NULL, 'B' },
548 { "cxref", no_argument, NULL, 'x' },
549 { "defines", no_argument, NULL, 'd' },
550 { "globals", no_argument, &globals, TRUE },
551 { "typedefs", no_argument, NULL, 't' },
552 { "typedefs-and-c++", no_argument, NULL, 'T' },
553 { "update", no_argument, NULL, 'u' },
554 { "vgrind", no_argument, NULL, 'v' },
555 { "no-warn", no_argument, NULL, 'w' },
556
557 #else /* Etags options */
558 { "no-defines", no_argument, NULL, 'D' },
559 { "no-globals", no_argument, &globals, FALSE },
560 { "include", required_argument, NULL, 'i' },
561 #endif
562 { NULL }
563 };
564
565 static compressor compressors[] =
566 {
567 { "z", "gzip -d -c"},
568 { "Z", "gzip -d -c"},
569 { "gz", "gzip -d -c"},
570 { "GZ", "gzip -d -c"},
571 { "bz2", "bzip2 -d -c" },
572 { NULL }
573 };
574
575 /*
576 * Language stuff.
577 */
578
/* Ada code: file name suffixes and help text.  */
static char *Ada_suffixes [] =
  {
    "ads",
    "adb",
    "ada",
    NULL
  };
static char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
598
/* Assembly code: file name suffixes and help text.  */
static char *Asm_suffixes [] =
  {
    "a",	/* Unix assembler */
    "asm",	/* Microcontroller assembly */
    "def",	/* BSO/Tasking definition includes */
    "inc",	/* Microcontroller include files */
    "ins",	/* Microcontroller include files */
    "s",	/* Unix assembler */
    "sa",	/* Unix assembler */
    "S",	/* cpp-processed Unix assembler */
    "src",	/* BSO/Tasking C compiler output */
    NULL
  };
static char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
614
615
/* Files ending in .c and .h are handed to default_C_entries because
   they can turn out to be C++: either the --c++ flag was given, or the
   `class' or `template' keywords are met inside the file.  */
static char *default_C_suffixes [] =
  {
    "c",
    "h",
    NULL
  };
static char default_C_help [] =
#if CTAGS /* C help for Ctags */
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'."
#else /* C help for Etags */
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'."
#endif /* C help for Ctags and Etags */
  ;
641
/* C++: file name suffixes and help text.  */
static char *Cplusplus_suffixes [] =
  {
    "C", "c++", "cc", "cpp", "cxx",
    "H", "h++", "hh", "hpp", "hxx",
    "M",	/* Objective C++ */
    "pdb",	/* Postscript with C syntax */
    NULL
  };
static char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";
655
/* Java: file name suffixes and help text.  The help text says "tag
   constructs", the wording also used by the C++ help.  */
static char *Cjava_suffixes [] =
  { "java", NULL };
static char Cjava_help [] =
  "In Java code, all the tag constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
661
662
/* Cobol: file name suffixes and help text.  */
static char *Cobol_suffixes [] =
  {
    "COB",
    "cob",
    NULL
  };
static char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";

/* C*: file name suffixes only; there is no help text of its own.  */
static char *Cstar_suffixes [] =
  {
    "cs",
    "hs",
    NULL
  };

/* Erlang: file name suffixes and help text.  */
static char *Erlang_suffixes [] =
  {
    "erl",
    "hrl",
    NULL
  };
static char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
677
/* Forth: file name suffixes and help text.  The suffix table is
   file-local, like the suffix tables of all the other languages.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
  "In Forth code, tags are words defined by `:',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";
683
/* Fortran: file name suffixes and help text.  */
static char *Fortran_suffixes [] =
  {
    "F", "f", "f90", "for",
    NULL
  };
static char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";

/* HTML: file name suffixes and help text.  */
static char *HTML_suffixes [] =
  {
    "htm", "html", "shtml",
    NULL
  };
static char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";

/* Lisp: file name suffixes and help text.  */
static char *Lisp_suffixes [] =
  {
    "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml",
    NULL
  };
static char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";

/* Lua: file name suffixes and help text.  */
static char *Lua_suffixes [] =
  {
    "lua", "LUA",
    NULL
  };
static char Lua_help [] =
  "In Lua scripts, all functions are tags.";
708
/* Makefiles: these are whole file names, not suffixes.  */
static char *Makefile_filenames [] =
  {
    "Makefile",
    "makefile",
    "GNUMakefile",
    "Makefile.in",
    "Makefile.am",
    NULL
  };
static char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* Objective C: file name suffixes and help text.  */
static char *Objc_suffixes [] =
  {
    "lm",	/* Objective lex file */
    "m",	/* Objective C file */
    NULL
  };
static char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal: file name suffixes and help text.  */
static char *Pascal_suffixes [] =
  {
    "p",
    "pas",
    NULL
  };
static char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */
731
/* Perl: file name suffixes, interpreter names and help text.  */
static char *Perl_suffixes [] =
  {
    "pl",
    "pm",
    NULL
  };
static char *Perl_interpreters [] =
  {
    "perl",
    "@PERL@",
    NULL
  };
static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";

/* PHP: file name suffixes and help text.  */
static char *PHP_suffixes [] =
  {
    "php",
    "php3",
    "php4",
    NULL
  };
static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";

/* Plain C suffixes; there is no help text of its own.  */
static char *plain_C_suffixes [] =
  {
    "pc",	/* Pro*C file */
    NULL
  };

/* PostScript: file name suffixes and help text.  */
static char *PS_suffixes [] =
  {
    "ps",
    "psw",	/* .psw is for PSWrap */
    NULL
  };
static char PS_help [] =
  "In PostScript code, the tags are the functions.";
757
/* Prolog: file name suffixes and help text.  */
static char *Prolog_suffixes [] =
  {
    "prolog",
    NULL
  };
static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";

/* Python: file name suffixes and help text.  */
static char *Python_suffixes [] =
  {
    "py",
    NULL
  };
static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";

/* Scheme: file name suffixes and help text.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static char *Scheme_suffixes [] =
  {
    "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t",
    NULL
  };
static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
777
/* TeX and LaTeX: file name suffixes and help text.  */
static char *TeX_suffixes [] =
  {
    "bib", "clo", "cls", "ltx", "sty", "TeX", "tex",
    NULL
  };
static char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";
790
791
/* Texinfo: file name suffixes and help text.  */
static char *Texinfo_suffixes [] =
  {
    "texi",
    "texinfo",
    "txi",
    NULL
  };
static char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";

/* Yacc and Bison: file name suffixes and help text.  */
static char *Yacc_suffixes [] =
  {
    "y",
    "y++",
    "ym",	/* .ym is Objective yacc file */
    "yxx",
    "yy",
    NULL
  };
static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
804
805 static char auto_help [] =
806 "`auto' is not a real language, it indicates to use\n\
807 a default language for files base on file name suffix and file contents.";
808
/* Help text for the pseudo-language `none', which restricts
   processing to user-supplied regular expressions.  */
static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";
812
/* Placeholder help text for languages without a detailed description.  */
static char no_lang_help [] = "No detailed help available for this language.";
815
816
817 /*
818 * Table of languages.
819 *
820 * It is ok for a given function to be listed under more than one
821 * name. I just didn't.
822 */
823
824 static language lang_names [] =
825 {
826 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
827 { "asm", Asm_help, Asm_labels, Asm_suffixes },
828 { "c", default_C_help, default_C_entries, default_C_suffixes },
829 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
830 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
831 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
832 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
833 { "forth", Forth_help, Forth_words, Forth_suffixes },
834 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
835 { "html", HTML_help, HTML_labels, HTML_suffixes },
836 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
837 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
838 { "lua", Lua_help, Lua_functions, Lua_suffixes },
839 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
840 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
841 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
842 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
843 { "php", PHP_help, PHP_functions, PHP_suffixes },
844 { "postscript",PS_help, PS_functions, PS_suffixes },
845 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
846 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
847 { "python", Python_help, Python_functions, Python_suffixes },
848 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
849 { "tex", TeX_help, TeX_commands, TeX_suffixes },
850 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
851 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
852 { "auto", auto_help }, /* default guessing scheme */
853 { "none", none_help, just_read_file }, /* regexp matching only */
854 { NULL } /* end of list */
855 };
856
857 \f
/* Print on standard output the list of supported languages, each
   followed by its default file names and dot suffixes, and then a
   short explanation of how the language of a file is chosen.  */
858 static void
859 print_language_names ()
860 {
861 language *lang;
862 char **name, **ext;
863
864 puts ("\nThese are the currently supported languages, along with the\n\
865 default file names and dot suffixes:");
/* One output line per entry of lang_names: the name padded to ten
   columns, then the whole file names, then the suffixes.  */
866 for (lang = lang_names; lang->name != NULL; lang++)
867 {
868 printf (" %-*s", 10, lang->name);
869 if (lang->filenames != NULL)
870 for (name = lang->filenames; *name != NULL; name++)
871 printf (" %s", *name);
872 if (lang->suffixes != NULL)
873 for (ext = lang->suffixes; *ext != NULL; ext++)
874 printf (" .%s", *ext);
875 puts ("");
876 }
/* Explain the `auto' and `none' pseudo-languages and the guessing
   sequence used when no language is forced.  */
877 puts ("where `auto' means use default language for files based on file\n\
878 name suffix, and `none' means only do regexp processing on files.\n\
879 If no language is specified and no matching suffix is found,\n\
880 the first line of the file is read for a sharp-bang (#!) sequence\n\
881 followed by the name of an interpreter. If no such sequence is found,\n\
882 Fortran is tried first; if no tags are found, C is tried next.\n\
883 When parsing any C file, a \"class\" or \"template\" keyword\n\
884 switches to C++.");
885 puts ("Compressed files are supported using gzip and bzip2.\n\
886 \n\
887 For detailed help on a given language use, for example,\n\
888 etags --help --lang=ada.");
889 }
890
891 #ifndef EMACS_NAME
892 # define EMACS_NAME "standalone"
893 #endif
894 #ifndef VERSION
895 # define VERSION "17.38"
896 #endif
897 static void
898 print_version ()
899 {
900 /* Makes it easier to update automatically. */
901 char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
902
903 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
904 puts (emacs_copyright);
905 puts ("This program is distributed under the terms in ETAGS.README");
906
907 exit (EXIT_SUCCESS);
908 }
909
/* When this is true, print_help also describes the options guarded by
   it below; the default is not to.  */
910 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
911 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
912 #endif
913
/* Print help on standard output and exit successfully; never returns.

   ARGBUFFER is the argument list, terminated by an entry of type
   at_end.  If it contains language entries, only the detailed help of
   those languages is printed, separated by empty lines.  Otherwise the
   usage line, the option descriptions (which differ depending on
   CTAGS), the list of languages and the bug report address are
   printed.  */
914 static void
915 print_help (argbuffer)
916 argument *argbuffer;
917 {
918 bool help_for_lang = FALSE;
919
/* Language-specific help: one text per language argument.  */
920 for (; argbuffer->arg_type != at_end; argbuffer++)
921 if (argbuffer->arg_type == at_language)
922 {
923 if (help_for_lang)
924 puts ("");
925 puts (argbuffer->lang->help);
926 help_for_lang = TRUE;
927 }
928
929 if (help_for_lang)
930 exit (EXIT_SUCCESS);
931
/* General help from here on.  */
932 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
933 \n\
934 These are the options accepted by %s.\n", progname, progname);
935 if (NO_LONG_OPTIONS)
936 puts ("WARNING: long option names do not work with this executable,\n\
937 as it is not linked with GNU getopt.");
938 else
939 puts ("You may use unambiguous abbreviations for the long option names.");
940 puts (" A - as file name means read names from stdin (one per line).\n\
941 Absolute names are stored in the output file as they are.\n\
942 Relative ones are stored relative to the output file's directory.\n");
943
944 puts ("-a, --append\n\
945 Append tag entries to existing tags file.");
946
947 puts ("--packages-only\n\
948 For Ada files, only generate tags for packages.");
949
950 if (CTAGS)
951 puts ("-B, --backward-search\n\
952 Write the search commands for the tag entries using '?', the\n\
953 backward-search command instead of '/', the forward-search command.");
954
955 /* This option is mostly obsolete, because etags can now automatically
956 detect C++. Retained for backward compatibility and for debugging and
957 experimentation. In principle, we could want to tag as C++ even
958 before any "class" or "template" keyword.
959 puts ("-C, --c++\n\
960 Treat files whose name suffix defaults to C language as C++ files.");
961 */
962
963 puts ("--declarations\n\
964 In C and derived languages, create tags for function declarations,");
965 if (CTAGS)
966 puts ("\tand create tags for extern variables if --globals is used.");
967 else
968 puts
969 ("\tand create tags for extern variables unless --no-globals is used.");
970
971 if (CTAGS)
972 puts ("-d, --defines\n\
973 Create tag entries for C #define constants and enum constants, too.");
974 else
975 puts ("-D, --no-defines\n\
976 Don't create tag entries for C #define constants and enum constants.\n\
977 This makes the tags file smaller.");
978
979 if (!CTAGS)
980 puts ("-i FILE, --include=FILE\n\
981 Include a note in tag file indicating that, when searching for\n\
982 a tag, one should also consult the tags file FILE after\n\
983 checking the current file.");
984
985 puts ("-l LANG, --language=LANG\n\
986 Force the following files to be considered as written in the\n\
987 named language up to the next --language=LANG option.");
988
989 if (CTAGS)
990 puts ("--globals\n\
991 Create tag entries for global variables in some languages.");
992 else
993 puts ("--no-globals\n\
994 Do not create tag entries for global variables in some\n\
995 languages. This makes the tags file smaller.");
996
997 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
998 puts ("--no-line-directive\n\
999 Ignore #line preprocessor directives in C and derived languages.");
1000
1001 if (CTAGS)
1002 puts ("--members\n\
1003 Create tag entries for members of structures in some languages.");
1004 else
1005 puts ("--no-members\n\
1006 Do not create tag entries for members of structures\n\
1007 in some languages.");
1008
1009 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1010 Make a tag for each line matching a regular expression pattern\n\
1011 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1012 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1013 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1014 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1015 puts (" If TAGNAME/ is present, the tags created are named.\n\
1016 For example Tcl named tags can be created with:\n\
1017 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1018 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1019 `m' means to allow multi-line matches, `s' implies `m' and\n\
1020 causes dot to match any character, including newline.");
1021
1022 puts ("-R, --no-regex\n\
1023 Don't create tags from regexps for the following files.");
1024
1025 puts ("-I, --ignore-indentation\n\
1026 In C and C++ do not assume that a closing brace in the first\n\
1027 column is the final brace of a function or structure definition.");
1028
1029 puts ("-o FILE, --output=FILE\n\
1030 Write the tags to FILE.");
1031
1032 puts ("--parse-stdin=NAME\n\
1033 Read from standard input and record tags as belonging to file NAME.");
1034
1035 if (CTAGS)
1036 {
1037 puts ("-t, --typedefs\n\
1038 Generate tag entries for C and Ada typedefs.");
1039 puts ("-T, --typedefs-and-c++\n\
1040 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1041 and C++ member functions.");
1042 }
1043
1044 if (CTAGS)
1045 puts ("-u, --update\n\
1046 Update the tag entries for the given files, leaving tag\n\
1047 entries for other files in place. Currently, this is\n\
1048 implemented by deleting the existing entries for the given\n\
1049 files and then rewriting the new entries at the end of the\n\
1050 tags file. It is often faster to simply rebuild the entire\n\
1051 tag file than to use this.");
1052
1053 if (CTAGS)
1054 {
1055 puts ("-v, --vgrind\n\
1056 Print on the standard output an index of items intended for\n\
1057 human consumption, similar to the output of vgrind. The index\n\
1058 is sorted, and gives the page number of each item.");
1059
/* NOTE(review): the two undocumented entries below both show the
   short option `-w' -- confirm against the option table.  */
1060 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1061 puts ("-w, --no-duplicates\n\
1062 Do not create duplicate tag entries, for compatibility with\n\
1063 traditional ctags.");
1064
1065 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066 puts ("-w, --no-warn\n\
1067 Suppress warning messages about duplicate tag entries.");
1068
1069 puts ("-x, --cxref\n\
1070 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1071 The output uses line numbers instead of page numbers, but\n\
1072 beyond that the differences are cosmetic; try both to see\n\
1073 which you like.");
1074 }
1075
1076 puts ("-V, --version\n\
1077 Print the version of the program.\n\
1078 -h, --help\n\
1079 Print this help message.\n\
1080 Followed by one or more `--language' options prints detailed\n\
1081 help about tag generation for the specified languages.");
1082
1083 print_language_names ();
1084
1085 puts ("");
1086 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1087
1088 exit (EXIT_SUCCESS);
1089 }
1090
1091 \f
1092 int
1093 main (argc, argv)
1094 int argc;
1095 char *argv[];
1096 {
1097 int i;
1098 unsigned int nincluded_files;
1099 char **included_files;
1100 argument *argbuffer;
1101 int current_arg, file_count;
1102 linebuffer filename_lb;
1103 bool help_asked = FALSE;
1104 char *optstring;
1105 int opt;
1106
1107
1108 #ifdef DOS_NT
1109 _fmode = O_BINARY; /* all of files are treated as binary files */
1110 #endif /* DOS_NT */
1111
1112 progname = argv[0];
1113 nincluded_files = 0;
1114 included_files = xnew (argc, char *);
1115 current_arg = 0;
1116 file_count = 0;
1117
1118 /* Allocate enough no matter what happens. Overkill, but each one
1119 is small. */
1120 argbuffer = xnew (argc, argument);
1121
1122 /*
1123 * Always find typedefs and structure tags.
1124 * Also default to find macro constants, enum constants, struct
1125 * members and global variables. Do it for both etags and ctags.
1126 */
1127 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1128 globals = members = TRUE;
1129
1130 /* When the optstring begins with a '-' getopt_long does not rearrange the
1131 non-options arguments to be at the end, but leaves them alone. */
1132 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1133 "ac:Cf:Il:o:r:RSVhH",
1134 (CTAGS) ? "BxdtTuvw" : "Di:");
1135
1136 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1137 switch (opt)
1138 {
1139 case 0:
1140 /* If getopt returns 0, then it has already processed a
1141 long-named option. We should do nothing. */
1142 break;
1143
1144 case 1:
1145 /* This means that a file name has been seen. Record it. */
1146 argbuffer[current_arg].arg_type = at_filename;
1147 argbuffer[current_arg].what = optarg;
1148 ++current_arg;
1149 ++file_count;
1150 break;
1151
1152 case STDIN:
1153 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1154 argbuffer[current_arg].arg_type = at_stdin;
1155 argbuffer[current_arg].what = optarg;
1156 ++current_arg;
1157 ++file_count;
1158 if (parsing_stdin)
1159 fatal ("cannot parse standard input more than once", (char *)NULL);
1160 parsing_stdin = TRUE;
1161 break;
1162
1163 /* Common options. */
1164 case 'a': append_to_tagfile = TRUE; break;
1165 case 'C': cplusplus = TRUE; break;
1166 case 'f': /* for compatibility with old makefiles */
1167 case 'o':
1168 if (tagfile)
1169 {
1170 error ("-o option may only be given once.", (char *)NULL);
1171 suggest_asking_for_help ();
1172 /* NOTREACHED */
1173 }
1174 tagfile = optarg;
1175 break;
1176 case 'I':
1177 case 'S': /* for backward compatibility */
1178 ignoreindent = TRUE;
1179 break;
1180 case 'l':
1181 {
1182 language *lang = get_language_from_langname (optarg);
1183 if (lang != NULL)
1184 {
1185 argbuffer[current_arg].lang = lang;
1186 argbuffer[current_arg].arg_type = at_language;
1187 ++current_arg;
1188 }
1189 }
1190 break;
1191 case 'c':
1192 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1193 optarg = concat (optarg, "i", ""); /* memory leak here */
1194 /* FALLTHRU */
1195 case 'r':
1196 argbuffer[current_arg].arg_type = at_regexp;
1197 argbuffer[current_arg].what = optarg;
1198 ++current_arg;
1199 break;
1200 case 'R':
1201 argbuffer[current_arg].arg_type = at_regexp;
1202 argbuffer[current_arg].what = NULL;
1203 ++current_arg;
1204 break;
1205 case 'V':
1206 print_version ();
1207 break;
1208 case 'h':
1209 case 'H':
1210 help_asked = TRUE;
1211 break;
1212
1213 /* Etags options */
1214 case 'D': constantypedefs = FALSE; break;
1215 case 'i': included_files[nincluded_files++] = optarg; break;
1216
1217 /* Ctags options. */
1218 case 'B': searchar = '?'; break;
1219 case 'd': constantypedefs = TRUE; break;
1220 case 't': typedefs = TRUE; break;
1221 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1222 case 'u': update = TRUE; break;
1223 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1224 case 'x': cxref_style = TRUE; break;
1225 case 'w': no_warnings = TRUE; break;
1226 default:
1227 suggest_asking_for_help ();
1228 /* NOTREACHED */
1229 }
1230
1231 /* No more options. Store the rest of arguments. */
1232 for (; optind < argc; optind++)
1233 {
1234 argbuffer[current_arg].arg_type = at_filename;
1235 argbuffer[current_arg].what = argv[optind];
1236 ++current_arg;
1237 ++file_count;
1238 }
1239
1240 argbuffer[current_arg].arg_type = at_end;
1241
1242 if (help_asked)
1243 print_help (argbuffer);
1244 /* NOTREACHED */
1245
1246 if (nincluded_files == 0 && file_count == 0)
1247 {
1248 error ("no input files specified.", (char *)NULL);
1249 suggest_asking_for_help ();
1250 /* NOTREACHED */
1251 }
1252
1253 if (tagfile == NULL)
1254 tagfile = CTAGS ? "tags" : "TAGS";
1255 cwd = etags_getcwd (); /* the current working directory */
1256 if (cwd[strlen (cwd) - 1] != '/')
1257 {
1258 char *oldcwd = cwd;
1259 cwd = concat (oldcwd, "/", "");
1260 free (oldcwd);
1261 }
1262 /* Relative file names are made relative to the current directory. */
1263 if (streq (tagfile, "-")
1264 || strneq (tagfile, "/dev/", 5))
1265 tagfiledir = cwd;
1266 else
1267 tagfiledir = absolute_dirname (tagfile, cwd);
1268
1269 init (); /* set up boolean "functions" */
1270
1271 linebuffer_init (&lb);
1272 linebuffer_init (&filename_lb);
1273 linebuffer_init (&filebuf);
1274 linebuffer_init (&token_name);
1275
1276 if (!CTAGS)
1277 {
1278 if (streq (tagfile, "-"))
1279 {
1280 tagf = stdout;
1281 #ifdef DOS_NT
1282 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1283 doesn't take effect until after `stdout' is already open). */
1284 if (!isatty (fileno (stdout)))
1285 setmode (fileno (stdout), O_BINARY);
1286 #endif /* DOS_NT */
1287 }
1288 else
1289 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1290 if (tagf == NULL)
1291 pfatal (tagfile);
1292 }
1293
1294 /*
1295 * Loop through files finding functions.
1296 */
1297 for (i = 0; i < current_arg; i++)
1298 {
1299 static language *lang; /* non-NULL if language is forced */
1300 char *this_file;
1301
1302 switch (argbuffer[i].arg_type)
1303 {
1304 case at_language:
1305 lang = argbuffer[i].lang;
1306 break;
1307 case at_regexp:
1308 analyse_regex (argbuffer[i].what);
1309 break;
1310 case at_filename:
1311 this_file = argbuffer[i].what;
1312 /* Input file named "-" means read file names from stdin
1313 (one per line) and use them. */
1314 if (streq (this_file, "-"))
1315 {
1316 if (parsing_stdin)
1317 fatal ("cannot parse standard input AND read file names from it",
1318 (char *)NULL);
1319 while (readline_internal (&filename_lb, stdin) > 0)
1320 process_file_name (filename_lb.buffer, lang);
1321 }
1322 else
1323 process_file_name (this_file, lang);
1324 break;
1325 case at_stdin:
1326 this_file = argbuffer[i].what;
1327 process_file (stdin, this_file, lang);
1328 break;
1329 }
1330 }
1331
1332 free_regexps ();
1333 free (lb.buffer);
1334 free (filebuf.buffer);
1335 free (token_name.buffer);
1336
1337 if (!CTAGS || cxref_style)
1338 {
1339 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1340 put_entries (nodehead);
1341 free_tree (nodehead);
1342 nodehead = NULL;
1343 if (!CTAGS)
1344 {
1345 fdesc *fdp;
1346
1347 /* Output file entries that have no tags. */
1348 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1349 if (!fdp->written)
1350 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1351
1352 while (nincluded_files-- > 0)
1353 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1354
1355 if (fclose (tagf) == EOF)
1356 pfatal (tagfile);
1357 }
1358
1359 exit (EXIT_SUCCESS);
1360 }
1361
1362 /* From here on, we are in (CTAGS && !cxref_style) */
1363 if (update)
1364 {
1365 char cmd[BUFSIZ];
1366 for (i = 0; i < current_arg; ++i)
1367 {
1368 switch (argbuffer[i].arg_type)
1369 {
1370 case at_filename:
1371 case at_stdin:
1372 break;
1373 default:
1374 continue; /* the for loop */
1375 }
1376 sprintf (cmd,
1377 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1378 tagfile, argbuffer[i].what, tagfile);
1379 if (system (cmd) != EXIT_SUCCESS)
1380 fatal ("failed to execute shell command", (char *)NULL);
1381 }
1382 append_to_tagfile = TRUE;
1383 }
1384
1385 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1386 if (tagf == NULL)
1387 pfatal (tagfile);
1388 put_entries (nodehead); /* write all the tags (CTAGS) */
1389 free_tree (nodehead);
1390 nodehead = NULL;
1391 if (fclose (tagf) == EOF)
1392 pfatal (tagfile);
1393
1394 if (CTAGS)
1395 if (append_to_tagfile || update)
1396 {
1397 char cmd[2*BUFSIZ+20];
1398 /* Maybe these should be used:
1399 setenv ("LC_COLLATE", "C", 1);
1400 setenv ("LC_ALL", "C", 1); */
1401 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1402 exit (system (cmd));
1403 }
1404 return EXIT_SUCCESS;
1405 }
1406
1407
1408 /*
1409 * Return a compressor given the file name. If EXTPTR is non-zero,
1410 * return a pointer into FILE where the compressor-specific
1411 * extension begins. If no compressor is found, NULL is returned
1412 * and EXTPTR is not significant.
1413 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1414 */
1415 static compressor *
1416 get_compressor_from_suffix (file, extptr)
1417 char *file;
1418 char **extptr;
1419 {
1420 compressor *compr;
1421 char *slash, *suffix;
1422
1423 /* This relies on FN to be after canonicalize_filename,
1424 so we don't need to consider backslashes on DOS_NT. */
1425 slash = etags_strrchr (file, '/');
1426 suffix = etags_strrchr (file, '.');
1427 if (suffix == NULL || suffix < slash)
1428 return NULL;
1429 if (extptr != NULL)
1430 *extptr = suffix;
1431 suffix += 1;
1432 /* Let those poor souls who live with DOS 8+3 file name limits get
1433 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1434 Only the first do loop is run if not MSDOS */
1435 do
1436 {
1437 for (compr = compressors; compr->suffix != NULL; compr++)
1438 if (streq (compr->suffix, suffix))
1439 return compr;
1440 if (!MSDOS)
1441 break; /* do it only once: not really a loop */
1442 if (extptr != NULL)
1443 *extptr = ++suffix;
1444 } while (*suffix != '\0');
1445 return NULL;
1446 }
1447
1448
1449
1450 /*
1451 * Return a language given the name.
1452 */
1453 static language *
1454 get_language_from_langname (name)
1455 const char *name;
1456 {
1457 language *lang;
1458
1459 if (name == NULL)
1460 error ("empty language name", (char *)NULL);
1461 else
1462 {
1463 for (lang = lang_names; lang->name != NULL; lang++)
1464 if (streq (name, lang->name))
1465 return lang;
1466 error ("unknown language \"%s\"", name);
1467 }
1468
1469 return NULL;
1470 }
1471
1472
1473 /*
1474 * Return a language given the interpreter name.
1475 */
1476 static language *
1477 get_language_from_interpreter (interpreter)
1478 char *interpreter;
1479 {
1480 language *lang;
1481 char **iname;
1482
1483 if (interpreter == NULL)
1484 return NULL;
1485 for (lang = lang_names; lang->name != NULL; lang++)
1486 if (lang->interpreters != NULL)
1487 for (iname = lang->interpreters; *iname != NULL; iname++)
1488 if (streq (*iname, interpreter))
1489 return lang;
1490
1491 return NULL;
1492 }
1493
1494
1495
1496 /*
1497 * Return a language given the file name.
1498 */
1499 static language *
1500 get_language_from_filename (file, case_sensitive)
1501 char *file;
1502 bool case_sensitive;
1503 {
1504 language *lang;
1505 char **name, **ext, *suffix;
1506
1507 /* Try whole file name first. */
1508 for (lang = lang_names; lang->name != NULL; lang++)
1509 if (lang->filenames != NULL)
1510 for (name = lang->filenames; *name != NULL; name++)
1511 if ((case_sensitive)
1512 ? streq (*name, file)
1513 : strcaseeq (*name, file))
1514 return lang;
1515
1516 /* If not found, try suffix after last dot. */
1517 suffix = etags_strrchr (file, '.');
1518 if (suffix == NULL)
1519 return NULL;
1520 suffix += 1;
1521 for (lang = lang_names; lang->name != NULL; lang++)
1522 if (lang->suffixes != NULL)
1523 for (ext = lang->suffixes; *ext != NULL; ext++)
1524 if ((case_sensitive)
1525 ? streq (*ext, suffix)
1526 : strcaseeq (*ext, suffix))
1527 return lang;
1528 return NULL;
1529 }
1530
1531 \f
1532 /*
1533 * This routine is called on each file argument.
1534 */
1535 static void
1536 process_file_name (file, lang)
1537 char *file;
1538 language *lang;
1539 {
1540 struct stat stat_buf;
1541 FILE *inf;
1542 fdesc *fdp;
1543 compressor *compr;
1544 char *compressed_name, *uncompressed_name;
1545 char *ext, *real_name;
1546 int retval;
1547
1548 canonicalize_filename (file);
1549 if (streq (file, tagfile) && !streq (tagfile, "-"))
1550 {
1551 error ("skipping inclusion of %s in self.", file);
1552 return;
1553 }
1554 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1555 {
1556 compressed_name = NULL;
1557 real_name = uncompressed_name = savestr (file);
1558 }
1559 else
1560 {
1561 real_name = compressed_name = savestr (file);
1562 uncompressed_name = savenstr (file, ext - file);
1563 }
1564
1565 /* If the canonicalized uncompressed name
1566 has already been dealt with, skip it silently. */
1567 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1568 {
1569 assert (fdp->infname != NULL);
1570 if (streq (uncompressed_name, fdp->infname))
1571 goto cleanup;
1572 }
1573
1574 if (stat (real_name, &stat_buf) != 0)
1575 {
1576 /* Reset real_name and try with a different name. */
1577 real_name = NULL;
1578 if (compressed_name != NULL) /* try with the given suffix */
1579 {
1580 if (stat (uncompressed_name, &stat_buf) == 0)
1581 real_name = uncompressed_name;
1582 }
1583 else /* try all possible suffixes */
1584 {
1585 for (compr = compressors; compr->suffix != NULL; compr++)
1586 {
1587 compressed_name = concat (file, ".", compr->suffix);
1588 if (stat (compressed_name, &stat_buf) != 0)
1589 {
1590 if (MSDOS)
1591 {
1592 char *suf = compressed_name + strlen (file);
1593 size_t suflen = strlen (compr->suffix) + 1;
1594 for ( ; suf[1]; suf++, suflen--)
1595 {
1596 memmove (suf, suf + 1, suflen);
1597 if (stat (compressed_name, &stat_buf) == 0)
1598 {
1599 real_name = compressed_name;
1600 break;
1601 }
1602 }
1603 if (real_name != NULL)
1604 break;
1605 } /* MSDOS */
1606 free (compressed_name);
1607 compressed_name = NULL;
1608 }
1609 else
1610 {
1611 real_name = compressed_name;
1612 break;
1613 }
1614 }
1615 }
1616 if (real_name == NULL)
1617 {
1618 perror (file);
1619 goto cleanup;
1620 }
1621 } /* try with a different name */
1622
1623 if (!S_ISREG (stat_buf.st_mode))
1624 {
1625 error ("skipping %s: it is not a regular file.", real_name);
1626 goto cleanup;
1627 }
1628 if (real_name == compressed_name)
1629 {
1630 char *cmd = concat (compr->command, " ", real_name);
1631 inf = (FILE *) popen (cmd, "r");
1632 free (cmd);
1633 }
1634 else
1635 inf = fopen (real_name, "r");
1636 if (inf == NULL)
1637 {
1638 perror (real_name);
1639 goto cleanup;
1640 }
1641
1642 process_file (inf, uncompressed_name, lang);
1643
1644 if (real_name == compressed_name)
1645 retval = pclose (inf);
1646 else
1647 retval = fclose (inf);
1648 if (retval < 0)
1649 pfatal (file);
1650
1651 cleanup:
1652 free (compressed_name);
1653 free (uncompressed_name);
1654 last_node = NULL;
1655 curfdp = NULL;
1656 return;
1657 }
1658
1659 static void
1660 process_file (fh, fn, lang)
1661 FILE *fh;
1662 char *fn;
1663 language *lang;
1664 {
1665 static const fdesc emptyfdesc;
1666 fdesc *fdp;
1667
1668 /* Create a new input file description entry. */
1669 fdp = xnew (1, fdesc);
1670 *fdp = emptyfdesc;
1671 fdp->next = fdhead;
1672 fdp->infname = savestr (fn);
1673 fdp->lang = lang;
1674 fdp->infabsname = absolute_filename (fn, cwd);
1675 fdp->infabsdir = absolute_dirname (fn, cwd);
1676 if (filename_is_absolute (fn))
1677 {
1678 /* An absolute file name. Canonicalize it. */
1679 fdp->taggedfname = absolute_filename (fn, NULL);
1680 }
1681 else
1682 {
1683 /* A file name relative to cwd. Make it relative
1684 to the directory of the tags file. */
1685 fdp->taggedfname = relative_filename (fn, tagfiledir);
1686 }
1687 fdp->usecharno = TRUE; /* use char position when making tags */
1688 fdp->prop = NULL;
1689 fdp->written = FALSE; /* not written on tags file yet */
1690
1691 fdhead = fdp;
1692 curfdp = fdhead; /* the current file description */
1693
1694 find_entries (fh);
1695
1696 /* If not Ctags, and if this is not metasource and if it contained no #line
1697 directives, we can write the tags and free all nodes pointing to
1698 curfdp. */
1699 if (!CTAGS
1700 && curfdp->usecharno /* no #line directives in this file */
1701 && !curfdp->lang->metasource)
1702 {
1703 node *np, *prev;
1704
1705 /* Look for the head of the sublist relative to this file. See add_node
1706 for the structure of the node tree. */
1707 prev = NULL;
1708 for (np = nodehead; np != NULL; prev = np, np = np->left)
1709 if (np->fdp == curfdp)
1710 break;
1711
1712 /* If we generated tags for this file, write and delete them. */
1713 if (np != NULL)
1714 {
1715 /* This is the head of the last sublist, if any. The following
1716 instructions depend on this being true. */
1717 assert (np->left == NULL);
1718
1719 assert (fdhead == curfdp);
1720 assert (last_node->fdp == curfdp);
1721 put_entries (np); /* write tags for file curfdp->taggedfname */
1722 free_tree (np); /* remove the written nodes */
1723 if (prev == NULL)
1724 nodehead = NULL; /* no nodes left */
1725 else
1726 prev->left = NULL; /* delete the pointer to the sublist */
1727 }
1728 }
1729 }
1730
1731 /*
1732 * This routine sets up the boolean pseudo-functions which work
1733 * by setting boolean flags dependent upon the corresponding character.
1734 * Every char which is NOT in that string is not a white char. Therefore,
1735 * all of the array "_wht" is set to FALSE, and then the elements
1736 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1737 * of a char is TRUE if it is the string "white", else FALSE.
1738 */
1739 static void
1740 init ()
1741 {
1742 register char *sp;
1743 register int i;
1744
1745 for (i = 0; i < CHARS; i++)
1746 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1747 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1748 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1749 notinname('\0') = notinname('\n');
1750 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1751 begtoken('\0') = begtoken('\n');
1752 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1753 intoken('\0') = intoken('\n');
1754 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1755 endtoken('\0') = endtoken('\n');
1756 }
1757
1758 /*
1759 * This routine opens the specified file and calls the function
1760 * which finds the function and type definitions.
1761 */
1762 static void
1763 find_entries (inf)
1764 FILE *inf;
1765 {
1766 char *cp;
1767 language *lang = curfdp->lang;
1768 Lang_function *parser = NULL;
1769
1770 /* If user specified a language, use it. */
1771 if (lang != NULL && lang->function != NULL)
1772 {
1773 parser = lang->function;
1774 }
1775
1776 /* Else try to guess the language given the file name. */
1777 if (parser == NULL)
1778 {
1779 lang = get_language_from_filename (curfdp->infname, TRUE);
1780 if (lang != NULL && lang->function != NULL)
1781 {
1782 curfdp->lang = lang;
1783 parser = lang->function;
1784 }
1785 }
1786
1787 /* Else look for sharp-bang as the first two characters. */
1788 if (parser == NULL
1789 && readline_internal (&lb, inf) > 0
1790 && lb.len >= 2
1791 && lb.buffer[0] == '#'
1792 && lb.buffer[1] == '!')
1793 {
1794 char *lp;
1795
1796 /* Set lp to point at the first char after the last slash in the
1797 line or, if no slashes, at the first nonblank. Then set cp to
1798 the first successive blank and terminate the string. */
1799 lp = etags_strrchr (lb.buffer+2, '/');
1800 if (lp != NULL)
1801 lp += 1;
1802 else
1803 lp = skip_spaces (lb.buffer + 2);
1804 cp = skip_non_spaces (lp);
1805 *cp = '\0';
1806
1807 if (strlen (lp) > 0)
1808 {
1809 lang = get_language_from_interpreter (lp);
1810 if (lang != NULL && lang->function != NULL)
1811 {
1812 curfdp->lang = lang;
1813 parser = lang->function;
1814 }
1815 }
1816 }
1817
1818 /* We rewind here, even if inf may be a pipe. We fail if the
1819 length of the first line is longer than the pipe block size,
1820 which is unlikely. */
1821 rewind (inf);
1822
1823 /* Else try to guess the language given the case insensitive file name. */
1824 if (parser == NULL)
1825 {
1826 lang = get_language_from_filename (curfdp->infname, FALSE);
1827 if (lang != NULL && lang->function != NULL)
1828 {
1829 curfdp->lang = lang;
1830 parser = lang->function;
1831 }
1832 }
1833
1834 /* Else try Fortran or C. */
1835 if (parser == NULL)
1836 {
1837 node *old_last_node = last_node;
1838
1839 curfdp->lang = get_language_from_langname ("fortran");
1840 find_entries (inf);
1841
1842 if (old_last_node == last_node)
1843 /* No Fortran entries found. Try C. */
1844 {
1845 /* We do not tag if rewind fails.
1846 Only the file name will be recorded in the tags file. */
1847 rewind (inf);
1848 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1849 find_entries (inf);
1850 }
1851 return;
1852 }
1853
1854 if (!no_line_directive
1855 && curfdp->lang != NULL && curfdp->lang->metasource)
1856 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1857 file, or anyway we parsed a file that is automatically generated from
1858 this one. If this is the case, the bingo.c file contained #line
1859 directives that generated tags pointing to this file. Let's delete
1860 them all before parsing this file, which is the real source. */
1861 {
1862 fdesc **fdpp = &fdhead;
1863 while (*fdpp != NULL)
1864 if (*fdpp != curfdp
1865 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1866 /* We found one of those! We must delete both the file description
1867 and all tags referring to it. */
1868 {
1869 fdesc *badfdp = *fdpp;
1870
1871 /* Delete the tags referring to badfdp->taggedfname
1872 that were obtained from badfdp->infname. */
1873 invalidate_nodes (badfdp, &nodehead);
1874
1875 *fdpp = badfdp->next; /* remove the bad description from the list */
1876 free_fdesc (badfdp);
1877 }
1878 else
1879 fdpp = &(*fdpp)->next; /* advance the list pointer */
1880 }
1881
1882 assert (parser != NULL);
1883
1884 /* Generic initialisations before reading from file. */
1885 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1886
1887 /* Generic initialisations before parsing file with readline. */
1888 lineno = 0; /* reset global line number */
1889 charno = 0; /* reset global char number */
1890 linecharno = 0; /* reset global char number of line start */
1891
1892 parser (inf);
1893
1894 regex_tag_multiline ();
1895 }
1896
1897 \f
1898 /*
1899 * Check whether an implicitly named tag should be created,
1900 * then call `pfnote'.
1901 * NAME is a string that is internally copied by this function.
1902 *
1903 * TAGS format specification
1904 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1905 * The following is explained in some more detail in etc/ETAGS.EBNF.
1906 *
1907 * make_tag creates tags with "implicit tag names" (unnamed tags)
1908 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1909 * 1. NAME does not contain any of the characters in NONAM;
1910 * 2. LINESTART contains name as either a rightmost, or rightmost but
1911 * one character, substring;
1912 * 3. the character, if any, immediately before NAME in LINESTART must
1913 * be a character in NONAM;
1914 * 4. the character, if any, immediately after NAME in LINESTART must
1915 * also be a character in NONAM.
1916 *
1917 * The implementation uses the notinname() macro, which recognises the
1918 * characters stored in the string `nonam'.
1919 * etags.el needs to use the same characters that are in NONAM.
1920 */
1921 static void
1922 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1923 char *name; /* tag name, or NULL if unnamed */
1924 int namelen; /* tag length */
1925 bool is_func; /* tag is a function */
1926 char *linestart; /* start of the line where tag is */
1927 int linelen; /* length of the line where tag is */
1928 int lno; /* line number */
1929 long cno; /* character number */
1930 {
1931 bool named = (name != NULL && namelen > 0);
1932
1933 if (!CTAGS && named) /* maybe set named to false */
1934 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1935 such that etags.el can guess a name from it. */
1936 {
1937 int i;
1938 register char *cp = name;
1939
1940 for (i = 0; i < namelen; i++)
1941 if (notinname (*cp++))
1942 break;
1943 if (i == namelen) /* rule #1 */
1944 {
1945 cp = linestart + linelen - namelen;
1946 if (notinname (linestart[linelen-1]))
1947 cp -= 1; /* rule #4 */
1948 if (cp >= linestart /* rule #2 */
1949 && (cp == linestart
1950 || notinname (cp[-1])) /* rule #3 */
1951 && strneq (name, cp, namelen)) /* rule #2 */
1952 named = FALSE; /* use implicit tag name */
1953 }
1954 }
1955
1956 if (named)
1957 name = savenstr (name, namelen);
1958 else
1959 name = NULL;
1960 pfnote (name, is_func, linestart, linelen, lno, cno);
1961 }
1962
1963 /* Record a tag. */
1964 static void
1965 pfnote (name, is_func, linestart, linelen, lno, cno)
1966 char *name; /* tag name, or NULL if unnamed */
1967 bool is_func; /* tag is a function */
1968 char *linestart; /* start of the line where tag is */
1969 int linelen; /* length of the line where tag is */
1970 int lno; /* line number */
1971 long cno; /* character number */
1972 {
1973 register node *np;
1974
1975 assert (name == NULL || name[0] != '\0');
1976 if (CTAGS && name == NULL)
1977 return;
1978
1979 np = xnew (1, node);
1980
1981 /* If ctags mode, change name "main" to M<thisfilename>. */
1982 if (CTAGS && !cxref_style && streq (name, "main"))
1983 {
1984 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1985 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1986 fp = etags_strrchr (np->name, '.');
1987 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1988 fp[0] = '\0';
1989 }
1990 else
1991 np->name = name;
1992 np->valid = TRUE;
1993 np->been_warned = FALSE;
1994 np->fdp = curfdp;
1995 np->is_func = is_func;
1996 np->lno = lno;
1997 if (np->fdp->usecharno)
1998 /* Our char numbers are 0-base, because of C language tradition?
1999 ctags compatibility? old versions compatibility? I don't know.
2000 Anyway, since emacs's are 1-base we expect etags.el to take care
2001 of the difference. If we wanted to have 1-based numbers, we would
2002 uncomment the +1 below. */
2003 np->cno = cno /* + 1 */ ;
2004 else
2005 np->cno = invalidcharno;
2006 np->left = np->right = NULL;
2007 if (CTAGS && !cxref_style)
2008 {
2009 if (strlen (linestart) < 50)
2010 np->regex = concat (linestart, "$", "");
2011 else
2012 np->regex = savenstr (linestart, 50);
2013 }
2014 else
2015 np->regex = savenstr (linestart, linelen);
2016
2017 add_node (np, &nodehead);
2018 }
2019
2020 /*
2021 * free_tree ()
2022 * recurse on left children, iterate on right children.
2023 */
2024 static void
2025 free_tree (np)
2026 register node *np;
2027 {
2028 while (np)
2029 {
2030 register node *node_right = np->right;
2031 free_tree (np->left);
2032 free (np->name);
2033 free (np->regex);
2034 free (np);
2035 np = node_right;
2036 }
2037 }
2038
2039 /*
2040 * free_fdesc ()
2041 * delete a file description
2042 */
2043 static void
2044 free_fdesc (fdp)
2045 register fdesc *fdp;
2046 {
2047 free (fdp->infname);
2048 free (fdp->infabsname);
2049 free (fdp->infabsdir);
2050 free (fdp->taggedfname);
2051 free (fdp->prop);
2052 free (fdp);
2053 }
2054
2055 /*
2056 * add_node ()
2057 * Adds a node to the tree of nodes. In etags mode, sort by file
2058 * name. In ctags mode, sort by tag name. Make no attempt at
2059 * balancing.
2060 *
2061 * add_node is the only function allowed to add nodes, so it can
2062 * maintain state.
2063 */
2064 static void
2065 add_node (np, cur_node_p)
2066 node *np, **cur_node_p;
2067 {
2068 register int dif;
2069 register node *cur_node = *cur_node_p;
2070
2071 if (cur_node == NULL)
2072 {
2073 *cur_node_p = np;
2074 last_node = np;
2075 return;
2076 }
2077
2078 if (!CTAGS)
2079 /* Etags Mode */
2080 {
2081 /* For each file name, tags are in a linked sublist on the right
2082 pointer. The first tags of different files are a linked list
2083 on the left pointer. last_node points to the end of the last
2084 used sublist. */
2085 if (last_node != NULL && last_node->fdp == np->fdp)
2086 {
2087 /* Let's use the same sublist as the last added node. */
2088 assert (last_node->right == NULL);
2089 last_node->right = np;
2090 last_node = np;
2091 }
2092 else if (cur_node->fdp == np->fdp)
2093 {
2094 /* Scanning the list we found the head of a sublist which is
2095 good for us. Let's scan this sublist. */
2096 add_node (np, &cur_node->right);
2097 }
2098 else
2099 /* The head of this sublist is not good for us. Let's try the
2100 next one. */
2101 add_node (np, &cur_node->left);
2102 } /* if ETAGS mode */
2103
2104 else
2105 {
2106 /* Ctags Mode */
2107 dif = strcmp (np->name, cur_node->name);
2108
2109 /*
2110 * If this tag name matches an existing one, then
2111 * do not add the node, but maybe print a warning.
2112 */
2113 if (no_duplicates && !dif)
2114 {
2115 if (np->fdp == cur_node->fdp)
2116 {
2117 if (!no_warnings)
2118 {
2119 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2120 np->fdp->infname, lineno, np->name);
2121 fprintf (stderr, "Second entry ignored\n");
2122 }
2123 }
2124 else if (!cur_node->been_warned && !no_warnings)
2125 {
2126 fprintf
2127 (stderr,
2128 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2129 np->fdp->infname, cur_node->fdp->infname, np->name);
2130 cur_node->been_warned = TRUE;
2131 }
2132 return;
2133 }
2134
2135 /* Actually add the node */
2136 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2137 } /* if CTAGS mode */
2138 }
2139
2140 /*
2141 * invalidate_nodes ()
2142 * Scan the node tree and invalidate all nodes pointing to the
2143 * given file description (CTAGS case) or free them (ETAGS case).
2144 */
2145 static void
2146 invalidate_nodes (badfdp, npp)
2147 fdesc *badfdp;
2148 node **npp;
2149 {
2150 node *np = *npp;
2151
2152 if (np == NULL)
2153 return;
2154
2155 if (CTAGS)
2156 {
2157 if (np->left != NULL)
2158 invalidate_nodes (badfdp, &np->left);
2159 if (np->fdp == badfdp)
2160 np->valid = FALSE;
2161 if (np->right != NULL)
2162 invalidate_nodes (badfdp, &np->right);
2163 }
2164 else
2165 {
2166 assert (np->fdp != NULL);
2167 if (np->fdp == badfdp)
2168 {
2169 *npp = np->left; /* detach the sublist from the list */
2170 np->left = NULL; /* isolate it */
2171 free_tree (np); /* free it */
2172 invalidate_nodes (badfdp, npp);
2173 }
2174 else
2175 invalidate_nodes (badfdp, &np->left);
2176 }
2177 }
2178
2179 \f
2180 static int total_size_of_entries __P((node *));
2181 static int number_len __P((long));
2182
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  The result is at least 1.  */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, one more for each further power of ten.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2193
2194 /*
2195 * Return total number of characters that put_entries will output for
2196 * the nodes in the linked list at the right of the specified node.
2197 * This count is irrelevant with etags.el since emacs 19.34 at least,
2198 * but is still supplied for backward compatibility.
2199 */
2200 static int
2201 total_size_of_entries (np)
2202 register node *np;
2203 {
2204 register int total = 0;
2205
2206 for (; np != NULL; np = np->right)
2207 if (np->valid)
2208 {
2209 total += strlen (np->regex) + 1; /* pat\177 */
2210 if (np->name != NULL)
2211 total += strlen (np->name) + 1; /* name\001 */
2212 total += number_len ((long) np->lno) + 1; /* lno, */
2213 if (np->cno != invalidcharno) /* cno */
2214 total += number_len (np->cno);
2215 total += 1; /* newline */
2216 }
2217
2218 return total;
2219 }
2220
2221 static void
2222 put_entries (np)
2223 register node *np;
2224 {
2225 register char *sp;
2226 static fdesc *fdp = NULL;
2227
2228 if (np == NULL)
2229 return;
2230
2231 /* Output subentries that precede this one */
2232 if (CTAGS)
2233 put_entries (np->left);
2234
2235 /* Output this entry */
2236 if (np->valid)
2237 {
2238 if (!CTAGS)
2239 {
2240 /* Etags mode */
2241 if (fdp != np->fdp)
2242 {
2243 fdp = np->fdp;
2244 fprintf (tagf, "\f\n%s,%d\n",
2245 fdp->taggedfname, total_size_of_entries (np));
2246 fdp->written = TRUE;
2247 }
2248 fputs (np->regex, tagf);
2249 fputc ('\177', tagf);
2250 if (np->name != NULL)
2251 {
2252 fputs (np->name, tagf);
2253 fputc ('\001', tagf);
2254 }
2255 fprintf (tagf, "%d,", np->lno);
2256 if (np->cno != invalidcharno)
2257 fprintf (tagf, "%ld", np->cno);
2258 fputs ("\n", tagf);
2259 }
2260 else
2261 {
2262 /* Ctags mode */
2263 if (np->name == NULL)
2264 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2265
2266 if (cxref_style)
2267 {
2268 if (vgrind_style)
2269 fprintf (stdout, "%s %s %d\n",
2270 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2271 else
2272 fprintf (stdout, "%-16s %3d %-16s %s\n",
2273 np->name, np->lno, np->fdp->taggedfname, np->regex);
2274 }
2275 else
2276 {
2277 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2278
2279 if (np->is_func)
2280 { /* function or #define macro with args */
2281 putc (searchar, tagf);
2282 putc ('^', tagf);
2283
2284 for (sp = np->regex; *sp; sp++)
2285 {
2286 if (*sp == '\\' || *sp == searchar)
2287 putc ('\\', tagf);
2288 putc (*sp, tagf);
2289 }
2290 putc (searchar, tagf);
2291 }
2292 else
2293 { /* anything else; text pattern inadequate */
2294 fprintf (tagf, "%d", np->lno);
2295 }
2296 putc ('\n', tagf);
2297 }
2298 }
2299 } /* if this node contains a valid tag */
2300
2301 /* Output subentries that follow this one */
2302 put_entries (np->right);
2303 if (!CTAGS)
2304 put_entries (np->left);
2305 }
2306
2307 \f
2308 /* C extensions.
   Bit flags describing which C dialect is being parsed.  C_STAR (0x3)
   and C_JAVA (0x5) both contain the C_PLPL bit (0x1); presumably that is
   why the keyword table below writes (C_JAVA & ~C_PLPL) for keywords
   that must not match plain C++.  C_EXT is the mask covering the dialect
   bits; C_AUTO and YACC lie outside that mask.  */
2309 #define C_EXT 0x00fff /* C extensions */
2310 #define C_PLAIN 0x00000 /* C */
2311 #define C_PLPL 0x00001 /* C++ */
2312 #define C_STAR 0x00003 /* C* */
2313 #define C_JAVA 0x00005 /* JAVA */
2314 #define C_AUTO 0x01000 /* C, but switch to C++ if `class' is met */
2315 #define YACC 0x10000 /* yacc file */
2316
2317 /*
2318 * The C symbol tables.
 * sym_type is the classification that C_symtype returns for a token:
 * st_none for anything that is not in the keyword table, otherwise the
 * type recorded for the keyword in that table.
2319 */
2320 enum sym_type
2321 {
2322 st_none,
2323 st_C_objprot, st_C_objimpl, st_C_objend,
2324 st_C_gnumacro,
2325 st_C_ignore, st_C_attribute,
2326 st_C_javastruct,
2327 st_C_operator,
2328 st_C_class, st_C_template,
2329 st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2330 };
2331
2332 static unsigned int hash __P((const char *, unsigned int));
2333 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2334 static enum sym_type C_symtype __P((char *, int, int));
2335
2336 /* Feed stuff between (but not including) %[ and %] lines to:
2337 gperf -m 5
2338 %[
2339 %compare-strncmp
2340 %enum
2341 %struct-type
2342 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2343 %%
2344 if, 0, st_C_ignore
2345 for, 0, st_C_ignore
2346 while, 0, st_C_ignore
2347 switch, 0, st_C_ignore
2348 return, 0, st_C_ignore
2349 __attribute__, 0, st_C_attribute
2350 GTY, 0, st_C_attribute
2351 @interface, 0, st_C_objprot
2352 @protocol, 0, st_C_objprot
2353 @implementation,0, st_C_objimpl
2354 @end, 0, st_C_objend
2355 import, (C_JAVA & ~C_PLPL), st_C_ignore
2356 package, (C_JAVA & ~C_PLPL), st_C_ignore
2357 friend, C_PLPL, st_C_ignore
2358 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2359 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2360 interface, (C_JAVA & ~C_PLPL), st_C_struct
2361 class, 0, st_C_class
2362 namespace, C_PLPL, st_C_struct
2363 domain, C_STAR, st_C_struct
2364 union, 0, st_C_struct
2365 struct, 0, st_C_struct
2366 extern, 0, st_C_extern
2367 enum, 0, st_C_enum
2368 typedef, 0, st_C_typedef
2369 define, 0, st_C_define
2370 undef, 0, st_C_define
2371 operator, C_PLPL, st_C_operator
2372 template, 0, st_C_template
2373 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2374 DEFUN, 0, st_C_gnumacro
2375 SYSCALL, 0, st_C_gnumacro
2376 ENTRY, 0, st_C_gnumacro
2377 PSEUDO, 0, st_C_gnumacro
2378 # These are defined inside C functions, so currently they are not met.
2379 # EXFUN used in glibc, DEFVAR_* in emacs.
2380 #EXFUN, 0, st_C_gnumacro
2381 #DEFVAR_, 0, st_C_gnumacro
2382 %]
2383 and replace lines between %< and %> with its output, then:
2384 - remove the #if characterset check
2385 - make in_word_set static and not inline. */
2386 /*%<*/
2387 /* C code produced by gperf version 3.0.1 */
2388 /* Command-line: gperf -m 5 */
2389 /* Computed positions: -k'2-3' */
2390
/* One entry of the keyword table: the keyword, the dialect mask it is
   restricted to (0 means every dialect), and its classification.  */
2391 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2392 /* maximum key range = 33, duplicates = 0 */
2393
/* gperf-generated hash function; regenerate as described above rather
   than editing by hand.  The value is LEN plus the association value of
   the second character of STR and, when LEN is not 2, of the third
   character as well.  STR[1] is always read, and STR[2] is read for any
   LEN other than 2, so callers must guarantee LEN >= 2 (in_word_set
   checks MIN_WORD_LENGTH before calling).  */
2394 #ifdef __GNUC__
2395 __inline
2396 #else
2397 #ifdef __cplusplus
2398 inline
2399 #endif
2400 #endif
2401 static unsigned int
2402 hash (str, len)
2403 register const char *str;
2404 register unsigned int len;
2405 {
/* Association value for each possible unsigned char value; 35 is larger
   than MAX_HASH_VALUE and marks characters used by no keyword.  */
2406 static unsigned char asso_values[] =
2407 {
2408 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2409 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2410 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2411 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2412 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2413 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2414 35, 35, 35, 35, 35, 35, 35, 35, 35, 3,
2415 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2416 35, 35, 35, 24, 0, 35, 35, 35, 35, 0,
2417 35, 35, 35, 35, 35, 1, 35, 16, 35, 6,
2418 23, 0, 0, 35, 22, 0, 35, 35, 5, 0,
2419 0, 15, 1, 35, 6, 35, 8, 19, 35, 16,
2420 4, 5, 35, 35, 35, 35, 35, 35, 35, 35,
2421 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2422 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2423 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2424 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2425 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2426 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2427 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2428 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2429 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2430 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2431 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2432 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2433 35, 35, 35, 35, 35, 35
2434 };
2435 register int hval = len;
2436
2437 switch (hval)
2438 {
2439 default:
2440 hval += asso_values[(unsigned char)str[2]];
2441 /*FALLTHROUGH*/
2442 case 2:
2443 hval += asso_values[(unsigned char)str[1]];
2444 break;
2445 }
2446 return hval;
2447 }
2448
/* gperf-generated lookup; regenerate as described above rather than
   editing by hand.  Return the keyword table entry whose name is exactly
   the first LEN characters of STR, or 0 if there is none.  STR does not
   need to be NUL-terminated at LEN: the comparison is bounded by LEN and
   the table entry is required to end there.  */
2449 static struct C_stab_entry *
2450 in_word_set (str, len)
2451 register const char *str;
2452 register unsigned int len;
2453 {
2454 enum
2455 {
2456 TOTAL_KEYWORDS = 33,
2457 MIN_WORD_LENGTH = 2,
2458 MAX_WORD_LENGTH = 15,
2459 MIN_HASH_VALUE = 2,
2460 MAX_HASH_VALUE = 34
2461 };
2462
/* Indexed by hash value.  The two leading empty entries fill the hash
   values 0 and 1, which lie below MIN_HASH_VALUE.  */
2463 static struct C_stab_entry wordlist[] =
2464 {
2465 {""}, {""},
2466 {"if", 0, st_C_ignore},
2467 {"GTY", 0, st_C_attribute},
2468 {"@end", 0, st_C_objend},
2469 {"union", 0, st_C_struct},
2470 {"define", 0, st_C_define},
2471 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2472 {"template", 0, st_C_template},
2473 {"operator", C_PLPL, st_C_operator},
2474 {"@interface", 0, st_C_objprot},
2475 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2476 {"friend", C_PLPL, st_C_ignore},
2477 {"typedef", 0, st_C_typedef},
2478 {"return", 0, st_C_ignore},
2479 {"@implementation",0, st_C_objimpl},
2480 {"@protocol", 0, st_C_objprot},
2481 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2482 {"extern", 0, st_C_extern},
2483 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2484 {"struct", 0, st_C_struct},
2485 {"domain", C_STAR, st_C_struct},
2486 {"switch", 0, st_C_ignore},
2487 {"enum", 0, st_C_enum},
2488 {"for", 0, st_C_ignore},
2489 {"namespace", C_PLPL, st_C_struct},
2490 {"class", 0, st_C_class},
2491 {"while", 0, st_C_ignore},
2492 {"undef", 0, st_C_define},
2493 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2494 {"__attribute__", 0, st_C_attribute},
2495 {"SYSCALL", 0, st_C_gnumacro},
2496 {"ENTRY", 0, st_C_gnumacro},
2497 {"PSEUDO", 0, st_C_gnumacro},
2498 {"DEFUN", 0, st_C_gnumacro}
2499 };
2500
2501 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2502 {
2503 register int key = hash (str, len);
2504
2505 if (key <= MAX_HASH_VALUE && key >= 0)
2506 {
2507 register const char *s = wordlist[key].name;
2508
/* The hash only selects a candidate: confirm that the first LEN
   characters match and that the keyword is not longer than LEN.  */
2509 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2510 return &wordlist[key];
2511 }
2512 }
2513 return 0;
2514 }
2515 /*%>*/
2516
2517 static enum sym_type
2518 C_symtype (str, len, c_ext)
2519 char *str;
2520 int len;
2521 int c_ext;
2522 {
2523 register struct C_stab_entry *se = in_word_set (str, len);
2524
2525 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2526 return st_none;
2527 return se->type;
2528 }
2529
2530 \f
2531 /*
2532 * Ignoring __attribute__ ((list))
 * consider_token sets this flag when it meets a token classified as
 * st_C_attribute; the code that clears it is outside this section.
2533 */
2534 static bool inattribute; /* looking at an __attribute__ construct */
2535
2536 /*
2537 * C functions and variables are recognized using a simple
2538 * finite automaton. fvdef is its state variable.
2539 */
2540 static enum
2541 {
2542 fvnone, /* nothing seen */
2543 fdefunkey, /* Emacs DEFUN keyword seen */
2544 fdefunname, /* Emacs DEFUN name seen */
2545 foperator, /* func: operator keyword seen (cplpl) */
2546 fvnameseen, /* function or variable name seen */
2547 fstartlist, /* func: just after open parenthesis */
2548 finlist, /* func: in parameter list */
2549 flistseen, /* func: after parameter list */
2550 fignore, /* func: before open brace */
2551 vignore /* var-like: ignore until ';' */
2552 } fvdef;
2553
2554 static bool fvextern; /* func or var: extern keyword seen */
2555
2556 /*
2557 * typedefs are recognized using a simple finite automaton.
2558 * typdef is its state variable.
2559 */
2560 static enum
2561 {
2562 tnone, /* nothing seen */
2563 tkeyseen, /* typedef keyword seen */
2564 ttypeseen, /* defined type seen */
2565 tinbody, /* inside typedef body */
2566 tend, /* just before typedef tag */
2567 tignore /* junk after typedef tag */
2568 } typdef;
2569
2570 /*
2571 * struct-like structures (enum, struct and union) are recognized
2572 * using another simple finite automaton. `structdef' is its state
2573 * variable.
2574 */
2575 static enum
2576 {
2577 snone, /* nothing seen yet,
2578 or in struct body if bracelev > 0 */
2579 skeyseen, /* struct-like keyword seen */
2580 stagseen, /* struct-like tag seen */
2581 scolonseen /* colon seen after struct-like tag */
2582 } structdef;
2583
2584 /*
2585 * When objdef is different from onone, objtag is the name of the class.
 * consider_token replaces it with a freshly allocated copy of the class
 * name and never frees the previous value.
2586 */
2587 static char *objtag = "<uninited>";
2588
2589 /*
2590 * Yet another little state machine to deal with preprocessor lines.
2591 */
2592 static enum
2593 {
2594 dnone, /* nothing seen */
2595 dsharpseen, /* '#' seen as first char on line */
2596 ddefineseen, /* '#' and 'define' seen */
2597 dignorerest /* ignore rest of line */
2598 } definedef;
2599
2600 /*
2601 * State machine for Objective C protocols and implementations.
2602 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2603 */
2604 static enum
2605 {
2606 onone, /* nothing seen */
2607 oprotocol, /* @interface or @protocol seen */
2608 oimplementation, /* @implementation seen */
2609 otagseen, /* class name seen */
2610 oparenseen, /* parenthesis before category seen */
2611 ocatseen, /* category name seen */
2612 oinbody, /* in @implementation body */
2613 omethodsign, /* in @implementation body, after +/- */
2614 omethodtag, /* after method name */
2615 omethodcolon, /* after method colon */
2616 omethodparm, /* after method parameter */
2617 oignore /* wait for @end */
2618 } objdef;
2619
2620
2621 /*
2622 * Use this structure to keep info about the token read, and how it
2623 * should be tagged. Used by the make_C_tag function to build a tag.
2624 */
2625 static struct tok
2626 {
2627 char *line; /* string containing the token */
2628 int offset; /* where the token starts in LINE */
2629 int length; /* token length */
2630 /*
2631 The previous members can be used to pass strings around for generic
2632 purposes. The following ones specifically refer to creating tags. In this
2633 case the token contained here is the pattern that will be used to create a
2634 tag.
2635 */
2636 bool valid; /* when FALSE, do not create a tag; the token
2637 should be invalidated whenever a state machine
2638 is reset prematurely.
   NOTE(review): the original wording, "do not
   create a tag", presumably described the FALSE
   case -- confirm against make_C_tag */
2639 bool named; /* create a named tag */
2640 int lineno; /* source line number of tag */
2641 long linepos; /* source char number of tag */
2642 } token; /* latest token read */
2643
2644 /*
2645 * Variables and functions for dealing with nested structures.
2646 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2647 */
2648 static void pushclass_above __P((int, char *, int));
2649 static void popclass_above __P((int));
2650 static void write_classname __P((linebuffer *, char *qualifier));
2651
/* cname and bracelev are parallel arrays of SIZE elements, of which the
   first NL are in use.  An element of cname may be NULL: pushclass_above
   stores NULL when it is given no name.  */
2652 static struct {
2653 char **cname; /* nested class names */
2654 int *bracelev; /* nested class brace level */
2655 int nl; /* class nesting level (elements used) */
2656 int size; /* length of the array */
2657 } cstack; /* stack for nested declaration tags */
2658 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2659 #define nestlev (cstack.nl)
2660 /* After struct keyword or in struct body, not inside a nested function.
   This macro refers to a variable named `bracelev', which must be in
   scope wherever the macro is expanded. */
2661 #define instruct (structdef == snone && nestlev > 0 \
2662 && bracelev == cstack.bracelev[nestlev-1] + 1)
2663
2664 static void
2665 pushclass_above (bracelev, str, len)
2666 int bracelev;
2667 char *str;
2668 int len;
2669 {
2670 int nl;
2671
2672 popclass_above (bracelev);
2673 nl = cstack.nl;
2674 if (nl >= cstack.size)
2675 {
2676 int size = cstack.size *= 2;
2677 xrnew (cstack.cname, size, char *);
2678 xrnew (cstack.bracelev, size, int);
2679 }
2680 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2681 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2682 cstack.bracelev[nl] = bracelev;
2683 cstack.nl = nl + 1;
2684 }
2685
2686 static void
2687 popclass_above (bracelev)
2688 int bracelev;
2689 {
2690 int nl;
2691
2692 for (nl = cstack.nl - 1;
2693 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2694 nl--)
2695 {
2696 free (cstack.cname[nl]);
2697 cstack.nl = nl;
2698 }
2699 }
2700
2701 static void
2702 write_classname (cn, qualifier)
2703 linebuffer *cn;
2704 char *qualifier;
2705 {
2706 int i, len;
2707 int qlen = strlen (qualifier);
2708
2709 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2710 {
2711 len = 0;
2712 cn->len = 0;
2713 cn->buffer[0] = '\0';
2714 }
2715 else
2716 {
2717 len = strlen (cstack.cname[0]);
2718 linebuffer_setlen (cn, len);
2719 strcpy (cn->buffer, cstack.cname[0]);
2720 }
2721 for (i = 1; i < cstack.nl; i++)
2722 {
2723 char *s;
2724 int slen;
2725
2726 s = cstack.cname[i];
2727 if (s == NULL)
2728 continue;
2729 slen = strlen (s);
2730 len += slen + qlen;
2731 linebuffer_setlen (cn, len);
2732 strncat (cn->buffer, qualifier, qlen);
2733 strncat (cn->buffer, s, slen);
2734 }
2735 }
2736
2737 \f
2738 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2739 static void make_C_tag __P((bool));
2740
2741 /*
2742 * consider_token ()
2743 * checks to see if the current token is at the start of a
2744 * function or variable, or corresponds to a typedef, or
2745 * is a struct/union/enum tag, or #define, or an enum constant.
 * Returns TRUE when it does, FALSE otherwise.  Whatever the result,
 * the state machines listed below may have been advanced.
2746 *
2747 * *IS_FUNC_OR_VAR gets TRUE if the token is a function or #define macro
2748 * with args. C_EXTP points to which language we are looking at.
 * *IS_FUNC_OR_VAR is not assigned on every path that returns TRUE
 * (typedef names, struct tags and enum constants leave it alone), so
 * the caller presumably initialises it before the call.
 * *C_EXTP is switched from C_AUTO to C_PLPL when a `class' or `template'
 * keyword, or a name ending in "::operator", reveals C++ code.
2749 *
2750 * Globals
2751 * fvdef IN OUT
2752 * structdef IN OUT
2753 * definedef IN OUT
2754 * typdef IN OUT
2755 * objdef IN OUT
2756 */
2757
2758 static bool
2759 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2760 register char *str; /* IN: token pointer */
2761 register int len; /* IN: token length */
2762 register int c; /* IN: first char after the token */
2763 int *c_extp; /* IN, OUT: C extensions mask */
2764 int bracelev; /* IN: brace level */
2765 int parlev; /* IN: parenthesis level */
2766 bool *is_func_or_var; /* OUT: function or variable found */
2767 {
2768 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2769 structtype is the type of the preceding struct-like keyword, and
2770 structbracelev is the brace level where it has been seen. */
2771 static enum sym_type structtype;
2772 static int structbracelev;
2773 static enum sym_type toktype;
2774
2775
/* Classify the token against the keyword table for the current dialect. */
2776 toktype = C_symtype (str, len, *c_extp);
2777
2778 /*
2779 * Skip __attribute__
2780 */
2781 if (toktype == st_C_attribute)
2782 {
2783 inattribute = TRUE;
2784 return FALSE;
2785 }
2786
2787 /*
2788 * Advance the definedef state machine.
 * Only the dnone case falls out of this switch; every other known
 * state returns from the function.
2789 */
2790 switch (definedef)
2791 {
2792 case dnone:
2793 /* We're not on a preprocessor line. */
2794 if (toktype == st_C_gnumacro)
2795 {
2796 fvdef = fdefunkey;
2797 return FALSE;
2798 }
2799 break;
2800 case dsharpseen:
2801 if (toktype == st_C_define)
2802 {
2803 definedef = ddefineseen;
2804 }
2805 else
2806 {
2807 definedef = dignorerest;
2808 }
2809 return FALSE;
2810 case ddefineseen:
2811 /*
2812 * Make a tag for any macro, unless it is a constant
2813 * and constantypedefs is FALSE.
2814 */
2815 definedef = dignorerest;
2816 *is_func_or_var = (c == '(');
2817 if (!*is_func_or_var && !constantypedefs)
2818 return FALSE;
2819 else
2820 return TRUE;
2821 case dignorerest:
2822 return FALSE;
2823 default:
2824 error ("internal error: definedef value.", (char *)NULL);
2825 }
2826
2827 /*
2828 * Now typedefs
2829 */
2830 switch (typdef)
2831 {
2832 case tnone:
2833 if (toktype == st_C_typedef)
2834 {
2835 if (typedefs)
2836 typdef = tkeyseen;
2837 fvextern = FALSE;
2838 fvdef = fvnone;
2839 return FALSE;
2840 }
2841 break;
2842 case tkeyseen:
/* Any other token type leaves typdef at tkeyseen. */
2843 switch (toktype)
2844 {
2845 case st_none:
2846 case st_C_class:
2847 case st_C_struct:
2848 case st_C_enum:
2849 typdef = ttypeseen;
2850 }
2851 break;
2852 case ttypeseen:
2853 if (structdef == snone && fvdef == fvnone)
2854 {
2855 fvdef = fvnameseen;
2856 return TRUE;
2857 }
2858 break;
2859 case tend:
/* A struct-like keyword here is not the typedef name; anything else is. */
2860 switch (toktype)
2861 {
2862 case st_C_class:
2863 case st_C_struct:
2864 case st_C_enum:
2865 return FALSE;
2866 }
2867 return TRUE;
2868 }
2869
/* Struct-like keywords.  The cases deliberately fall through from
   template/class into struct/enum, except that `template' leaves the
   switch after the C++ auto-detection. */
2870 switch (toktype)
2871 {
2872 case st_C_javastruct:
2873 if (structdef == stagseen)
2874 structdef = scolonseen;
2875 return FALSE;
2876 case st_C_template:
2877 case st_C_class:
2878 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2879 && bracelev == 0
2880 && definedef == dnone && structdef == snone
2881 && typdef == tnone && fvdef == fvnone)
2882 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2883 if (toktype == st_C_template)
2884 break;
2885 /* FALLTHRU */
2886 case st_C_struct:
2887 case st_C_enum:
2888 if (parlev == 0
2889 && fvdef != vignore
2890 && (typdef == tkeyseen
2891 || (typedefs_or_cplusplus && structdef == snone)))
2892 {
2893 structdef = skeyseen;
2894 structtype = toktype;
2895 structbracelev = bracelev;
2896 if (fvdef == fvnameseen)
2897 fvdef = fvnone;
2898 }
2899 return FALSE;
2900 }
2901
/* The first token that reaches this point after a struct-like keyword
   is taken as the tag of that struct. */
2902 if (structdef == skeyseen)
2903 {
2904 structdef = stagseen;
2905 return TRUE;
2906 }
2907
/* NOTE(review): every definedef state other than dnone returned in the
   switch above, so this assignment looks redundant -- confirm before
   removing it. */
2908 if (typdef != tnone)
2909 definedef = dnone;
2910
2911 /* Detect Objective C constructs. */
2912 switch (objdef)
2913 {
2914 case onone:
2915 switch (toktype)
2916 {
2917 case st_C_objprot:
2918 objdef = oprotocol;
2919 return FALSE;
2920 case st_C_objimpl:
2921 objdef = oimplementation;
2922 return FALSE;
2923 }
2924 break;
2925 case oimplementation:
2926 /* Save the class tag for functions or variables defined inside. */
2927 objtag = savenstr (str, len);
2928 objdef = oinbody;
2929 return FALSE;
2930 case oprotocol:
2931 /* Save the class tag for categories. */
2932 objtag = savenstr (str, len);
2933 objdef = otagseen;
2934 *is_func_or_var = TRUE;
2935 return TRUE;
2936 case oparenseen:
2937 objdef = ocatseen;
2938 *is_func_or_var = TRUE;
2939 return TRUE;
2940 case oinbody:
2941 break;
2942 case omethodsign:
/* First part of a method name: start token_name afresh with it. */
2943 if (parlev == 0)
2944 {
2945 fvdef = fvnone;
2946 objdef = omethodtag;
2947 linebuffer_setlen (&token_name, len);
2948 strncpy (token_name.buffer, str, len);
2949 token_name.buffer[len] = '\0';
2950 return TRUE;
2951 }
2952 return FALSE;
2953 case omethodcolon:
2954 if (parlev == 0)
2955 objdef = omethodparm;
2956 return FALSE;
2957 case omethodparm:
/* A further part of the method name: append it to token_name. */
2958 if (parlev == 0)
2959 {
2960 fvdef = fvnone;
2961 objdef = omethodtag;
2962 linebuffer_setlen (&token_name, token_name.len + len);
2963 strncat (token_name.buffer, str, len);
2964 return TRUE;
2965 }
2966 return FALSE;
2967 case oignore:
2968 if (toktype == st_C_objend)
2969 {
2970 /* Memory leakage here: the string pointed by objtag is
2971 never released, because many tests would be needed to
2972 avoid breaking on incorrect input code. The amount of
2973 memory leaked here is the sum of the lengths of the
2974 class tags.
2975 free (objtag); */
2976 objdef = onone;
2977 }
2978 return FALSE;
2979 }
2980
2981 /* A function, variable or enum constant? */
2982 switch (toktype)
2983 {
2984 case st_C_extern:
2985 fvextern = TRUE;
2986 switch (fvdef)
2987 {
2988 case finlist:
2989 case flistseen:
2990 case fignore:
2991 case vignore:
2992 break;
2993 default:
2994 fvdef = fvnone;
2995 }
2996 return FALSE;
2997 case st_C_ignore:
2998 fvextern = FALSE;
2999 fvdef = vignore;
3000 return FALSE;
3001 case st_C_operator:
3002 fvdef = foperator;
3003 *is_func_or_var = TRUE;
3004 return TRUE;
3005 case st_none:
/* A plain identifier deeper than the brace level at which the most
   recent struct-like keyword, an `enum', was seen. */
3006 if (constantypedefs
3007 && structdef == snone
3008 && structtype == st_C_enum && bracelev > structbracelev)
3009 return TRUE; /* enum constant */
3010 switch (fvdef)
3011 {
3012 case fdefunkey:
3013 if (bracelev > 0)
3014 break;
3015 fvdef = fdefunname; /* GNU macro */
3016 *is_func_or_var = TRUE;
3017 return TRUE;
3018 case fvnone:
3019 switch (typdef)
3020 {
3021 case ttypeseen:
3022 return FALSE;
3023 case tnone:
3024 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3025 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3026 {
3027 fvdef = vignore;
3028 return FALSE;
3029 }
3030 break;
3031 }
3032 /* FALLTHRU */
3033 case fvnameseen:
3034 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3035 {
3036 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3037 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3038 fvdef = foperator;
3039 *is_func_or_var = TRUE;
3040 return TRUE;
3041 }
/* Inside braces, only names directly in a struct body are candidates. */
3042 if (bracelev > 0 && !instruct)
3043 break;
3044 fvdef = fvnameseen; /* function or variable */
3045 *is_func_or_var = TRUE;
3046 return TRUE;
3047 }
3048 break;
3049 }
3050
3051 return FALSE;
3052 }
3053
3054 \f
3055 /*
3056 * C_entries often keeps pointers to tokens or lines which are older than
3057 * the line currently read. By keeping two line buffers, and switching
3058 * them at end of line, it is possible to use those pointers.
3059 */
3060 static struct
3061 {
3062 long linepos; /* value of charno saved just before this line was read */
3063 linebuffer lb; /* the line buffer that readline fills for this slot */
3064 } lbs[2];
3065
/* The macros below expand to code that uses variables of C_entries
   (curndx, newndx, c_ext, lp, quotednl, inf, savetoken), so they are
   only meaningful inside that function. */

/* True when the new line and the current line live in the same slot of lbs. */
3066 #define current_lb_is_new (newndx == curndx)
/* Make the other slot of lbs the current one. */
3067 #define switch_line_buffers() (curndx = 1 - curndx)
3068
/* Shorthands for the buffer and saved position of the current and new slot. */
3069 #define curlb (lbs[curndx].lb)
3070 #define newlb (lbs[newndx].lb)
3071 #define curlinepos (lbs[curndx].linepos)
3072 #define newlinepos (lbs[newndx].linepos)
3073
/* Tests on the dialect bits held in c_ext. */
3074 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3075 #define cplpl (c_ext & C_PLPL)
3076 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3077
/* Read the next input line into the current buffer, recording charno as
   its position, restart scanning at its first character, clear quotednl
   and mark the current slot as the new one.  definedef is left alone. */
3078 #define CNL_SAVE_DEFINEDEF() \
3079 do { \
3080 curlinepos = charno; \
3081 readline (&curlb, inf); \
3082 lp = curlb.buffer; \
3083 quotednl = FALSE; \
3084 newndx = curndx; \
3085 } while (0)
3086
/* Same as CNL_SAVE_DEFINEDEF, and additionally bring back the token held
   in savetoken (when it is valid) and reset definedef to dnone. */
3087 #define CNL() \
3088 do { \
3089 CNL_SAVE_DEFINEDEF(); \
3090 if (savetoken.valid) \
3091 { \
3092 token = savetoken; \
3093 savetoken.valid = FALSE; \
3094 } \
3095 definedef = dnone; \
3096 } while (0)
3097
3098
3099 static void
3100 make_C_tag (isfun)
3101 bool isfun;
3102 {
3103 /* This function is never called when token.valid is FALSE, but
3104 we must protect against invalid input or internal errors. */
3105 if (token.valid)
3106 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3107 token.offset+token.length+1, token.lineno, token.linepos);
3108 else if (DEBUG)
3109 { /* this branch is optimised away if !DEBUG */
3110 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3111 token_name.len + 17, isfun, token.line,
3112 token.offset+token.length+1, token.lineno, token.linepos);
3113 error ("INVALID TOKEN", NULL);
3114 }
3115
3116 token.valid = FALSE;
3117 }
3118
3119
3120 /*
3121 * C_entries ()
3122 * This routine finds functions, variables, typedefs,
3123 * #define's, enum constants and struct/union/enum definitions in
3124 * C syntax and adds them to the list.
 *
 * C_EXT holds the dialect bits tested by the plainc, cplpl and cjava
 * macros and by the YACC and C_AUTO checks below; INF is the stream
 * the lines are read from.
 *
 * The input is scanned one character per loop iteration.  Comments,
 * string and character literals and bracketed text are skipped first.
 * Identifiers are then handed to consider_token, and the punctuation
 * character that follows decides, in the big switch at the end of the
 * loop, whether make_C_tag is called for the pending token.
3125 */
3126 static void
3127 C_entries (c_ext, inf)
3128 int c_ext; /* extension of C */
3129 FILE *inf; /* input file */
3130 {
3131 register char c; /* latest char read; '\0' for end of line */
3132 register char *lp; /* pointer one beyond the character `c' */
3133 int curndx, newndx; /* indices for current and new lb */
3134 register int tokoff; /* offset in line of start of current token */
3135 register int toklen; /* length of current token */
3136 char *qualifier; /* string used to qualify names */
3137 int qlen; /* length of qualifier */
3138 int bracelev; /* current brace level */
3139 int bracketlev; /* current bracket level */
3140 int parlev; /* current parenthesis level */
3141 int attrparlev; /* __attribute__ parenthesis level */
3142 int templatelev; /* current template level */
3143 int typdefbracelev; /* bracelev where a typedef struct body begun */
3144 bool incomm, inquote, inchar, quotednl, midtoken;
3145 bool yacc_rules; /* in the rules part of a yacc file */
3146 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3147
3148
/* Set up both line buffers and allocate the class stack while its
   size is still zero. */
3149 linebuffer_init (&lbs[0].lb);
3150 linebuffer_init (&lbs[1].lb);
3151 if (cstack.size == 0)
3152 {
3153 cstack.size = (DEBUG) ? 1 : 4;
3154 cstack.nl = 0;
3155 cstack.cname = xnew (cstack.size, char *);
3156 cstack.bracelev = xnew (cstack.size, int);
3157 }
3158
3159 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3160 curndx = newndx = 0;
3161 lp = curlb.buffer;
3162 *lp = 0;
3163
/* Reset the parser state before scanning this file. */
3164 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3165 structdef = snone; definedef = dnone; objdef = onone;
3166 yacc_rules = FALSE;
3167 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3168 token.valid = savetoken.valid = FALSE;
3169 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3170 if (cjava)
3171 { qualifier = "."; qlen = 1; }
3172 else
3173 { qualifier = "::"; qlen = 2; }
3174
3175
/* Main scanning loop: each iteration looks at one character. */
3176 while (!feof (inf))
3177 {
3178 c = *lp++;
3179 if (c == '\\')
3180 {
3181 /* If we are at the end of the line, the next character is a
3182 '\0'; do not skip it, because it is what tells us
3183 to read the next line. */
3184 if (*lp == '\0')
3185 {
3186 quotednl = TRUE;
3187 continue;
3188 }
3189 lp++;
3190 c = ' ';
3191 }
3192 else if (incomm)
3193 {
3194 switch (c)
3195 {
3196 case '*':
3197 if (*lp == '/')
3198 {
3199 c = *lp++;
3200 incomm = FALSE;
3201 }
3202 break;
3203 case '\0':
3204 /* Newlines inside comments do not end macro definitions in
3205 traditional cpp. */
3206 CNL_SAVE_DEFINEDEF ();
3207 break;
3208 }
3209 continue;
3210 }
3211 else if (inquote)
3212 {
3213 switch (c)
3214 {
3215 case '"':
3216 inquote = FALSE;
3217 break;
3218 case '\0':
3219 /* Newlines inside strings do not end macro definitions
3220 in traditional cpp, even though compilers don't
3221 usually accept them. */
3222 CNL_SAVE_DEFINEDEF ();
3223 break;
3224 }
3225 continue;
3226 }
3227 else if (inchar)
3228 {
3229 switch (c)
3230 {
3231 case '\0':
3232 /* Hmmm, something went wrong. */
3233 CNL ();
3234 /* FALLTHRU */
3235 case '\'':
3236 inchar = FALSE;
3237 break;
3238 }
3239 continue;
3240 }
/* Inside square brackets: skip everything up to the matching bracket. */
3241 else if (bracketlev > 0)
3242 {
3243 switch (c)
3244 {
3245 case ']':
3246 if (--bracketlev > 0)
3247 continue;
3248 break;
3249 case '\0':
3250 CNL_SAVE_DEFINEDEF ();
3251 break;
3252 }
3253 continue;
3254 }
3255 else switch (c)
3256 {
3257 case '"':
3258 inquote = TRUE;
3259 if (inattribute)
3260 break;
3261 switch (fvdef)
3262 {
3263 case fdefunkey:
3264 case fstartlist:
3265 case finlist:
3266 case fignore:
3267 case vignore:
3268 break;
3269 default:
3270 fvextern = FALSE;
3271 fvdef = fvnone;
3272 }
3273 continue;
3274 case '\'':
3275 inchar = TRUE;
3276 if (inattribute)
3277 break;
3278 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3279 {
3280 fvextern = FALSE;
3281 fvdef = fvnone;
3282 }
3283 continue;
3284 case '/':
3285 if (*lp == '*')
3286 {
3287 incomm = TRUE;
3288 lp++;
3289 c = ' ';
3290 }
3291 else if (/* cplpl && */ *lp == '/')
3292 {
/* A double-slash comment: pretend the line ends here. */
3293 c = '\0';
3294 }
3295 break;
3296 case '%':
3297 if ((c_ext & YACC) && *lp == '%')
3298 {
3299 /* Entering or exiting rules section in yacc file. */
3300 lp++;
3301 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3302 typdef = tnone; structdef = snone;
3303 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3304 bracelev = 0;
3305 yacc_rules = !yacc_rules;
3306 continue;
3307 }
3308 else
3309 break;
3310 case '#':
3311 if (definedef == dnone)
3312 {
3313 char *cp;
3314 bool cpptoken = TRUE;
3315
3316 /* Look back on this line. If all blanks, or nonblanks
3317 followed by an end of comment, this is a preprocessor
3318 token. */
3319 for (cp = newlb.buffer; cp < lp-1; cp++)
3320 if (!iswhite (*cp))
3321 {
3322 if (*cp == '*' && *(cp+1) == '/')
3323 {
3324 cp++;
3325 cpptoken = TRUE;
3326 }
3327 else
3328 cpptoken = FALSE;
3329 }
3330 if (cpptoken)
3331 definedef = dsharpseen;
3332 } /* if (definedef == dnone) */
3333 continue;
3334 case '[':
3335 bracketlev++;
3336 continue;
3337 } /* switch (c) */
3338
3339
3340 /* Consider token only if some involved conditions are satisfied. */
3341 if (typdef != tignore
3342 && definedef != dignorerest
3343 && fvdef != finlist
3344 && templatelev == 0
3345 && (definedef != dnone
3346 || structdef != scolonseen)
3347 && !inattribute)
3348 {
3349 if (midtoken)
3350 {
3351 if (endtoken (c))
3352 {
3353 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3354 /* This handles :: in the middle,
3355 but not at the beginning of an identifier.
3356 Also, space-separated :: is not recognised. */
3357 {
3358 if (c_ext & C_AUTO) /* automatic detection of C++ */
3359 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3360 lp += 2;
3361 toklen += 2;
3362 c = lp[-1];
3363 goto still_in_token;
3364 }
3365 else
3366 {
3367 bool funorvar = FALSE;
3368
3369 if (yacc_rules
3370 || consider_token (newlb.buffer + tokoff, toklen, c,
3371 &c_ext, bracelev, parlev,
3372 &funorvar))
3373 {
/* After the operator keyword, extend the token: skip spaces, then
   take characters up to the next blank, open parenthesis or end of
   line, and add the distance covered to toklen. */
3374 if (fvdef == foperator)
3375 {
3376 char *oldlp = lp;
3377 lp = skip_spaces (lp-1);
3378 if (*lp != '\0')
3379 lp += 1;
3380 while (*lp != '\0'
3381 && !iswhite (*lp) && *lp != '(')
3382 lp += 1;
3383 c = *lp++;
3384 toklen += lp - oldlp;
3385 }
3386 token.named = FALSE;
3387 if (!plainc
3388 && nestlev > 0 && definedef == dnone)
3389 /* in struct body */
3390 {
3391 write_classname (&token_name, qualifier);
3392 linebuffer_setlen (&token_name,
3393 token_name.len+qlen+toklen);
3394 strcat (token_name.buffer, qualifier);
3395 strncat (token_name.buffer,
3396 newlb.buffer + tokoff, toklen);
3397 token.named = TRUE;
3398 }
3399 else if (objdef == ocatseen)
3400 /* Objective C category */
3401 {
3402 int len = strlen (objtag) + 2 + toklen;
3403 linebuffer_setlen (&token_name, len);
3404 strcpy (token_name.buffer, objtag);
3405 strcat (token_name.buffer, "(");
3406 strncat (token_name.buffer,
3407 newlb.buffer + tokoff, toklen);
3408 strcat (token_name.buffer, ")");
3409 token.named = TRUE;
3410 }
3411 else if (objdef == omethodtag
3412 || objdef == omethodparm)
3413 /* Objective C method */
3414 {
3415 token.named = TRUE;
3416 }
3417 else if (fvdef == fdefunname)
3418 /* GNU DEFUN and similar macros */
3419 {
3420 bool defun = (newlb.buffer[tokoff] == 'F');
3421 int off = tokoff;
3422 int len = toklen;
3423
3424 /* Rewrite the tag so that emacs lisp DEFUNs
3425 can be found by their elisp name */
3426 if (defun)
3427 {
3428 off += 1;
3429 len -= 1;
3430 }
3431 linebuffer_setlen (&token_name, len);
3432 strncpy (token_name.buffer,
3433 newlb.buffer + off, len);
3434 token_name.buffer[len] = '\0';
3435 if (defun)
3436 while (--len >= 0)
3437 if (token_name.buffer[len] == '_')
3438 token_name.buffer[len] = '-';
3439 token.named = defun;
3440 }
3441 else
3442 {
3443 linebuffer_setlen (&token_name, toklen);
3444 strncpy (token_name.buffer,
3445 newlb.buffer + tokoff, toklen);
3446 token_name.buffer[toklen] = '\0';
3447 /* Name macros and members. */
3448 token.named = (structdef == stagseen
3449 || typdef == ttypeseen
3450 || typdef == tend
3451 || (funorvar
3452 && definedef == dignorerest)
3453 || (funorvar
3454 && definedef == dnone
3455 && structdef == snone
3456 && bracelev > 0));
3457 }
3458 token.lineno = lineno;
3459 token.offset = tokoff;
3460 token.length = toklen;
3461 token.line = newlb.buffer;
3462 token.linepos = newlinepos;
3463 token.valid = TRUE;
3464
3465 if (definedef == dnone
3466 && (fvdef == fvnameseen
3467 || fvdef == foperator
3468 || structdef == stagseen
3469 || typdef == tend
3470 || typdef == ttypeseen
3471 || objdef != onone))
3472 {
3473 if (current_lb_is_new)
3474 switch_line_buffers ();
3475 }
3476 else if (definedef != dnone
3477 || fvdef == fdefunname
3478 || instruct)
3479 make_C_tag (funorvar);
3480 }
3481 else /* not yacc and consider_token failed */
3482 {
3483 if (inattribute && fvdef == fignore)
3484 {
3485 /* We have just met __attribute__ after a
3486 function parameter list: do not tag the
3487 function again. */
3488 fvdef = fvnone;
3489 }
3490 }
3491 midtoken = FALSE;
3492 }
3493 } /* if (endtoken (c)) */
3494 else if (intoken (c))
3495 still_in_token:
3496 {
3497 toklen++;
3498 continue;
3499 }
3500 } /* if (midtoken) */
3501 else if (begtoken (c))
3502 {
3503 switch (definedef)
3504 {
3505 case dnone:
3506 switch (fvdef)
3507 {
3508 case fstartlist:
3509 /* This prevents tagging fb in
3510 void (__attribute__((noreturn)) *fb) (void);
3511 Fixing this is not easy and not very important. */
3512 fvdef = finlist;
3513 continue;
3514 case flistseen:
3515 if (plainc || declarations)
3516 {
3517 make_C_tag (TRUE); /* a function */
3518 fvdef = fignore;
3519 }
3520 break;
3521 }
3522 if (structdef == stagseen && !cjava)
3523 {
3524 popclass_above (bracelev);
3525 structdef = snone;
3526 }
3527 break;
3528 case dsharpseen:
3529 savetoken = token;
3530 break;
3531 }
3532 if (!yacc_rules || lp == newlb.buffer + 1)
3533 {
3534 tokoff = lp - 1 - newlb.buffer;
3535 toklen = 1;
3536 midtoken = TRUE;
3537 }
3538 continue;
3539 } /* if (begtoken) */
3540 } /* if must look at token */
3541
3542
3543 /* Detect end of line, colon, comma, semicolon and various braces
3544 after having handled a token.*/
3545 switch (c)
3546 {
3547 case ':':
3548 if (inattribute)
3549 break;
3550 if (yacc_rules && token.offset == 0 && token.valid)
3551 {
3552 make_C_tag (FALSE); /* a yacc function */
3553 break;
3554 }
3555 if (definedef != dnone)
3556 break;
3557 switch (objdef)
3558 {
3559 case otagseen:
3560 objdef = oignore;
3561 make_C_tag (TRUE); /* an Objective C class */
3562 break;
3563 case omethodtag:
3564 case omethodparm:
3565 objdef = omethodcolon;
3566 linebuffer_setlen (&token_name, token_name.len + 1);
3567 strcat (token_name.buffer, ":");
3568 break;
3569 }
3570 if (structdef == stagseen)
3571 {
3572 structdef = scolonseen;
3573 break;
3574 }
3575 /* Should be useless, but may work as a safety net. */
3576 if (cplpl && fvdef == flistseen)
3577 {
3578 make_C_tag (TRUE); /* a function */
3579 fvdef = fignore;
3580 break;
3581 }
3582 break;
3583 case ';':
3584 if (definedef != dnone || inattribute)
3585 break;
3586 switch (typdef)
3587 {
3588 case tend:
3589 case ttypeseen:
3590 make_C_tag (FALSE); /* a typedef */
3591 typdef = tnone;
3592 fvdef = fvnone;
3593 break;
3594 case tnone:
3595 case tinbody:
3596 case tignore:
3597 switch (fvdef)
3598 {
3599 case fignore:
3600 if (typdef == tignore || cplpl)
3601 fvdef = fvnone;
3602 break;
3603 case fvnameseen:
3604 if ((globals && bracelev == 0 && (!fvextern || declarations))
3605 || (members && instruct))
3606 make_C_tag (FALSE); /* a variable */
3607 fvextern = FALSE;
3608 fvdef = fvnone;
3609 token.valid = FALSE;
3610 break;
3611 case flistseen:
3612 if ((declarations
3613 && (cplpl || !instruct)
3614 && (typdef == tnone || (typdef != tignore && instruct)))
3615 || (members
3616 && plainc && instruct))
3617 make_C_tag (TRUE); /* a function */
3618 /* FALLTHRU */
3619 default:
3620 fvextern = FALSE;
3621 fvdef = fvnone;
3622 if (declarations
3623 && cplpl && structdef == stagseen)
3624 make_C_tag (FALSE); /* forward declaration */
3625 else
3626 token.valid = FALSE;
3627 } /* switch (fvdef) */
3628 /* FALLTHRU */
3629 default:
3630 if (!instruct)
3631 typdef = tnone;
3632 }
3633 if (structdef == stagseen)
3634 structdef = snone;
3635 break;
3636 case ',':
3637 if (definedef != dnone || inattribute)
3638 break;
3639 switch (objdef)
3640 {
3641 case omethodtag:
3642 case omethodparm:
3643 make_C_tag (TRUE); /* an Objective C method */
3644 objdef = oinbody;
3645 break;
3646 }
3647 switch (fvdef)
3648 {
3649 case fdefunkey:
3650 case foperator:
3651 case fstartlist:
3652 case finlist:
3653 case fignore:
3654 case vignore:
3655 break;
3656 case fdefunname:
3657 fvdef = fignore;
3658 break;
3659 case fvnameseen:
3660 if (parlev == 0
3661 && ((globals
3662 && bracelev == 0
3663 && templatelev == 0
3664 && (!fvextern || declarations))
3665 || (members && instruct)))
3666 make_C_tag (FALSE); /* a variable */
3667 break;
3668 case flistseen:
3669 if ((declarations && typdef == tnone && !instruct)
3670 || (members && typdef != tignore && instruct))
3671 {
3672 make_C_tag (TRUE); /* a function */
3673 fvdef = fvnameseen;
3674 }
3675 else if (!declarations)
3676 fvdef = fvnone;
3677 token.valid = FALSE;
3678 break;
3679 default:
3680 fvdef = fvnone;
3681 }
3682 if (structdef == stagseen)
3683 structdef = snone;
3684 break;
3685 case ']':
3686 if (definedef != dnone || inattribute)
3687 break;
3688 if (structdef == stagseen)
3689 structdef = snone;
3690 switch (typdef)
3691 {
3692 case ttypeseen:
3693 case tend:
3694 typdef = tignore;
3695 make_C_tag (FALSE); /* a typedef */
3696 break;
3697 case tnone:
3698 case tinbody:
3699 switch (fvdef)
3700 {
3701 case foperator:
3702 case finlist:
3703 case fignore:
3704 case vignore:
3705 break;
3706 case fvnameseen:
3707 if ((members && bracelev == 1)
3708 || (globals && bracelev == 0
3709 && (!fvextern || declarations)))
3710 make_C_tag (FALSE); /* a variable */
3711 /* FALLTHRU */
3712 default:
3713 fvdef = fvnone;
3714 }
3715 break;
3716 }
3717 break;
3718 case '(':
3719 if (inattribute)
3720 {
3721 attrparlev++;
3722 break;
3723 }
3724 if (definedef != dnone)
3725 break;
3726 if (objdef == otagseen && parlev == 0)
3727 objdef = oparenseen;
3728 switch (fvdef)
3729 {
3730 case fvnameseen:
3731 if (typdef == ttypeseen
3732 && *lp != '*'
3733 && !instruct)
3734 {
3735 /* This handles constructs like:
3736 typedef void OperatorFun (int fun); */
3737 make_C_tag (FALSE);
3738 typdef = tignore;
3739 fvdef = fignore;
3740 break;
3741 }
3742 /* FALLTHRU */
3743 case foperator:
3744 fvdef = fstartlist;
3745 break;
3746 case flistseen:
3747 fvdef = finlist;
3748 break;
3749 }
3750 parlev++;
3751 break;
3752 case ')':
3753 if (inattribute)
3754 {
3755 if (--attrparlev == 0)
3756 inattribute = FALSE;
3757 break;
3758 }
3759 if (definedef != dnone)
3760 break;
3761 if (objdef == ocatseen && parlev == 1)
3762 {
3763 make_C_tag (TRUE); /* an Objective C category */
3764 objdef = oignore;
3765 }
3766 if (--parlev == 0)
3767 {
3768 switch (fvdef)
3769 {
3770 case fstartlist:
3771 case finlist:
3772 fvdef = flistseen;
3773 break;
3774 }
3775 if (!instruct
3776 && (typdef == tend
3777 || typdef == ttypeseen))
3778 {
3779 typdef = tignore;
3780 make_C_tag (FALSE); /* a typedef */
3781 }
3782 }
3783 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3784 parlev = 0;
3785 break;
3786 case '{':
3787 if (definedef != dnone)
3788 break;
3789 if (typdef == ttypeseen)
3790 {
3791 /* Whenever typdef is set to tinbody (currently only
3792 here), typdefbracelev should be set to bracelev. */
3793 typdef = tinbody;
3794 typdefbracelev = bracelev;
3795 }
3796 switch (fvdef)
3797 {
3798 case flistseen:
3799 make_C_tag (TRUE); /* a function */
3800 /* FALLTHRU */
3801 case fignore:
3802 fvdef = fvnone;
3803 break;
3804 case fvnone:
3805 switch (objdef)
3806 {
3807 case otagseen:
3808 make_C_tag (TRUE); /* an Objective C class */
3809 objdef = oignore;
3810 break;
3811 case omethodtag:
3812 case omethodparm:
3813 make_C_tag (TRUE); /* an Objective C method */
3814 objdef = oinbody;
3815 break;
3816 default:
3817 /* Neutralize `extern "C" {' grot. */
3818 if (bracelev == 0 && structdef == snone && nestlev == 0
3819 && typdef == tnone)
3820 bracelev = -1;
3821 }
3822 break;
3823 }
3824 switch (structdef)
3825 {
3826 case skeyseen: /* unnamed struct */
3827 pushclass_above (bracelev, NULL, 0);
3828 structdef = snone;
3829 break;
3830 case stagseen: /* named struct or enum */
3831 case scolonseen: /* a class */
3832 pushclass_above (bracelev,token.line+token.offset, token.length);
3833 structdef = snone;
3834 make_C_tag (FALSE); /* a struct or enum */
3835 break;
3836 }
3837 bracelev += 1;
3838 break;
3839 case '*':
3840 if (definedef != dnone)
3841 break;
3842 if (fvdef == fstartlist)
3843 {
3844 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3845 token.valid = FALSE;
3846 }
3847 break;
3848 case '}':
3849 if (definedef != dnone)
3850 break;
3851 bracelev -= 1;
3852 if (!ignoreindent && lp == newlb.buffer + 1)
3853 {
3854 if (bracelev != 0)
3855 token.valid = FALSE; /* unexpected value, token unreliable */
3856 bracelev = 0; /* reset brace level if first column */
3857 parlev = 0; /* also reset paren level, just in case... */
3858 }
3859 else if (bracelev < 0)
3860 {
3861 token.valid = FALSE; /* something gone amiss, token unreliable */
3862 bracelev = 0;
3863 }
3864 if (bracelev == 0 && fvdef == vignore)
3865 fvdef = fvnone; /* end of function */
3866 popclass_above (bracelev);
3867 structdef = snone;
3868 /* Only if typdef == tinbody is typdefbracelev significant. */
3869 if (typdef == tinbody && bracelev <= typdefbracelev)
3870 {
3871 assert (bracelev == typdefbracelev);
3872 typdef = tend;
3873 }
3874 break;
3875 case '=':
3876 if (definedef != dnone)
3877 break;
3878 switch (fvdef)
3879 {
3880 case foperator:
3881 case finlist:
3882 case fignore:
3883 case vignore:
3884 break;
3885 case fvnameseen:
3886 if ((members && bracelev == 1)
3887 || (globals && bracelev == 0 && (!fvextern || declarations)))
3888 make_C_tag (FALSE); /* a variable */
3889 /* FALLTHRU */
3890 default:
3891 fvdef = vignore;
3892 }
3893 break;
3894 case '<':
3895 if (cplpl
3896 && (structdef == stagseen || fvdef == fvnameseen))
3897 {
3898 templatelev++;
3899 break;
3900 }
3901 goto resetfvdef;
3902 case '>':
3903 if (templatelev > 0)
3904 {
3905 templatelev--;
3906 break;
3907 }
3908 goto resetfvdef;
3909 case '+':
3910 case '-':
3911 if (objdef == oinbody && bracelev == 0)
3912 {
3913 objdef = omethodsign;
3914 break;
3915 }
3916 /* FALLTHRU */
3917 resetfvdef:
3918 case '#': case '~': case '&': case '%': case '/':
3919 case '|': case '^': case '!': case '.': case '?':
3920 if (definedef != dnone)
3921 break;
3922 /* These surely cannot follow a function tag in C. */
3923 switch (fvdef)
3924 {
3925 case foperator:
3926 case finlist:
3927 case fignore:
3928 case vignore:
3929 break;
3930 default:
3931 fvdef = fvnone;
3932 }
3933 break;
3934 case '\0':
3935 if (objdef == otagseen)
3936 {
3937 make_C_tag (TRUE); /* an Objective C class */
3938 objdef = oignore;
3939 }
3940 /* If a macro spans multiple lines don't reset its state. */
3941 if (quotednl)
3942 CNL_SAVE_DEFINEDEF ();
3943 else
3944 CNL ();
3945 break;
3946 } /* switch (c) */
3947
3948 } /* while not eof */
3949
/* Release the storage of both line buffers. */
3950 free (lbs[0].lb.buffer);
3951 free (lbs[1].lb.buffer);
3952 }
3953
3954 /*
3955 * Process either a C++ file or a C file depending on the setting
3956 * of a global flag.
3957 */
3958 static void
3959 default_C_entries (inf)
3960 FILE *inf;
3961 {
3962 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3963 }
3964
/* Tag INF with no dialect bit set, i.e. always as plain C. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;              /* no extension flags at all */

  C_entries (dialect, inf);
}
3972
3973 /* Always do C++. */
3974 static void
3975 Cplusplus_entries (inf)
3976 FILE *inf;
3977 {
3978 C_entries (C_PLPL, inf);
3979 }
3980
3981 /* Always do Java. */
3982 static void
3983 Cjava_entries (inf)
3984 FILE *inf;
3985 {
3986 C_entries (C_JAVA, inf);
3987 }
3988
3989 /* Always do C*. */
3990 static void
3991 Cstar_entries (inf)
3992 FILE *inf;
3993 {
3994 C_entries (C_STAR, inf);
3995 }
3996
3997 /* Always do Yacc. */
3998 static void
3999 Yacc_entries (inf)
4000 FILE *inf;
4001 {
4002 C_entries (YACC, inf);
4003 }
4004
4005 \f
4006 /* Useful macros. */

/* Loop header: while FILE_POINTER is not at end of file, read one line
   into LINE_BUFFER with readline and point CHAR_POINTER at its start,
   then run the statement that follows the macro.  The end-of-file test
   is made before each read, so the body also runs for the line whose
   read reached end of file. */
4007 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4008 for (; /* loop initialization */ \
4009 !feof (file_pointer) /* loop test */ \
4010 && /* instructions at start of loop */ \
4011 (readline (&line_buffer, file_pointer), \
4012 char_pointer = line_buffer.buffer, \
4013 TRUE); \
4014 )
4015
/* If CP points at keyword KW followed by a character for which notinname
   holds, advance CP past KW and any following spaces.  CP is assigned to,
   so it must be an lvalue; it is left unchanged when there is no match. */
4016 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4017 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4018 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4019 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4020 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4021
4022 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use
   notinname to check the end of the keyword, and does not skip the
   spaces that follow it. */
4023 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4024 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4025 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4026 && ((cp) += sizeof(kw)-1)) /* advance past kw */
4027
4028 /*
4029 * Read a file, but do no processing. This is used to do regexp
4030 * matching on files that have no language defined.
4031 */
4032 static void
4033 just_read_file (inf)
4034 FILE *inf;
4035 {
4036 register char *dummy;
4037
4038 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4039 continue;
4040 }
4041
4042 \f
4043 /* Fortran parsing */
4044
4045 static void F_takeprec __P((void));
4046 static void F_getit __P((FILE *));
4047
4048 static void
4049 F_takeprec ()
4050 {
4051 dbp = skip_spaces (dbp);
4052 if (*dbp != '*')
4053 return;
4054 dbp++;
4055 dbp = skip_spaces (dbp);
4056 if (strneq (dbp, "(*)", 3))
4057 {
4058 dbp += 3;
4059 return;
4060 }
4061 if (!ISDIGIT (*dbp))
4062 {
4063 --dbp; /* force failure */
4064 return;
4065 }
4066 do
4067 dbp++;
4068 while (ISDIGIT (*dbp));
4069 }
4070
4071 static void
4072 F_getit (inf)
4073 FILE *inf;
4074 {
4075 register char *cp;
4076
4077 dbp = skip_spaces (dbp);
4078 if (*dbp == '\0')
4079 {
4080 readline (&lb, inf);
4081 dbp = lb.buffer;
4082 if (dbp[5] != '&')
4083 return;
4084 dbp += 6;
4085 dbp = skip_spaces (dbp);
4086 }
4087 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4088 return;
4089 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4090 continue;
4091 make_tag (dbp, cp-dbp, TRUE,
4092 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4093 }
4094
4095
4096 static void
4097 Fortran_functions (inf)
4098 FILE *inf;
4099 {
4100 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4101 {
4102 if (*dbp == '%')
4103 dbp++; /* Ratfor escape to fortran */
4104 dbp = skip_spaces (dbp);
4105 if (*dbp == '\0')
4106 continue;
4107 switch (lowcase (*dbp))
4108 {
4109 case 'i':
4110 if (nocase_tail ("integer"))
4111 F_takeprec ();
4112 break;
4113 case 'r':
4114 if (nocase_tail ("real"))
4115 F_takeprec ();
4116 break;
4117 case 'l':
4118 if (nocase_tail ("logical"))
4119 F_takeprec ();
4120 break;
4121 case 'c':
4122 if (nocase_tail ("complex") || nocase_tail ("character"))
4123 F_takeprec ();
4124 break;
4125 case 'd':
4126 if (nocase_tail ("double"))
4127 {
4128 dbp = skip_spaces (dbp);
4129 if (*dbp == '\0')
4130 continue;
4131 if (nocase_tail ("precision"))
4132 break;
4133 continue;
4134 }
4135 break;
4136 }
4137 dbp = skip_spaces (dbp);
4138 if (*dbp == '\0')
4139 continue;
4140 switch (lowcase (*dbp))
4141 {
4142 case 'f':
4143 if (nocase_tail ("function"))
4144 F_getit (inf);
4145 continue;
4146 case 's':
4147 if (nocase_tail ("subroutine"))
4148 F_getit (inf);
4149 continue;
4150 case 'e':
4151 if (nocase_tail ("entry"))
4152 F_getit (inf);
4153 continue;
4154 case 'b':
4155 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4156 {
4157 dbp = skip_spaces (dbp);
4158 if (*dbp == '\0') /* assume un-named */
4159 make_tag ("blockdata", 9, TRUE,
4160 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4161 else
4162 F_getit (inf); /* look for name */
4163 }
4164 continue;
4165 }
4166 }
4167 }
4168
4169 \f
4170 /*
4171 * Ada parsing
4172 * Original code by
4173 * Philippe Waroquiers (1998)
4174 */
4175
4176 static void Ada_getit __P((FILE *, char *));
4177
4178 /* Once we are positioned after an "interesting" keyword, let's get
4179 the real tag value necessary. */
4180 static void
4181 Ada_getit (inf, name_qualifier)
4182 FILE *inf;
4183 char *name_qualifier;
4184 {
4185 register char *cp;
4186 char *name;
4187 char c;
4188
4189 while (!feof (inf))
4190 {
4191 dbp = skip_spaces (dbp);
4192 if (*dbp == '\0'
4193 || (dbp[0] == '-' && dbp[1] == '-'))
4194 {
4195 readline (&lb, inf);
4196 dbp = lb.buffer;
4197 }
4198 switch (lowcase(*dbp))
4199 {
4200 case 'b':
4201 if (nocase_tail ("body"))
4202 {
4203 /* Skipping body of procedure body or package body or ....
4204 resetting qualifier to body instead of spec. */
4205 name_qualifier = "/b";
4206 continue;
4207 }
4208 break;
4209 case 't':
4210 /* Skipping type of task type or protected type ... */
4211 if (nocase_tail ("type"))
4212 continue;
4213 break;
4214 }
4215 if (*dbp == '"')
4216 {
4217 dbp += 1;
4218 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4219 continue;
4220 }
4221 else
4222 {
4223 dbp = skip_spaces (dbp);
4224 for (cp = dbp;
4225 (*cp != '\0'
4226 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4227 cp++)
4228 continue;
4229 if (cp == dbp)
4230 return;
4231 }
4232 c = *cp;
4233 *cp = '\0';
4234 name = concat (dbp, name_qualifier, "");
4235 *cp = c;
4236 make_tag (name, strlen (name), TRUE,
4237 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4238 free (name);
4239 if (c == '"')
4240 dbp = cp + 1;
4241 return;
4242 }
4243 }
4244
4245 static void
4246 Ada_funcs (inf)
4247 FILE *inf;
4248 {
4249 bool inquote = FALSE;
4250 bool skip_till_semicolumn = FALSE;
4251
4252 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4253 {
4254 while (*dbp != '\0')
4255 {
4256 /* Skip a string i.e. "abcd". */
4257 if (inquote || (*dbp == '"'))
4258 {
4259 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4260 if (dbp != NULL)
4261 {
4262 inquote = FALSE;
4263 dbp += 1;
4264 continue; /* advance char */
4265 }
4266 else
4267 {
4268 inquote = TRUE;
4269 break; /* advance line */
4270 }
4271 }
4272
4273 /* Skip comments. */
4274 if (dbp[0] == '-' && dbp[1] == '-')
4275 break; /* advance line */
4276
4277 /* Skip character enclosed in single quote i.e. 'a'
4278 and skip single quote starting an attribute i.e. 'Image. */
4279 if (*dbp == '\'')
4280 {
4281 dbp++ ;
4282 if (*dbp != '\0')
4283 dbp++;
4284 continue;
4285 }
4286
4287 if (skip_till_semicolumn)
4288 {
4289 if (*dbp == ';')
4290 skip_till_semicolumn = FALSE;
4291 dbp++;
4292 continue; /* advance char */
4293 }
4294
4295 /* Search for beginning of a token. */
4296 if (!begtoken (*dbp))
4297 {
4298 dbp++;
4299 continue; /* advance char */
4300 }
4301
4302 /* We are at the beginning of a token. */
4303 switch (lowcase(*dbp))
4304 {
4305 case 'f':
4306 if (!packages_only && nocase_tail ("function"))
4307 Ada_getit (inf, "/f");
4308 else
4309 break; /* from switch */
4310 continue; /* advance char */
4311 case 'p':
4312 if (!packages_only && nocase_tail ("procedure"))
4313 Ada_getit (inf, "/p");
4314 else if (nocase_tail ("package"))
4315 Ada_getit (inf, "/s");
4316 else if (nocase_tail ("protected")) /* protected type */
4317 Ada_getit (inf, "/t");
4318 else
4319 break; /* from switch */
4320 continue; /* advance char */
4321
4322 case 'u':
4323 if (typedefs && !packages_only && nocase_tail ("use"))
4324 {
4325 /* when tagging types, avoid tagging use type Pack.Typename;
4326 for this, we will skip everything till a ; */
4327 skip_till_semicolumn = TRUE;
4328 continue; /* advance char */
4329 }
4330
4331 case 't':
4332 if (!packages_only && nocase_tail ("task"))
4333 Ada_getit (inf, "/k");
4334 else if (typedefs && !packages_only && nocase_tail ("type"))
4335 {
4336 Ada_getit (inf, "/t");
4337 while (*dbp != '\0')
4338 dbp += 1;
4339 }
4340 else
4341 break; /* from switch */
4342 continue; /* advance char */
4343 }
4344
4345 /* Look for the end of the token. */
4346 while (!endtoken (*dbp))
4347 dbp++;
4348
4349 } /* advance char */
4350 } /* advance line */
4351 }
4352
4353 \f
4354 /*
4355 * Unix and microcontroller assembly tag handling
4356 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4357 * Idea by Bob Weiner, Motorola Inc. (1994)
4358 */
4359 static void
4360 Asm_labels (inf)
4361 FILE *inf;
4362 {
4363 register char *cp;
4364
4365 LOOP_ON_INPUT_LINES (inf, lb, cp)
4366 {
4367 /* If first char is alphabetic or one of [_.$], test for colon
4368 following identifier. */
4369 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4370 {
4371 /* Read past label. */
4372 cp++;
4373 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4374 cp++;
4375 if (*cp == ':' || iswhite (*cp))
4376 /* Found end of label, so copy it and add it to the table. */
4377 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4378 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4379 }
4380 }
4381 }
4382
4383 \f
4384 /*
4385 * Perl support
4386 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4387 * Perl variable names: /^(my|local).../
4388 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4389 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4390 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4391 */
4392 static void
4393 Perl_functions (inf)
4394 FILE *inf;
4395 {
4396 char *package = savestr ("main"); /* current package name */
4397 register char *cp;
4398
4399 LOOP_ON_INPUT_LINES (inf, lb, cp)
4400 {
4401 cp = skip_spaces (cp);
4402
4403 if (LOOKING_AT (cp, "package"))
4404 {
4405 free (package);
4406 get_tag (cp, &package);
4407 }
4408 else if (LOOKING_AT (cp, "sub"))
4409 {
4410 char *pos;
4411 char *sp = cp;
4412
4413 while (!notinname (*cp))
4414 cp++;
4415 if (cp == sp)
4416 continue; /* nothing found */
4417 if ((pos = etags_strchr (sp, ':')) != NULL
4418 && pos < cp && pos[1] == ':')
4419 /* The name is already qualified. */
4420 make_tag (sp, cp - sp, TRUE,
4421 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4422 else
4423 /* Qualify it. */
4424 {
4425 char savechar, *name;
4426
4427 savechar = *cp;
4428 *cp = '\0';
4429 name = concat (package, "::", sp);
4430 *cp = savechar;
4431 make_tag (name, strlen(name), TRUE,
4432 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4433 free (name);
4434 }
4435 }
4436 else if (globals) /* only if we are tagging global vars */
4437 {
4438 /* Skip a qualifier, if any. */
4439 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4440 /* After "my" or "local", but before any following paren or space. */
4441 char *varstart = cp;
4442
4443 if (qual /* should this be removed? If yes, how? */
4444 && (*cp == '$' || *cp == '@' || *cp == '%'))
4445 {
4446 varstart += 1;
4447 do
4448 cp++;
4449 while (ISALNUM (*cp) || *cp == '_');
4450 }
4451 else if (qual)
4452 {
4453 /* Should be examining a variable list at this point;
4454 could insist on seeing an open parenthesis. */
4455 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4456 cp++;
4457 }
4458 else
4459 continue;
4460
4461 make_tag (varstart, cp - varstart, FALSE,
4462 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4463 }
4464 }
4465 free (package);
4466 }
4467
4468
4469 /*
4470 * Python support
4471 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4472 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4473 * More ideas by seb bacon <seb@jamkit.com> (2002)
4474 */
4475 static void
4476 Python_functions (inf)
4477 FILE *inf;
4478 {
4479 register char *cp;
4480
4481 LOOP_ON_INPUT_LINES (inf, lb, cp)
4482 {
4483 cp = skip_spaces (cp);
4484 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4485 {
4486 char *name = cp;
4487 while (!notinname (*cp) && *cp != ':')
4488 cp++;
4489 make_tag (name, cp - name, TRUE,
4490 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4491 }
4492 }
4493 }
4494
4495 \f
4496 /*
4497 * PHP support
4498 * Look for:
4499 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4500 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4501 * - /^[ \t]*define\(\"[^\"]+/
4502 * Only with --members:
4503 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4504 * Idea by Diez B. Roggisch (2001)
4505 */
4506 static void
4507 PHP_functions (inf)
4508 FILE *inf;
4509 {
4510 register char *cp, *name;
4511 bool search_identifier = FALSE;
4512
4513 LOOP_ON_INPUT_LINES (inf, lb, cp)
4514 {
4515 cp = skip_spaces (cp);
4516 name = cp;
4517 if (search_identifier
4518 && *cp != '\0')
4519 {
4520 while (!notinname (*cp))
4521 cp++;
4522 make_tag (name, cp - name, TRUE,
4523 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4524 search_identifier = FALSE;
4525 }
4526 else if (LOOKING_AT (cp, "function"))
4527 {
4528 if(*cp == '&')
4529 cp = skip_spaces (cp+1);
4530 if(*cp != '\0')
4531 {
4532 name = cp;
4533 while (!notinname (*cp))
4534 cp++;
4535 make_tag (name, cp - name, TRUE,
4536 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4537 }
4538 else
4539 search_identifier = TRUE;
4540 }
4541 else if (LOOKING_AT (cp, "class"))
4542 {
4543 if (*cp != '\0')
4544 {
4545 name = cp;
4546 while (*cp != '\0' && !iswhite (*cp))
4547 cp++;
4548 make_tag (name, cp - name, FALSE,
4549 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4550 }
4551 else
4552 search_identifier = TRUE;
4553 }
4554 else if (strneq (cp, "define", 6)
4555 && (cp = skip_spaces (cp+6))
4556 && *cp++ == '('
4557 && (*cp == '"' || *cp == '\''))
4558 {
4559 char quote = *cp++;
4560 name = cp;
4561 while (*cp != quote && *cp != '\0')
4562 cp++;
4563 make_tag (name, cp - name, FALSE,
4564 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4565 }
4566 else if (members
4567 && LOOKING_AT (cp, "var")
4568 && *cp == '$')
4569 {
4570 name = cp;
4571 while (!notinname(*cp))
4572 cp++;
4573 make_tag (name, cp - name, FALSE,
4574 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4575 }
4576 }
4577 }
4578
4579 \f
4580 /*
4581 * Cobol tag functions
4582 * We could look for anything that could be a paragraph name.
4583 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4584 * Idea by Corny de Souza (1993)
4585 */
4586 static void
4587 Cobol_paragraphs (inf)
4588 FILE *inf;
4589 {
4590 register char *bp, *ep;
4591
4592 LOOP_ON_INPUT_LINES (inf, lb, bp)
4593 {
4594 if (lb.len < 9)
4595 continue;
4596 bp += 8;
4597
4598 /* If eoln, compiler option or comment ignore whole line. */
4599 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4600 continue;
4601
4602 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4603 continue;
4604 if (*ep++ == '.')
4605 make_tag (bp, ep - bp, TRUE,
4606 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4607 }
4608 }
4609
4610 \f
4611 /*
4612 * Makefile support
4613 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4614 */
4615 static void
4616 Makefile_targets (inf)
4617 FILE *inf;
4618 {
4619 register char *bp;
4620
4621 LOOP_ON_INPUT_LINES (inf, lb, bp)
4622 {
4623 if (*bp == '\t' || *bp == '#')
4624 continue;
4625 while (*bp != '\0' && *bp != '=' && *bp != ':')
4626 bp++;
4627 if (*bp == ':' || (globals && *bp == '='))
4628 {
4629 /* We should detect if there is more than one tag, but we do not.
4630 We just skip initial and final spaces. */
4631 char * namestart = skip_spaces (lb.buffer);
4632 while (--bp > namestart)
4633 if (!notinname (*bp))
4634 break;
4635 make_tag (namestart, bp - namestart + 1, TRUE,
4636 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4637 }
4638 }
4639 }
4640
4641 \f
4642 /*
4643 * Pascal parsing
4644 * Original code by Mosur K. Mohan (1989)
4645 *
4646 * Locates tags for procedures & functions. Doesn't do any type- or
4647 * var-definitions. It does look for the keyword "extern" or
4648 * "forward" immediately following the procedure statement; if found,
4649 * the tag is skipped.
4650 */
4651 static void
4652 Pascal_functions (inf)
4653 FILE *inf;
4654 {
4655 linebuffer tline; /* mostly copied from C_entries */
4656 long save_lcno;
4657 int save_lineno, namelen, taglen;
4658 char c, *name;
4659
4660 bool /* each of these flags is TRUE if: */
4661 incomment, /* point is inside a comment */
4662 inquote, /* point is inside '..' string */
4663 get_tagname, /* point is after PROCEDURE/FUNCTION
4664 keyword, so next item = potential tag */
4665 found_tag, /* point is after a potential tag */
4666 inparms, /* point is within parameter-list */
4667 verify_tag; /* point has passed the parm-list, so the
4668 next token will determine whether this
4669 is a FORWARD/EXTERN to be ignored, or
4670 whether it is a real tag */
4671
4672 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4673 name = NULL; /* keep compiler quiet */
4674 dbp = lb.buffer;
4675 *dbp = '\0';
4676 linebuffer_init (&tline);
4677
4678 incomment = inquote = FALSE;
4679 found_tag = FALSE; /* have a proc name; check if extern */
4680 get_tagname = FALSE; /* found "procedure" keyword */
4681 inparms = FALSE; /* found '(' after "proc" */
4682 verify_tag = FALSE; /* check if "extern" is ahead */
4683
4684
4685 while (!feof (inf)) /* long main loop to get next char */
4686 {
4687 c = *dbp++;
4688 if (c == '\0') /* if end of line */
4689 {
4690 readline (&lb, inf);
4691 dbp = lb.buffer;
4692 if (*dbp == '\0')
4693 continue;
4694 if (!((found_tag && verify_tag)
4695 || get_tagname))
4696 c = *dbp++; /* only if don't need *dbp pointing
4697 to the beginning of the name of
4698 the procedure or function */
4699 }
4700 if (incomment)
4701 {
4702 if (c == '}') /* within { } comments */
4703 incomment = FALSE;
4704 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4705 {
4706 dbp++;
4707 incomment = FALSE;
4708 }
4709 continue;
4710 }
4711 else if (inquote)
4712 {
4713 if (c == '\'')
4714 inquote = FALSE;
4715 continue;
4716 }
4717 else
4718 switch (c)
4719 {
4720 case '\'':
4721 inquote = TRUE; /* found first quote */
4722 continue;
4723 case '{': /* found open { comment */
4724 incomment = TRUE;
4725 continue;
4726 case '(':
4727 if (*dbp == '*') /* found open (* comment */
4728 {
4729 incomment = TRUE;
4730 dbp++;
4731 }
4732 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4733 inparms = TRUE;
4734 continue;
4735 case ')': /* end of parms list */
4736 if (inparms)
4737 inparms = FALSE;
4738 continue;
4739 case ';':
4740 if (found_tag && !inparms) /* end of proc or fn stmt */
4741 {
4742 verify_tag = TRUE;
4743 break;
4744 }
4745 continue;
4746 }
4747 if (found_tag && verify_tag && (*dbp != ' '))
4748 {
4749 /* Check if this is an "extern" declaration. */
4750 if (*dbp == '\0')
4751 continue;
4752 if (lowcase (*dbp == 'e'))
4753 {
4754 if (nocase_tail ("extern")) /* superfluous, really! */
4755 {
4756 found_tag = FALSE;
4757 verify_tag = FALSE;
4758 }
4759 }
4760 else if (lowcase (*dbp) == 'f')
4761 {
4762 if (nocase_tail ("forward")) /* check for forward reference */
4763 {
4764 found_tag = FALSE;
4765 verify_tag = FALSE;
4766 }
4767 }
4768 if (found_tag && verify_tag) /* not external proc, so make tag */
4769 {
4770 found_tag = FALSE;
4771 verify_tag = FALSE;
4772 make_tag (name, namelen, TRUE,
4773 tline.buffer, taglen, save_lineno, save_lcno);
4774 continue;
4775 }
4776 }
4777 if (get_tagname) /* grab name of proc or fn */
4778 {
4779 char *cp;
4780
4781 if (*dbp == '\0')
4782 continue;
4783
4784 /* Find block name. */
4785 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4786 continue;
4787
4788 /* Save all values for later tagging. */
4789 linebuffer_setlen (&tline, lb.len);
4790 strcpy (tline.buffer, lb.buffer);
4791 save_lineno = lineno;
4792 save_lcno = linecharno;
4793 name = tline.buffer + (dbp - lb.buffer);
4794 namelen = cp - dbp;
4795 taglen = cp - lb.buffer + 1;
4796
4797 dbp = cp; /* set dbp to e-o-token */
4798 get_tagname = FALSE;
4799 found_tag = TRUE;
4800 continue;
4801
4802 /* And proceed to check for "extern". */
4803 }
4804 else if (!incomment && !inquote && !found_tag)
4805 {
4806 /* Check for proc/fn keywords. */
4807 switch (lowcase (c))
4808 {
4809 case 'p':
4810 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4811 get_tagname = TRUE;
4812 continue;
4813 case 'f':
4814 if (nocase_tail ("unction"))
4815 get_tagname = TRUE;
4816 continue;
4817 }
4818 }
4819 } /* while not eof */
4820
4821 free (tline.buffer);
4822 }
4823
4824 \f
4825 /*
4826 * Lisp tag functions
4827 * look for (def or (DEF, quote or QUOTE
4828 */
4829
4830 static void L_getit __P((void));
4831
4832 static void
4833 L_getit ()
4834 {
4835 if (*dbp == '\'') /* Skip prefix quote */
4836 dbp++;
4837 else if (*dbp == '(')
4838 {
4839 dbp++;
4840 /* Try to skip "(quote " */
4841 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4842 /* Ok, then skip "(" before name in (defstruct (foo)) */
4843 dbp = skip_spaces (dbp);
4844 }
4845 get_tag (dbp, NULL);
4846 }
4847
4848 static void
4849 Lisp_functions (inf)
4850 FILE *inf;
4851 {
4852 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4853 {
4854 if (dbp[0] != '(')
4855 continue;
4856
4857 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4858 {
4859 dbp = skip_non_spaces (dbp);
4860 dbp = skip_spaces (dbp);
4861 L_getit ();
4862 }
4863 else
4864 {
4865 /* Check for (foo::defmumble name-defined ... */
4866 do
4867 dbp++;
4868 while (!notinname (*dbp) && *dbp != ':');
4869 if (*dbp == ':')
4870 {
4871 do
4872 dbp++;
4873 while (*dbp == ':');
4874
4875 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4876 {
4877 dbp = skip_non_spaces (dbp);
4878 dbp = skip_spaces (dbp);
4879 L_getit ();
4880 }
4881 }
4882 }
4883 }
4884 }
4885
4886 \f
4887 /*
4888 * Lua script language parsing
4889 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4890 *
4891 * "function" and "local function" are tags if they start at column 1.
4892 */
4893 static void
4894 Lua_functions (inf)
4895 FILE *inf;
4896 {
4897 register char *bp;
4898
4899 LOOP_ON_INPUT_LINES (inf, lb, bp)
4900 {
4901 if (bp[0] != 'f' && bp[0] != 'l')
4902 continue;
4903
4904 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4905
4906 if (LOOKING_AT (bp, "function"))
4907 get_tag (bp, NULL);
4908 }
4909 }
4910
4911 \f
4912 /*
4913 * Postscript tags
4914 * Just look for lines where the first character is '/'
4915 * Also look at "defineps" for PSWrap
4916 * Ideas by:
4917 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4918 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4919 */
4920 static void
4921 PS_functions (inf)
4922 FILE *inf;
4923 {
4924 register char *bp, *ep;
4925
4926 LOOP_ON_INPUT_LINES (inf, lb, bp)
4927 {
4928 if (bp[0] == '/')
4929 {
4930 for (ep = bp+1;
4931 *ep != '\0' && *ep != ' ' && *ep != '{';
4932 ep++)
4933 continue;
4934 make_tag (bp, ep - bp, TRUE,
4935 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4936 }
4937 else if (LOOKING_AT (bp, "defineps"))
4938 get_tag (bp, NULL);
4939 }
4940 }
4941
4942 \f
4943 /*
4944 * Forth tags
4945 * Ignore anything after \ followed by space or in ( )
4946 * Look for words defined by :
4947 * Look for constant, code, create, defer, value, and variable
4948 * OBP extensions: Look for buffer:, field,
4949 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4950 */
4951 static void
4952 Forth_words (inf)
4953 FILE *inf;
4954 {
4955 register char *bp;
4956
4957 LOOP_ON_INPUT_LINES (inf, lb, bp)
4958 while ((bp = skip_spaces (bp))[0] != '\0')
4959 if (bp[0] == '\\' && iswhite(bp[1]))
4960 break; /* read next line */
4961 else if (bp[0] == '(' && iswhite(bp[1]))
4962 do /* skip to ) or eol */
4963 bp++;
4964 while (*bp != ')' && *bp != '\0');
4965 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4966 || LOOKING_AT_NOCASE (bp, "constant")
4967 || LOOKING_AT_NOCASE (bp, "code")
4968 || LOOKING_AT_NOCASE (bp, "create")
4969 || LOOKING_AT_NOCASE (bp, "defer")
4970 || LOOKING_AT_NOCASE (bp, "value")
4971 || LOOKING_AT_NOCASE (bp, "variable")
4972 || LOOKING_AT_NOCASE (bp, "buffer:")
4973 || LOOKING_AT_NOCASE (bp, "field"))
4974 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4975 else
4976 bp = skip_non_spaces (bp);
4977 }
4978
4979 \f
4980 /*
4981 * Scheme tag functions
4982 * look for (def... xyzzy
4983 * (def... (xyzzy
4984 * (def ... ((...(xyzzy ....
4985 * (set! xyzzy
4986 * Original code by Ken Haase (1985?)
4987 */
4988 static void
4989 Scheme_functions (inf)
4990 FILE *inf;
4991 {
4992 register char *bp;
4993
4994 LOOP_ON_INPUT_LINES (inf, lb, bp)
4995 {
4996 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4997 {
4998 bp = skip_non_spaces (bp+4);
4999 /* Skip over open parens and white space */
5000 while (notinname (*bp))
5001 bp++;
5002 get_tag (bp, NULL);
5003 }
5004 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5005 get_tag (bp, NULL);
5006 }
5007 }
5008
5009 \f
5010 /* Find tags in TeX and LaTeX input files. */
5011
5012 /* TEX_toktab is a table of TeX control sequences that define tags.
5013 * Each entry records one such control sequence.
5014 *
5015 * Original code from who knows whom.
5016 * Ideas by:
5017 * Stefan Monnier (2002)
5018 */
5019
5020 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5021
5022 /* Default set of control sequences to put into TEX_toktab.
5023 The value of environment var TEXTAGS is prepended to this. */
5024 static char *TEX_defenv = "\
5025 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5026 :part:appendix:entry:index:def\
5027 :newcommand:renewcommand:newenvironment:renewenvironment";
5028
5029 static void TEX_mode __P((FILE *));
5030 static void TEX_decode_env __P((char *, char *));
5031
5032 static char TEX_esc = '\\';
5033 static char TEX_opgrp = '{';
5034 static char TEX_clgrp = '}';
5035
5036 /*
5037 * TeX/LaTeX scanning loop.
5038 */
5039 static void
5040 TeX_commands (inf)
5041 FILE *inf;
5042 {
5043 char *cp;
5044 linebuffer *key;
5045
5046 /* Select either \ or ! as escape character. */
5047 TEX_mode (inf);
5048
5049 /* Initialize token table once from environment. */
5050 if (TEX_toktab == NULL)
5051 TEX_decode_env ("TEXTAGS", TEX_defenv);
5052
5053 LOOP_ON_INPUT_LINES (inf, lb, cp)
5054 {
5055 /* Look at each TEX keyword in line. */
5056 for (;;)
5057 {
5058 /* Look for a TEX escape. */
5059 while (*cp++ != TEX_esc)
5060 if (cp[-1] == '\0' || cp[-1] == '%')
5061 goto tex_next_line;
5062
5063 for (key = TEX_toktab; key->buffer != NULL; key++)
5064 if (strneq (cp, key->buffer, key->len))
5065 {
5066 register char *p;
5067 int namelen, linelen;
5068 bool opgrp = FALSE;
5069
5070 cp = skip_spaces (cp + key->len);
5071 if (*cp == TEX_opgrp)
5072 {
5073 opgrp = TRUE;
5074 cp++;
5075 }
5076 for (p = cp;
5077 (!iswhite (*p) && *p != '#' &&
5078 *p != TEX_opgrp && *p != TEX_clgrp);
5079 p++)
5080 continue;
5081 namelen = p - cp;
5082 linelen = lb.len;
5083 if (!opgrp || *p == TEX_clgrp)
5084 {
5085 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5086 p++;
5087 linelen = p - lb.buffer + 1;
5088 }
5089 make_tag (cp, namelen, TRUE,
5090 lb.buffer, linelen, lineno, linecharno);
5091 goto tex_next_line; /* We only tag a line once */
5092 }
5093 }
5094 tex_next_line:
5095 ;
5096 }
5097 }
5098
5099 #define TEX_LESC '\\'
5100 #define TEX_SESC '!'
5101
5102 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5103 chars accordingly. */
5104 static void
5105 TEX_mode (inf)
5106 FILE *inf;
5107 {
5108 int c;
5109
5110 while ((c = getc (inf)) != EOF)
5111 {
5112 /* Skip to next line if we hit the TeX comment char. */
5113 if (c == '%')
5114 while (c != '\n' && c != EOF)
5115 c = getc (inf);
5116 else if (c == TEX_LESC || c == TEX_SESC )
5117 break;
5118 }
5119
5120 if (c == TEX_LESC)
5121 {
5122 TEX_esc = TEX_LESC;
5123 TEX_opgrp = '{';
5124 TEX_clgrp = '}';
5125 }
5126 else
5127 {
5128 TEX_esc = TEX_SESC;
5129 TEX_opgrp = '<';
5130 TEX_clgrp = '>';
5131 }
5132 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5133 No attempt is made to correct the situation. */
5134 rewind (inf);
5135 }
5136
5137 /* Read environment and prepend it to the default string.
5138 Build token table. */
5139 static void
5140 TEX_decode_env (evarname, defenv)
5141 char *evarname;
5142 char *defenv;
5143 {
5144 register char *env, *p;
5145 int i, len;
5146
5147 /* Append default string to environment. */
5148 env = getenv (evarname);
5149 if (!env)
5150 env = defenv;
5151 else
5152 {
5153 char *oldenv = env;
5154 env = concat (oldenv, defenv, "");
5155 }
5156
5157 /* Allocate a token table */
5158 for (len = 1, p = env; p;)
5159 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5160 len++;
5161 TEX_toktab = xnew (len, linebuffer);
5162
5163 /* Unpack environment string into token table. Be careful about */
5164 /* zero-length strings (leading ':', "::" and trailing ':') */
5165 for (i = 0; *env != '\0';)
5166 {
5167 p = etags_strchr (env, ':');
5168 if (!p) /* End of environment string. */
5169 p = env + strlen (env);
5170 if (p - env > 0)
5171 { /* Only non-zero strings. */
5172 TEX_toktab[i].buffer = savenstr (env, p - env);
5173 TEX_toktab[i].len = p - env;
5174 i++;
5175 }
5176 if (*p)
5177 env = p + 1;
5178 else
5179 {
5180 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5181 TEX_toktab[i].len = 0;
5182 break;
5183 }
5184 }
5185 }
5186
5187 \f
5188 /* Texinfo support. Dave Love, Mar. 2000. */
5189 static void
5190 Texinfo_nodes (inf)
5191 FILE * inf;
5192 {
5193 char *cp, *start;
5194 LOOP_ON_INPUT_LINES (inf, lb, cp)
5195 if (LOOKING_AT (cp, "@node"))
5196 {
5197 start = cp;
5198 while (*cp != '\0' && *cp != ',')
5199 cp++;
5200 make_tag (start, cp - start, TRUE,
5201 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5202 }
5203 }
5204
5205 \f
5206 /*
5207 * HTML support.
5208 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5209 * Contents of <a name=xxx> are tags with name xxx.
5210 *
5211 * Francesco Potortì, 2002.
5212 */
5213 static void
5214 HTML_labels (inf)
5215 FILE * inf;
5216 {
5217 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5218 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5219 bool intag = FALSE; /* inside an html tag, looking for ID= */
5220 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5221 char *end;
5222
5223
5224 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5225
5226 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5227 for (;;) /* loop on the same line */
5228 {
5229 if (skiptag) /* skip HTML tag */
5230 {
5231 while (*dbp != '\0' && *dbp != '>')
5232 dbp++;
5233 if (*dbp == '>')
5234 {
5235 dbp += 1;
5236 skiptag = FALSE;
5237 continue; /* look on the same line */
5238 }
5239 break; /* go to next line */
5240 }
5241
5242 else if (intag) /* look for "name=" or "id=" */
5243 {
5244 while (*dbp != '\0' && *dbp != '>'
5245 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5246 dbp++;
5247 if (*dbp == '\0')
5248 break; /* go to next line */
5249 if (*dbp == '>')
5250 {
5251 dbp += 1;
5252 intag = FALSE;
5253 continue; /* look on the same line */
5254 }
5255 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5256 || LOOKING_AT_NOCASE (dbp, "id="))
5257 {
5258 bool quoted = (dbp[0] == '"');
5259
5260 if (quoted)
5261 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5262 continue;
5263 else
5264 for (end = dbp; *end != '\0' && intoken (*end); end++)
5265 continue;
5266 linebuffer_setlen (&token_name, end - dbp);
5267 strncpy (token_name.buffer, dbp, end - dbp);
5268 token_name.buffer[end - dbp] = '\0';
5269
5270 dbp = end;
5271 intag = FALSE; /* we found what we looked for */
5272 skiptag = TRUE; /* skip to the end of the tag */
5273 getnext = TRUE; /* then grab the text */
5274 continue; /* look on the same line */
5275 }
5276 dbp += 1;
5277 }
5278
5279 else if (getnext) /* grab next tokens and tag them */
5280 {
5281 dbp = skip_spaces (dbp);
5282 if (*dbp == '\0')
5283 break; /* go to next line */
5284 if (*dbp == '<')
5285 {
5286 intag = TRUE;
5287 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5288 continue; /* look on the same line */
5289 }
5290
5291 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5292 continue;
5293 make_tag (token_name.buffer, token_name.len, TRUE,
5294 dbp, end - dbp, lineno, linecharno);
5295 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5296 getnext = FALSE;
5297 break; /* go to next line */
5298 }
5299
5300 else /* look for an interesting HTML tag */
5301 {
5302 while (*dbp != '\0' && *dbp != '<')
5303 dbp++;
5304 if (*dbp == '\0')
5305 break; /* go to next line */
5306 intag = TRUE;
5307 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5308 {
5309 inanchor = TRUE;
5310 continue; /* look on the same line */
5311 }
5312 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5313 || LOOKING_AT_NOCASE (dbp, "<h1>")
5314 || LOOKING_AT_NOCASE (dbp, "<h2>")
5315 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5316 {
5317 intag = FALSE;
5318 getnext = TRUE;
5319 continue; /* look on the same line */
5320 }
5321 dbp += 1;
5322 }
5323 }
5324 }
5325
5326 \f
5327 /*
5328 * Prolog support
5329 *
5330 * Assumes that the predicate or rule starts at column 0.
5331 * Only the first clause of a predicate or rule is added.
5332 * Original code by Sunichirou Sugou (1989)
5333 * Rewritten by Anders Lindgren (1996)
5334 */
5335 static int prolog_pr __P((char *, char *));
5336 static void prolog_skip_comment __P((linebuffer *, FILE *));
5337 static int prolog_atom __P((char *, int));
5338
5339 static void
5340 Prolog_functions (inf)
5341 FILE *inf;
5342 {
5343 char *cp, *last;
5344 int len;
5345 int allocated;
5346
5347 allocated = 0;
5348 len = 0;
5349 last = NULL;
5350
5351 LOOP_ON_INPUT_LINES (inf, lb, cp)
5352 {
5353 if (cp[0] == '\0') /* Empty line */
5354 continue;
5355 else if (iswhite (cp[0])) /* Not a predicate */
5356 continue;
5357 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5358 prolog_skip_comment (&lb, inf);
5359 else if ((len = prolog_pr (cp, last)) > 0)
5360 {
5361 /* Predicate or rule. Store the function name so that we
5362 only generate a tag for the first clause. */
5363 if (last == NULL)
5364 last = xnew(len + 1, char);
5365 else if (len + 1 > allocated)
5366 xrnew (last, len + 1, char);
5367 allocated = len + 1;
5368 strncpy (last, cp, len);
5369 last[len] = '\0';
5370 }
5371 }
5372 free (last);
5373 }
5374
5375
5376 static void
5377 prolog_skip_comment (plb, inf)
5378 linebuffer *plb;
5379 FILE *inf;
5380 {
5381 char *cp;
5382
5383 do
5384 {
5385 for (cp = plb->buffer; *cp != '\0'; cp++)
5386 if (cp[0] == '*' && cp[1] == '/')
5387 return;
5388 readline (plb, inf);
5389 }
5390 while (!feof(inf));
5391 }
5392
5393 /*
5394 * A predicate or rule definition is added if it matches:
5395 * <beginning of line><Prolog Atom><whitespace>(
5396 * or <beginning of line><Prolog Atom><whitespace>:-
5397 *
5398 * It is added to the tags database if it doesn't match the
5399 * name of the previous clause header.
5400 *
5401 * Return the size of the name of the predicate or rule, or 0 if no
5402 * header was found.
5403 */
5404 static int
5405 prolog_pr (s, last)
5406 char *s;
5407 char *last; /* Name of last clause. */
5408 {
5409 int pos;
5410 int len;
5411
5412 pos = prolog_atom (s, 0);
5413 if (pos < 1)
5414 return 0;
5415
5416 len = pos;
5417 pos = skip_spaces (s + pos) - s;
5418
5419 if ((s[pos] == '.'
5420 || (s[pos] == '(' && (pos += 1))
5421 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5422 && (last == NULL /* save only the first clause */
5423 || len != (int)strlen (last)
5424 || !strneq (s, last, len)))
5425 {
5426 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5427 return len;
5428 }
5429 else
5430 return 0;
5431 }
5432
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || (*p == '_'))
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || (*p == '_'));
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* the backslash and what it quotes */
        break;

      case '\'':
        if (p[1] != '\'')
          return (p + 1) - start; /* closing quote included */
        p += 2;                 /* a doubled quote stands for one quote */
        break;

      default:
        p++;
        break;
      }
}
5491
5492 \f
5493 /*
5494 * Support for Erlang
5495 *
5496 * Generates tags for functions, defines, and records.
5497 * Assumes that Erlang functions start at column 0.
5498 * Original code by Anders Lindgren (1996)
5499 */
5500 static int erlang_func __P((char *, char *));
5501 static void erlang_attribute __P((char *));
5502 static int erlang_atom __P((char *));
5503
5504 static void
5505 Erlang_functions (inf)
5506 FILE *inf;
5507 {
5508 char *cp, *last;
5509 int len;
5510 int allocated;
5511
5512 allocated = 0;
5513 len = 0;
5514 last = NULL;
5515
5516 LOOP_ON_INPUT_LINES (inf, lb, cp)
5517 {
5518 if (cp[0] == '\0') /* Empty line */
5519 continue;
5520 else if (iswhite (cp[0])) /* Not function nor attribute */
5521 continue;
5522 else if (cp[0] == '%') /* comment */
5523 continue;
5524 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5525 continue;
5526 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5527 {
5528 erlang_attribute (cp);
5529 if (last != NULL)
5530 {
5531 free (last);
5532 last = NULL;
5533 }
5534 }
5535 else if ((len = erlang_func (cp, last)) > 0)
5536 {
5537 /*
5538 * Function. Store the function name so that we only
5539 * generates a tag for the first clause.
5540 */
5541 if (last == NULL)
5542 last = xnew (len + 1, char);
5543 else if (len + 1 > allocated)
5544 xrnew (last, len + 1, char);
5545 allocated = len + 1;
5546 strncpy (last, cp, len);
5547 last[len] = '\0';
5548 }
5549 }
5550 free (last);
5551 }
5552
5553
5554 /*
5555 * A function definition is added if it matches:
5556 * <beginning of line><Erlang Atom><whitespace>(
5557 *
5558 * It is added to the tags database if it doesn't match the
5559 * name of the previous clause header.
5560 *
5561 * Return the size of the name of the function, or 0 if no function
5562 * was found.
5563 */
5564 static int
5565 erlang_func (s, last)
5566 char *s;
5567 char *last; /* Name of last clause. */
5568 {
5569 int pos;
5570 int len;
5571
5572 pos = erlang_atom (s);
5573 if (pos < 1)
5574 return 0;
5575
5576 len = pos;
5577 pos = skip_spaces (s + pos) - s;
5578
5579 /* Save only the first clause. */
5580 if (s[pos++] == '('
5581 && (last == NULL
5582 || len != (int)strlen (last)
5583 || !strneq (s, last, len)))
5584 {
5585 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5586 return len;
5587 }
5588
5589 return 0;
5590 }
5591
5592
5593 /*
5594 * Handle attributes. Currently, tags are generated for defines
5595 * and records.
5596 *
5597 * They are on the form:
5598 * -define(foo, bar).
5599 * -define(Foo(M, N), M+N).
5600 * -record(graph, {vtab = notable, cyclic = true}).
5601 */
5602 static void
5603 erlang_attribute (s)
5604 char *s;
5605 {
5606 char *cp = s;
5607
5608 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5609 && *cp++ == '(')
5610 {
5611 int len = erlang_atom (skip_spaces (cp));
5612 if (len > 0)
5613 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5614 }
5615 return;
5616 }
5617
5618
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        ;
      return p - s;
    }

  if (*p != '\'')
    return 0;

  /* Quoted atom: a backslash protects the character behind it.  */
  p++;
  while (*p != '\'')
    {
      if (*p == '\0')           /* multiline quoted atoms are ignored */
        return 0;
      if (*p == '\\' && *++p == '\0')
        return 0;
      p++;
    }
  return (p + 1) - s;           /* closing quote included */
}
5647
5648 \f
5649 static char *scan_separators __P((char *));
5650 static void add_regex __P((char *, language *));
5651 static char *substitute __P((char *, char *, struct re_registers *));
5652
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * A backslash in front of any other character is kept together with
 * that character; a backslash at the very end of the string is
 * dropped.  An empty string has no separator and yields NULL.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Without this test the loop below would start one byte behind the
     terminator of an empty string.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character right away.  */
          if (*++name == '\0')
            break;              /* nothing left to quote */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)            */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)       */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)          */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)          */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)         */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5712
5713 /* Look at the argument of --regex or --no-regex and do the right
5714 thing. Same for each line of a regexp file. */
5715 static void
5716 analyse_regex (regex_arg)
5717 char *regex_arg;
5718 {
5719 if (regex_arg == NULL)
5720 {
5721 free_regexps (); /* --no-regex: remove existing regexps */
5722 return;
5723 }
5724
5725 /* A real --regexp option or a line in a regexp file. */
5726 switch (regex_arg[0])
5727 {
5728 /* Comments in regexp file or null arg to --regex. */
5729 case '\0':
5730 case ' ':
5731 case '\t':
5732 break;
5733
5734 /* Read a regex file. This is recursive and may result in a
5735 loop, which will stop when the file descriptors are exhausted. */
5736 case '@':
5737 {
5738 FILE *regexfp;
5739 linebuffer regexbuf;
5740 char *regexfile = regex_arg + 1;
5741
5742 /* regexfile is a file containing regexps, one per line. */
5743 regexfp = fopen (regexfile, "r");
5744 if (regexfp == NULL)
5745 {
5746 pfatal (regexfile);
5747 return;
5748 }
5749 linebuffer_init (&regexbuf);
5750 while (readline_internal (&regexbuf, regexfp) > 0)
5751 analyse_regex (regexbuf.buffer);
5752 free (regexbuf.buffer);
5753 fclose (regexfp);
5754 }
5755 break;
5756
5757 /* Regexp to be used for a specific language only. */
5758 case '{':
5759 {
5760 language *lang;
5761 char *lang_name = regex_arg + 1;
5762 char *cp;
5763
5764 for (cp = lang_name; *cp != '}'; cp++)
5765 if (*cp == '\0')
5766 {
5767 error ("unterminated language name in regex: %s", regex_arg);
5768 return;
5769 }
5770 *cp++ = '\0';
5771 lang = get_language_from_langname (lang_name);
5772 if (lang == NULL)
5773 return;
5774 add_regex (cp, lang);
5775 }
5776 break;
5777
5778 /* Regexp to be used for any language. */
5779 default:
5780 add_regex (regex_arg, NULL);
5781 break;
5782 }
5783 }
5784
5785 /* Separate the regexp pattern, compile it,
5786 and care for optional name and modifiers. */
5787 static void
5788 add_regex (regexp_pattern, lang)
5789 char *regexp_pattern;
5790 language *lang;
5791 {
5792 static struct re_pattern_buffer zeropattern;
5793 char sep, *pat, *name, *modifiers;
5794 const char *err;
5795 struct re_pattern_buffer *patbuf;
5796 regexp *rp;
5797 bool
5798 force_explicit_name = TRUE, /* do not use implicit tag names */
5799 ignore_case = FALSE, /* case is significant */
5800 multi_line = FALSE, /* matches are done one line at a time */
5801 single_line = FALSE; /* dot does not match newline */
5802
5803
5804 if (strlen(regexp_pattern) < 3)
5805 {
5806 error ("null regexp", (char *)NULL);
5807 return;
5808 }
5809 sep = regexp_pattern[0];
5810 name = scan_separators (regexp_pattern);
5811 if (name == NULL)
5812 {
5813 error ("%s: unterminated regexp", regexp_pattern);
5814 return;
5815 }
5816 if (name[1] == sep)
5817 {
5818 error ("null name for regexp \"%s\"", regexp_pattern);
5819 return;
5820 }
5821 modifiers = scan_separators (name);
5822 if (modifiers == NULL) /* no terminating separator --> no name */
5823 {
5824 modifiers = name;
5825 name = "";
5826 }
5827 else
5828 modifiers += 1; /* skip separator */
5829
5830 /* Parse regex modifiers. */
5831 for (; modifiers[0] != '\0'; modifiers++)
5832 switch (modifiers[0])
5833 {
5834 case 'N':
5835 if (modifiers == name)
5836 error ("forcing explicit tag name but no name, ignoring", NULL);
5837 force_explicit_name = TRUE;
5838 break;
5839 case 'i':
5840 ignore_case = TRUE;
5841 break;
5842 case 's':
5843 single_line = TRUE;
5844 /* FALLTHRU */
5845 case 'm':
5846 multi_line = TRUE;
5847 need_filebuf = TRUE;
5848 break;
5849 default:
5850 {
5851 char wrongmod [2];
5852 wrongmod[0] = modifiers[0];
5853 wrongmod[1] = '\0';
5854 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5855 }
5856 break;
5857 }
5858
5859 patbuf = xnew (1, struct re_pattern_buffer);
5860 *patbuf = zeropattern;
5861 if (ignore_case)
5862 {
5863 static char lc_trans[CHARS];
5864 int i;
5865 for (i = 0; i < CHARS; i++)
5866 lc_trans[i] = lowcase (i);
5867 patbuf->translate = lc_trans; /* translation table to fold case */
5868 }
5869
5870 if (multi_line)
5871 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5872 else
5873 pat = regexp_pattern;
5874
5875 if (single_line)
5876 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5877 else
5878 re_set_syntax (RE_SYNTAX_EMACS);
5879
5880 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5881 if (multi_line)
5882 free (pat);
5883 if (err != NULL)
5884 {
5885 error ("%s while compiling pattern", err);
5886 return;
5887 }
5888
5889 rp = p_head;
5890 p_head = xnew (1, regexp);
5891 p_head->pattern = savestr (regexp_pattern);
5892 p_head->p_next = rp;
5893 p_head->lang = lang;
5894 p_head->pat = patbuf;
5895 p_head->name = savestr (name);
5896 p_head->error_signaled = FALSE;
5897 p_head->force_explicit_name = force_explicit_name;
5898 p_head->ignore_case = ignore_case;
5899 p_head->multi_line = multi_line;
5900 }
5901
5902 /*
5903 * Do the substitutions indicated by the regular expression and
5904 * arguments.
5905 */
5906 static char *
5907 substitute (in, out, regs)
5908 char *in, *out;
5909 struct re_registers *regs;
5910 {
5911 char *result, *t;
5912 int size, dig, diglen;
5913
5914 result = NULL;
5915 size = strlen (out);
5916
5917 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5918 if (out[size - 1] == '\\')
5919 fatal ("pattern error in \"%s\"", out);
5920 for (t = etags_strchr (out, '\\');
5921 t != NULL;
5922 t = etags_strchr (t + 2, '\\'))
5923 if (ISDIGIT (t[1]))
5924 {
5925 dig = t[1] - '0';
5926 diglen = regs->end[dig] - regs->start[dig];
5927 size += diglen - 2;
5928 }
5929 else
5930 size -= 1;
5931
5932 /* Allocate space and do the substitutions. */
5933 assert (size >= 0);
5934 result = xnew (size + 1, char);
5935
5936 for (t = result; *out != '\0'; out++)
5937 if (*out == '\\' && ISDIGIT (*++out))
5938 {
5939 dig = *out - '0';
5940 diglen = regs->end[dig] - regs->start[dig];
5941 strncpy (t, in + regs->start[dig], diglen);
5942 t += diglen;
5943 }
5944 else
5945 *t++ = *out;
5946 *t = '\0';
5947
5948 assert (t <= result + size);
5949 assert (t - result == (int)strlen (result));
5950
5951 return result;
5952 }
5953
5954 /* Deallocate all regexps. */
5955 static void
5956 free_regexps ()
5957 {
5958 regexp *rp;
5959 while (p_head != NULL)
5960 {
5961 rp = p_head->p_next;
5962 free (p_head->pattern);
5963 free (p_head->name);
5964 free (p_head);
5965 p_head = rp;
5966 }
5967 return;
5968 }
5969
5970 /*
5971 * Reads the whole file as a single string from `filebuf' and looks for
5972 * multi-line regular expressions, creating tags on matches.
5973 * readline already dealt with normal regexps.
5974 *
5975 * Idea by Ben Wing <ben@666.com> (2002).
5976 */
5977 static void
5978 regex_tag_multiline ()
5979 {
5980 char *buffer = filebuf.buffer;
5981 regexp *rp;
5982 char *name;
5983
5984 for (rp = p_head; rp != NULL; rp = rp->p_next)
5985 {
5986 int match = 0;
5987
5988 if (!rp->multi_line)
5989 continue; /* skip normal regexps */
5990
5991 /* Generic initialisations before parsing file from memory. */
5992 lineno = 1; /* reset global line number */
5993 charno = 0; /* reset global char number */
5994 linecharno = 0; /* reset global char number of line start */
5995
5996 /* Only use generic regexps or those for the current language. */
5997 if (rp->lang != NULL && rp->lang != curfdp->lang)
5998 continue;
5999
6000 while (match >= 0 && match < filebuf.len)
6001 {
6002 match = re_search (rp->pat, buffer, filebuf.len, charno,
6003 filebuf.len - match, &rp->regs);
6004 switch (match)
6005 {
6006 case -2:
6007 /* Some error. */
6008 if (!rp->error_signaled)
6009 {
6010 error ("regexp stack overflow while matching \"%s\"",
6011 rp->pattern);
6012 rp->error_signaled = TRUE;
6013 }
6014 break;
6015 case -1:
6016 /* No match. */
6017 break;
6018 default:
6019 if (match == rp->regs.end[0])
6020 {
6021 if (!rp->error_signaled)
6022 {
6023 error ("regexp matches the empty string: \"%s\"",
6024 rp->pattern);
6025 rp->error_signaled = TRUE;
6026 }
6027 match = -3; /* exit from while loop */
6028 break;
6029 }
6030
6031 /* Match occurred. Construct a tag. */
6032 while (charno < rp->regs.end[0])
6033 if (buffer[charno++] == '\n')
6034 lineno++, linecharno = charno;
6035 name = rp->name;
6036 if (name[0] == '\0')
6037 name = NULL;
6038 else /* make a named tag */
6039 name = substitute (buffer, rp->name, &rp->regs);
6040 if (rp->force_explicit_name)
6041 /* Force explicit tag name, if a name is there. */
6042 pfnote (name, TRUE, buffer + linecharno,
6043 charno - linecharno + 1, lineno, linecharno);
6044 else
6045 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6046 charno - linecharno + 1, lineno, linecharno);
6047 break;
6048 }
6049 }
6050 }
6051 }
6052
6053 \f
6054 static bool
6055 nocase_tail (cp)
6056 char *cp;
6057 {
6058 register int len = 0;
6059
6060 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6061 cp++, len++;
6062 if (*cp == '\0' && !intoken (dbp[len]))
6063 {
6064 dbp += len;
6065 return TRUE;
6066 }
6067 return FALSE;
6068 }
6069
6070 static void
6071 get_tag (bp, namepp)
6072 register char *bp;
6073 char **namepp;
6074 {
6075 register char *cp = bp;
6076
6077 if (*bp != '\0')
6078 {
6079 /* Go till you get to white space or a syntactic break */
6080 for (cp = bp + 1; !notinname (*cp); cp++)
6081 continue;
6082 make_tag (bp, cp - bp, TRUE,
6083 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6084 }
6085
6086 if (namepp != NULL)
6087 *namepp = savenstr (bp, cp - bp);
6088 }
6089
6090 /*
6091 * Read a line of text from `stream' into `lbp', excluding the
6092 * newline or CR-NL, if any. Return the number of characters read from
6093 * `stream', which is the length of the line including the newline.
6094 *
6095 * On DOS or Windows we do not count the CR character, if any before the
6096 * NL, in the returned length; this mirrors the behavior of Emacs on those
6097 * platforms (for text files, it translates CR-NL to NL as it reads in the
6098 * file).
6099 *
6100 * If multi-line regular expressions are requested, each line read is
6101 * appended to `filebuf'.
6102 */
6103 static long
6104 readline_internal (lbp, stream)
6105 linebuffer *lbp;
6106 register FILE *stream;
6107 {
6108 char *buffer = lbp->buffer;
6109 register char *p = lbp->buffer;
6110 register char *pend;
6111 int chars_deleted;
6112
6113 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6114
6115 for (;;)
6116 {
6117 register int c = getc (stream);
6118 if (p == pend)
6119 {
6120 /* We're at the end of linebuffer: expand it. */
6121 lbp->size *= 2;
6122 xrnew (buffer, lbp->size, char);
6123 p += buffer - lbp->buffer;
6124 pend = buffer + lbp->size;
6125 lbp->buffer = buffer;
6126 }
6127 if (c == EOF)
6128 {
6129 *p = '\0';
6130 chars_deleted = 0;
6131 break;
6132 }
6133 if (c == '\n')
6134 {
6135 if (p > buffer && p[-1] == '\r')
6136 {
6137 p -= 1;
6138 #ifdef DOS_NT
6139 /* Assume CRLF->LF translation will be performed by Emacs
6140 when loading this file, so CRs won't appear in the buffer.
6141 It would be cleaner to compensate within Emacs;
6142 however, Emacs does not know how many CRs were deleted
6143 before any given point in the file. */
6144 chars_deleted = 1;
6145 #else
6146 chars_deleted = 2;
6147 #endif
6148 }
6149 else
6150 {
6151 chars_deleted = 1;
6152 }
6153 *p = '\0';
6154 break;
6155 }
6156 *p++ = c;
6157 }
6158 lbp->len = p - buffer;
6159
6160 if (need_filebuf /* we need filebuf for multi-line regexps */
6161 && chars_deleted > 0) /* not at EOF */
6162 {
6163 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6164 {
6165 /* Expand filebuf. */
6166 filebuf.size *= 2;
6167 xrnew (filebuf.buffer, filebuf.size, char);
6168 }
6169 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6170 filebuf.len += lbp->len;
6171 filebuf.buffer[filebuf.len++] = '\n';
6172 filebuf.buffer[filebuf.len] = '\0';
6173 }
6174
6175 return lbp->len + chars_deleted;
6176 }
6177
6178 /*
6179 * Like readline_internal, above, but in addition try to match the
6180 * input line against relevant regular expressions and manage #line
6181 * directives.
6182 */
6183 static void
6184 readline (lbp, stream)
6185 linebuffer *lbp;
6186 FILE *stream;
6187 {
6188 long result;
6189
6190 linecharno = charno; /* update global char number of line start */
6191 result = readline_internal (lbp, stream); /* read line */
6192 lineno += 1; /* increment global line number */
6193 charno += result; /* increment global char number */
6194
6195 /* Honour #line directives. */
6196 if (!no_line_directive)
6197 {
6198 static bool discard_until_line_directive;
6199
6200 /* Check whether this is a #line directive. */
6201 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6202 {
6203 unsigned int lno;
6204 int start = 0;
6205
6206 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6207 && start > 0) /* double quote character found */
6208 {
6209 char *endp = lbp->buffer + start;
6210
6211 while ((endp = etags_strchr (endp, '"')) != NULL
6212 && endp[-1] == '\\')
6213 endp++;
6214 if (endp != NULL)
6215 /* Ok, this is a real #line directive. Let's deal with it. */
6216 {
6217 char *taggedabsname; /* absolute name of original file */
6218 char *taggedfname; /* name of original file as given */
6219 char *name; /* temp var */
6220
6221 discard_until_line_directive = FALSE; /* found it */
6222 name = lbp->buffer + start;
6223 *endp = '\0';
6224 canonicalize_filename (name); /* for DOS */
6225 taggedabsname = absolute_filename (name, tagfiledir);
6226 if (filename_is_absolute (name)
6227 || filename_is_absolute (curfdp->infname))
6228 taggedfname = savestr (taggedabsname);
6229 else
6230 taggedfname = relative_filename (taggedabsname,tagfiledir);
6231
6232 if (streq (curfdp->taggedfname, taggedfname))
6233 /* The #line directive is only a line number change. We
6234 deal with this afterwards. */
6235 free (taggedfname);
6236 else
6237 /* The tags following this #line directive should be
6238 attributed to taggedfname. In order to do this, set
6239 curfdp accordingly. */
6240 {
6241 fdesc *fdp; /* file description pointer */
6242
6243 /* Go look for a file description already set up for the
6244 file indicated in the #line directive. If there is
6245 one, use it from now until the next #line
6246 directive. */
6247 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6248 if (streq (fdp->infname, curfdp->infname)
6249 && streq (fdp->taggedfname, taggedfname))
6250 /* If we remove the second test above (after the &&)
6251 then all entries pertaining to the same file are
6252 coalesced in the tags file. If we use it, then
6253 entries pertaining to the same file but generated
6254 from different files (via #line directives) will
6255 go into separate sections in the tags file. These
6256 alternatives look equivalent. The first one
6257 destroys some apparently useless information. */
6258 {
6259 curfdp = fdp;
6260 free (taggedfname);
6261 break;
6262 }
6263 /* Else, if we already tagged the real file, skip all
6264 input lines until the next #line directive. */
6265 if (fdp == NULL) /* not found */
6266 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6267 if (streq (fdp->infabsname, taggedabsname))
6268 {
6269 discard_until_line_directive = TRUE;
6270 free (taggedfname);
6271 break;
6272 }
6273 /* Else create a new file description and use that from
6274 now on, until the next #line directive. */
6275 if (fdp == NULL) /* not found */
6276 {
6277 fdp = fdhead;
6278 fdhead = xnew (1, fdesc);
6279 *fdhead = *curfdp; /* copy curr. file description */
6280 fdhead->next = fdp;
6281 fdhead->infname = savestr (curfdp->infname);
6282 fdhead->infabsname = savestr (curfdp->infabsname);
6283 fdhead->infabsdir = savestr (curfdp->infabsdir);
6284 fdhead->taggedfname = taggedfname;
6285 fdhead->usecharno = FALSE;
6286 fdhead->prop = NULL;
6287 fdhead->written = FALSE;
6288 curfdp = fdhead;
6289 }
6290 }
6291 free (taggedabsname);
6292 lineno = lno - 1;
6293 readline (lbp, stream);
6294 return;
6295 } /* if a real #line directive */
6296 } /* if #line is followed by a a number */
6297 } /* if line begins with "#line " */
6298
6299 /* If we are here, no #line directive was found. */
6300 if (discard_until_line_directive)
6301 {
6302 if (result > 0)
6303 {
6304 /* Do a tail recursion on ourselves, thus discarding the contents
6305 of the line buffer. */
6306 readline (lbp, stream);
6307 return;
6308 }
6309 /* End of file. */
6310 discard_until_line_directive = FALSE;
6311 return;
6312 }
6313 } /* if #line directives should be considered */
6314
6315 {
6316 int match;
6317 regexp *rp;
6318 char *name;
6319
6320 /* Match against relevant regexps. */
6321 if (lbp->len > 0)
6322 for (rp = p_head; rp != NULL; rp = rp->p_next)
6323 {
6324 /* Only use generic regexps or those for the current language.
6325 Also do not use multiline regexps, which is the job of
6326 regex_tag_multiline. */
6327 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6328 || rp->multi_line)
6329 continue;
6330
6331 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6332 switch (match)
6333 {
6334 case -2:
6335 /* Some error. */
6336 if (!rp->error_signaled)
6337 {
6338 error ("regexp stack overflow while matching \"%s\"",
6339 rp->pattern);
6340 rp->error_signaled = TRUE;
6341 }
6342 break;
6343 case -1:
6344 /* No match. */
6345 break;
6346 case 0:
6347 /* Empty string matched. */
6348 if (!rp->error_signaled)
6349 {
6350 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6351 rp->error_signaled = TRUE;
6352 }
6353 break;
6354 default:
6355 /* Match occurred. Construct a tag. */
6356 name = rp->name;
6357 if (name[0] == '\0')
6358 name = NULL;
6359 else /* make a named tag */
6360 name = substitute (lbp->buffer, rp->name, &rp->regs);
6361 if (rp->force_explicit_name)
6362 /* Force explicit tag name, if a name is there. */
6363 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6364 else
6365 make_tag (name, strlen (name), TRUE,
6366 lbp->buffer, match, lineno, linecharno);
6367 break;
6368 }
6369 }
6370 }
6371 }
6372
6373 \f
/*
 * Duplicate the string CP.  The copy, terminator included, is
 * obtained through savenstr and belongs to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6384
6385 /*
6386 * Return a pointer to a space of size LEN+1 allocated with xnew where
6387 * the string CP has been copied for at most the first LEN characters.
6388 */
6389 static char *
6390 savenstr (cp, len)
6391 char *cp;
6392 int len;
6393 {
6394 register char *dp;
6395
6396 dp = xnew (len + 1, char);
6397 strncpy (dp, cp, len);
6398 dp[len] = '\0';
6399 return dp;
6400 }
6401
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if there is none.  The terminating null counts
 * as part of the string, so searching for '\0' finds the terminator.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6423
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if there is none.  The terminating null counts
 * as part of the string, so searching for '\0' finds the terminator.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6442
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic
 * characters.  Returns zero when they are equal, otherwise the
 * difference between the first pair of characters that differ
 * (lower-cased when both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6463
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: at most the first N
 * characters of S1 and S2 are looked at, and the result is zero when
 * they all match or when N is not positive.  Otherwise the result is
 * the difference between the first pair of characters that differ
 * (lower-cased when both are alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Count down only for characters that have been compared equal, so
     that N is exactly the number of characters still to be examined
     when the loop stops.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  /* Limit reached: whatever follows must not influence the result,
     even if S1 happens to end exactly here.  */
  if (n <= 0)
    return 0;

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
6489
/* Return a pointer to the first character at or after CP for which
   iswhite is false (the original comment notes that the end of the
   string is not a space, so the scan stops there at the latest).  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6499
/* Return a pointer to the first white character at or after CP, or to
   the terminating null if there is none.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6509
/* Report an error through `error' and terminate the program with
   EXIT_FAILURE.  S1 is the printf control string, S2 its argument.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6518
/* Print the system error message for the current `errno', prefixed by
   S1, and terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6526
6527 static void
6528 suggest_asking_for_help ()
6529 {
6530 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6531 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6532 exit (EXIT_FAILURE);
6533 }
6534
6535 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6536 static void
6537 error (s1, s2)
6538 const char *s1, *s2;
6539 {
6540 fprintf (stderr, "%s: ", progname);
6541 fprintf (stderr, s1, s2);
6542 fprintf (stderr, "\n");
6543 }
6544
6545 /* Return a newly-allocated string whose contents
6546 concatenate those of s1, s2, s3. */
6547 static char *
6548 concat (s1, s2, s3)
6549 char *s1, *s2, *s3;
6550 {
6551 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6552 char *result = xnew (len1 + len2 + len3 + 1, char);
6553
6554 strcpy (result, s1);
6555 strcpy (result + len1, s2);
6556 strcpy (result + len1 + len2, s3);
6557 result[len1 + len2 + len3] = '\0';
6558
6559 return result;
6560 }
6561
6562 \f
6563 /* Does the same work as the system V getcwd, but does not need to
6564 guess the buffer size in advance. */
6565 static char *
6566 etags_getcwd ()
6567 {
6568 #ifdef HAVE_GETCWD
6569 int bufsize = 200;
6570 char *path = xnew (bufsize, char);
6571
6572 while (getcwd (path, bufsize) == NULL)
6573 {
6574 if (errno != ERANGE)
6575 pfatal ("getcwd");
6576 bufsize *= 2;
6577 free (path);
6578 path = xnew (bufsize, char);
6579 }
6580
6581 canonicalize_filename (path);
6582 return path;
6583
6584 #else /* not HAVE_GETCWD */
6585 #if MSDOS
6586
6587 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6588
6589 getwd (path);
6590
6591 for (p = path; *p != '\0'; p++)
6592 if (*p == '\\')
6593 *p = '/';
6594 else
6595 *p = lowcase (*p);
6596
6597 return strdup (path);
6598 #else /* not MSDOS */
6599 linebuffer path;
6600 FILE *pipe;
6601
6602 linebuffer_init (&path);
6603 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6604 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6605 pfatal ("pwd");
6606 pclose (pipe);
6607
6608 return path.buffer;
6609 #endif /* not MSDOS */
6610 #endif /* not HAVE_GETCWD */
6611 }
6612
6613 /* Return a newly allocated string containing the file name of FILE
6614 relative to the absolute directory DIR (which should end with a slash). */
6615 static char *
6616 relative_filename (file, dir)
6617 char *file, *dir;
6618 {
6619 char *fp, *dp, *afn, *res;
6620 int i;
6621
6622 /* Find the common root of file and dir (with a trailing slash). */
6623 afn = absolute_filename (file, cwd);
6624 fp = afn;
6625 dp = dir;
6626 while (*fp++ == *dp++)
6627 continue;
6628 fp--, dp--; /* back to the first differing char */
6629 #ifdef DOS_NT
6630 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6631 return afn;
6632 #endif
6633 do /* look at the equal chars until '/' */
6634 fp--, dp--;
6635 while (*fp != '/');
6636
6637 /* Build a sequence of "../" strings for the resulting relative file name. */
6638 i = 0;
6639 while (*dp == '/')
6640 ++dp;
6641 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6642 {
6643 i += 1;
6644 while (*dp == '/')
6645 ++dp;
6646 }
6647 res = xnew (3*i + strlen (fp + 1) + 1, char);
6648 res[0] = '\0';
6649 while (i-- > 0)
6650 strcat (res, "../");
6651
6652 /* Add the file name relative to the common root of file and dir. */
6653 strcat (res, fp + 1);
6654 free (afn);
6655
6656 return res;
6657 }
6658
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An already absolute FILE is copied, otherwise it is appended to
   DIR.  The "/dirname/.." and "/." components are then removed in
   place.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so use memmove, not
                 strcpy.  slashp[2] is a dot, hence strlen (slashp + 2)
                 is the length of the tail plus its terminator.  */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Same overlap here; slashp[1] is a dot, hence the
                 count includes the terminator of the tail.  */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6722
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place; when it
   contains no slash the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *after_slash, *res;
  char saved;

  canonicalize_filename (file);
  after_slash = etags_strrchr (file, '/');
  if (after_slash == NULL)
    return savestr (dir);
  after_slash++;

  /* Cut FILE right after its last slash for the duration of the
     call, then put the overwritten character back.  */
  saved = *after_slash;
  *after_slash = '\0';
  res = absolute_filename (file, dir);
  *after_slash = saved;

  return res;
}
6744
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a letter, a colon and a slash.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return TRUE;
#endif
  return fn[0] == '/';
}
6757
/* Translate backslashes into slashes.  Works in place.
   Under DOS_NT a lower-case drive letter is also made upper case;
   without DOS_NT the string is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6776
6777 \f
6778 /* Initialize a linebuffer for use */
6779 static void
6780 linebuffer_init (lbp)
6781 linebuffer *lbp;
6782 {
6783 lbp->size = (DEBUG) ? 3 : 200;
6784 lbp->buffer = xnew (lbp->size, char);
6785 lbp->buffer[0] = '\0';
6786 lbp->len = 0;
6787 }
6788
6789 /* Set the minimum size of a string contained in a linebuffer. */
6790 static void
6791 linebuffer_setlen (lbp, toksize)
6792 linebuffer *lbp;
6793 int toksize;
6794 {
6795 while (lbp->size <= toksize)
6796 {
6797 lbp->size *= 2;
6798 xrnew (lbp->buffer, lbp->size, char);
6799 }
6800 lbp->len = toksize;
6801 }
6802
6803 /* Like malloc but get fatal error if memory is exhausted. */
6804 static PTR
6805 xmalloc (size)
6806 unsigned int size;
6807 {
6808 PTR result = (PTR) malloc (size);
6809 if (result == NULL)
6810 fatal ("virtual memory exhausted", (char *)NULL);
6811 return result;
6812 }
6813
6814 static PTR
6815 xrealloc (ptr, size)
6816 char *ptr;
6817 unsigned int size;
6818 {
6819 PTR result = (PTR) realloc (ptr, size);
6820 if (result == NULL)
6821 fatal ("virtual memory exhausted", (char *)NULL);
6822 return result;
6823 }
6824
6825 /*
6826 * Local Variables:
6827 * indent-tabs-mode: t
6828 * tab-width: 8
6829 * fill-column: 79
6830 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6831 * c-file-style: "gnu"
6832 * End:
6833 */
6834
6835 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6836 (do not change this comment) */
6837
6838 /* etags.c ends here */