* net/dbus.el (dbus-introspect): Use `dbus-call-method-non-blocking'.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
49
50
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
54
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
58
59
60 /*
61 * Authors:
62 * 1983 Ctags originally by Ken Arnold.
63 * 1984 Fortran added by Jim Kleckner.
64 * 1984 Ed Pelegri-Llopart added C typedefs.
65 * 1985 Emacs TAGS format by Richard Stallman.
66 * 1989 Sam Kendall added C++.
67 * 1992 Joseph B. Wells improved C and C++ parsing.
68 * 1993 Francesco Potortì reorganized C and C++.
69 * 1994 Line-by-line regexp tags by Tom Tromey.
70 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
71 * 2002 #line directives by Francesco Potortì.
72 *
73 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
74 */
75
76 /*
77 * If you want to add support for a new language, start by looking at the LUA
78 * language, which is the simplest. Alternatively, consider distributing etags
79 * together with a configuration file containing regexp definitions for etags.
80 */
81
82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.3";
83
84 #define TRUE 1
85 #define FALSE 0
86
87 #ifdef DEBUG
88 # undef DEBUG
89 # define DEBUG TRUE
90 #else
91 # define DEBUG FALSE
92 # define NDEBUG /* disable assert */
93 #endif
94
95 #ifdef HAVE_CONFIG_H
96 # include <config.h>
97 /* On some systems, Emacs defines static as nothing for the sake
98 of unexec. We don't want that here since we don't use unexec. */
99 # undef static
100 # ifndef PTR /* for XEmacs */
101 # define PTR void *
102 # endif
103 # ifndef __P /* for XEmacs */
104 # define __P(args) args
105 # endif
106 #else /* no config.h */
107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
108 # define __P(args) args /* use prototypes */
109 # define PTR void * /* for generic pointers */
110 # else /* not standard C */
111 # define __P(args) () /* no prototypes */
112 # define const /* remove const for old compilers' sake */
113 # define PTR long * /* don't use void* */
114 # endif
115 #endif /* !HAVE_CONFIG_H */
116
117 #ifndef _GNU_SOURCE
118 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
119 #endif
120
121 /* WIN32_NATIVE is for XEmacs.
122 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
123 #ifdef WIN32_NATIVE
124 # undef MSDOS
125 # undef WINDOWSNT
126 # define WINDOWSNT
127 #endif /* WIN32_NATIVE */
128
129 #ifdef MSDOS
130 # undef MSDOS
131 # define MSDOS TRUE
132 # include <fcntl.h>
133 # include <sys/param.h>
134 # include <io.h>
135 # ifndef HAVE_CONFIG_H
136 # define DOS_NT
137 # include <sys/config.h>
138 # endif
139 #else
140 # define MSDOS FALSE
141 #endif /* MSDOS */
142
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # define EXIT_SUCCESS 0
176 # define EXIT_FAILURE 1
177 # endif
178 #endif /* !WINDOWSNT */
179
180 #ifdef HAVE_UNISTD_H
181 # include <unistd.h>
182 #else
183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
184 extern char *getcwd (char *buf, size_t size);
185 # endif
186 #endif /* HAVE_UNISTD_H */
187
188 #include <stdio.h>
189 #include <ctype.h>
190 #include <errno.h>
191 #ifndef errno
192 extern int errno;
193 #endif
194 #include <sys/types.h>
195 #include <sys/stat.h>
196
197 #include <assert.h>
198 #ifdef NDEBUG
199 # undef assert /* some systems have a buggy assert.h */
200 # define assert(x) ((void) 0)
201 #endif
202
203 #if !defined (S_ISREG) && defined (S_IFREG)
204 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
205 #endif
206
/* Normalize NO_LONG_OPTIONS to TRUE or FALSE.  Define it when building
   if you don't have GNU getopt: getopt_long is then mapped onto plain
   getopt, dropping the long-option arguments.  */
#ifdef NO_LONG_OPTIONS
# undef NO_LONG_OPTIONS		/* avoid an incompatible macro redefinition */
# define NO_LONG_OPTIONS TRUE
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
216
217 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
218 # ifdef __CYGWIN__ /* compiling on Cygwin */
219 !!! NOTICE !!!
220 the regex.h distributed with Cygwin is not compatible with etags, alas!
221 If you want regular expression support, you should delete this notice and
222 arrange to use the GNU regex.h and regex.c.
223 # endif
224 #endif
225 #include <regex.h>
226
227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
228 Leave it undefined to make the program "etags", which makes emacs-style
229 tag tables and tags typedefs, #defines and struct/union/enum by default. */
230 #ifdef CTAGS
231 # undef CTAGS
232 # define CTAGS TRUE
233 #else
234 # define CTAGS FALSE
235 #endif
236
/* String equality predicates: each yields nonzero when S and T compare
   equal (the `n' variants look at no more than N characters, the `case'
   variants ignore case).  When assertions are enabled, BOTH arguments
   are checked to be non-null before they reach the comparison.  */
#define streq(s,t)	(assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
241
/* Size of the per-character lookup tables: one slot for every value
   that CHAR can produce.  */
#define CHARS	256

/* Reduce X to a table index in the range 0 .. CHARS-1.  */
#define CHAR(x)		((unsigned int)(x) % CHARS)

/* Character class predicates, implemented as lookups in the boolean
   tables _wht, _nin, _btk, _itk and _etk.  */
#define iswhite(c)	(_wht[CHAR(c)])	/* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)])	/* c is not in a name (see nonam) */
#define begtoken(c)	(_btk[CHAR(c)])	/* c can start token (see begtk) */
#define intoken(c)	(_itk[CHAR(c)])	/* c can be in token (see midtk) */
#define endtoken(c)	(_etk[CHAR(c)])	/* c ends tokens (see endtk) */

/* <ctype.h> classification, with the argument first passed through CHAR.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

/* <ctype.h> case conversion, with the argument first passed through CHAR.  */
#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
257
258
/*
 * Typed storage allocation.
 *
 *   xnew (n, Type)	 evaluates to a `Type *' obtained by requesting
 *			 n * sizeof (Type) bytes.
 *   xrnew (op, n, Type) requests that the storage OP points to be
 *			 resized to n * sizeof (Type) bytes and assigns
 *			 the resulting `Type *' back to OP.
 *
 * Without DEBUG the requests go to xmalloc and xrealloc.  With DEBUG,
 * chkmalloc.h is included and the requests go to trace_malloc and
 * trace_realloc, which additionally receive __FILE__ and __LINE__.
 */
#if !DEBUG
# define xnew(n,Type) \
    ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) xrealloc ((char *) (op), (n) * sizeof (Type)))
#else
# include "chkmalloc.h"
# define xnew(n,Type) \
    ((Type *) trace_malloc (__FILE__, __LINE__, (n) * sizeof (Type)))
# define xrnew(op,n,Type) \
    ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
				    (char *) (op), (n) * sizeof (Type)))
#endif
276
277 #define bool int
278
279 typedef void Lang_function __P((FILE *));
280
/* One supported kind of compressed input file.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  char *suffix;
  /* Command that takes one arg and decompresses to stdout.  */
  char *command;
} compressor;
286
287 typedef struct
288 {
289 char *name; /* language name */
290 char *help; /* detailed help for the language */
291 Lang_function *function; /* parse function */
292 char **suffixes; /* name suffixes of this language's files */
293 char **filenames; /* names of this language's files */
294 char **interpreters; /* interpreters for this language */
295 bool metasource; /* source used to generate other sources */
296 } language;
297
298 typedef struct fdesc
299 {
300 struct fdesc *next; /* for the linked list */
301 char *infname; /* uncompressed input file name */
302 char *infabsname; /* absolute uncompressed input file name */
303 char *infabsdir; /* absolute dir of input file */
304 char *taggedfname; /* file name to write in tagfile */
305 language *lang; /* language of file */
306 char *prop; /* file properties to write in tagfile */
307 bool usecharno; /* etags tags shall contain char number */
308 bool written; /* entry written in the tags file */
309 } fdesc;
310
311 typedef struct node_st
312 { /* sorting structure */
313 struct node_st *left, *right; /* left and right sons */
314 fdesc *fdp; /* description of file to whom tag belongs */
315 char *name; /* tag name */
316 char *regex; /* search regexp */
317 bool valid; /* write this tag on the tag file */
318 bool is_func; /* function tag: use regexp in CTAGS mode */
319 bool been_warned; /* warning already given for duplicated tag */
320 int lno; /* line number tag is on */
321 long cno; /* character number line starts on */
322 } node;
323
/*
 * A `linebuffer' holds one line of text.  `readline_internal' reads a
 * line from a stream into a linebuffer, whatever the length of the line.
 */
typedef struct
{
  /* Size of BUFFER.  */
  long size;
  /* Length of the string in BUFFER after readline reads it.  */
  int len;
  /* The text itself.  */
  char *buffer;
} linebuffer;
337
338 /* Used to support mixing of --lang and file names. */
339 typedef struct
340 {
341 enum {
342 at_language, /* a language specification */
343 at_regexp, /* a regular expression */
344 at_filename, /* a file name */
345 at_stdin, /* read from stdin here */
346 at_end /* stop parsing the list */
347 } arg_type; /* argument type */
348 language *lang; /* language associated with the argument */
349 char *what; /* the argument itself */
350 } argument;
351
352 /* Structure defining a regular expression. */
353 typedef struct regexp
354 {
355 struct regexp *p_next; /* pointer to next in list */
356 language *lang; /* if set, use only for this language */
357 char *pattern; /* the regexp pattern */
358 char *name; /* tag name */
359 struct re_pattern_buffer *pat; /* the compiled pattern */
360 struct re_registers regs; /* re registers */
361 bool error_signaled; /* already signaled for this regexp */
362 bool force_explicit_name; /* do not allow implict tag name */
363 bool ignore_case; /* ignore case when matching */
364 bool multi_line; /* do a multi-line match on the whole file */
365 } regexp;
366
367
368 /* Many compilers barf on this:
369 Lang_function Ada_funcs;
370 so let's write it this way */
371 static void Ada_funcs __P((FILE *));
372 static void Asm_labels __P((FILE *));
373 static void C_entries __P((int c_ext, FILE *));
374 static void default_C_entries __P((FILE *));
375 static void plain_C_entries __P((FILE *));
376 static void Cjava_entries __P((FILE *));
377 static void Cobol_paragraphs __P((FILE *));
378 static void Cplusplus_entries __P((FILE *));
379 static void Cstar_entries __P((FILE *));
380 static void Erlang_functions __P((FILE *));
381 static void Forth_words __P((FILE *));
382 static void Fortran_functions __P((FILE *));
383 static void HTML_labels __P((FILE *));
384 static void Lisp_functions __P((FILE *));
385 static void Lua_functions __P((FILE *));
386 static void Makefile_targets __P((FILE *));
387 static void Pascal_functions __P((FILE *));
388 static void Perl_functions __P((FILE *));
389 static void PHP_functions __P((FILE *));
390 static void PS_functions __P((FILE *));
391 static void Prolog_functions __P((FILE *));
392 static void Python_functions __P((FILE *));
393 static void Scheme_functions __P((FILE *));
394 static void TeX_commands __P((FILE *));
395 static void Texinfo_nodes __P((FILE *));
396 static void Yacc_entries __P((FILE *));
397 static void just_read_file __P((FILE *));
398
399 static void print_language_names __P((void));
400 static void print_version __P((void));
401 static void print_help __P((argument *));
402 int main __P((int, char **));
403
404 static compressor *get_compressor_from_suffix __P((char *, char **));
405 static language *get_language_from_langname __P((const char *));
406 static language *get_language_from_interpreter __P((char *));
407 static language *get_language_from_filename __P((char *, bool));
408 static void readline __P((linebuffer *, FILE *));
409 static long readline_internal __P((linebuffer *, FILE *));
410 static bool nocase_tail __P((char *));
411 static void get_tag __P((char *, char **));
412
413 static void analyse_regex __P((char *));
414 static void free_regexps __P((void));
415 static void regex_tag_multiline __P((void));
416 static void error __P((const char *, const char *));
417 static void suggest_asking_for_help __P((void));
418 void fatal __P((char *, char *));
419 static void pfatal __P((char *));
420 static void add_node __P((node *, node **));
421
422 static void init __P((void));
423 static void process_file_name __P((char *, language *));
424 static void process_file __P((FILE *, char *, language *));
425 static void find_entries __P((FILE *));
426 static void free_tree __P((node *));
427 static void free_fdesc __P((fdesc *));
428 static void pfnote __P((char *, bool, char *, int, int, long));
429 static void make_tag __P((char *, int, bool, char *, int, int, long));
430 static void invalidate_nodes __P((fdesc *, node **));
431 static void put_entries __P((node *));
432
433 static char *concat __P((char *, char *, char *));
434 static char *skip_spaces __P((char *));
435 static char *skip_non_spaces __P((char *));
436 static char *savenstr __P((char *, int));
437 static char *savestr __P((char *));
438 static char *etags_strchr __P((const char *, int));
439 static char *etags_strrchr __P((const char *, int));
440 static int etags_strcasecmp __P((const char *, const char *));
441 static int etags_strncasecmp __P((const char *, const char *, int));
442 static char *etags_getcwd __P((void));
443 static char *relative_filename __P((char *, char *));
444 static char *absolute_filename __P((char *, char *));
445 static char *absolute_dirname __P((char *, char *));
446 static bool filename_is_absolute __P((char *f));
447 static void canonicalize_filename __P((char *));
448 static void linebuffer_init __P((linebuffer *));
449 static void linebuffer_setlen __P((linebuffer *, int));
450 static PTR xmalloc __P((unsigned int));
451 static PTR xrealloc __P((char *, unsigned int));
452
453 \f
454 static char searchar = '/'; /* use /.../ searches */
455
456 static char *tagfile; /* output file */
457 static char *progname; /* name this program was invoked with */
458 static char *cwd; /* current working directory */
459 static char *tagfiledir; /* directory of tagfile */
460 static FILE *tagf; /* ioptr for tags file */
461
462 static fdesc *fdhead; /* head of file description list */
463 static fdesc *curfdp; /* current file description */
464 static int lineno; /* line number of current line */
465 static long charno; /* current character number */
466 static long linecharno; /* charno of start of current line */
467 static char *dbp; /* pointer to start of current tag */
468
469 static const int invalidcharno = -1;
470
471 static node *nodehead; /* the head of the binary tree of tags */
472 static node *last_node; /* the last node created */
473
474 static linebuffer lb; /* the current line */
475 static linebuffer filebuf; /* a buffer containing the whole file */
476 static linebuffer token_name; /* a buffer containing a tag name */
477
478 /* boolean "functions" (see init) */
479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* Character classes, each given as a string listing its members.  */

/* White chars.  */
static char *white = " \f\t\n\r\v";

/* Not in a name.  Look at make_tag before modifying!  */
static char *nonam = " \f\t\n\r()=,;";

/* Token ending chars.  */
static char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?";

/* Token starting chars.  */
static char *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";

/* Valid in-token chars.  */
static char *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
491
/* -a: append to tags.  */
static bool append_to_tagfile;

/* The next five default to TRUE in C and derived languages.  */

/* -t: create tags for C and Ada typedefs.  */
static bool typedefs;
/* -T: create tags for C typedefs, level 0 struct/enum/union decls,
   and C++ member functions.  */
static bool typedefs_or_cplusplus;
/* -d: create tags for C #define, enum constants and variables.
   -D: opposite of -d.  Default under ctags.  */
static bool constantypedefs;
/* Create tags for global variables.  */
static bool globals;
/* Create tags for C member variables.  */
static bool members;

/* --declarations: tag them and extern in C&Co.  */
static bool declarations;
/* Ignore #line directives (undocumented).  */
static bool no_line_directive;
/* No duplicate tags for ctags (undocumented).  */
static bool no_duplicates;
/* -u: update tags.  */
static bool update;
/* -v: create vgrind style index output.  */
static bool vgrind_style;
/* -w: suppress warnings (undocumented).  */
static bool no_warnings;
/* -x: create cxref style output.  */
static bool cxref_style;
/* .[hc] means C++, not C (undocumented).  */
static bool cplusplus;
/* -I: ignore indentation in C.  */
static bool ignoreindent;
/* --packages-only: in Ada, only tag packages.  */
static bool packages_only;
513
514 /* STDIN is defined in LynxOS system headers */
515 #ifdef STDIN
516 # undef STDIN
517 #endif
518
519 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
520 static bool parsing_stdin; /* --parse-stdin used */
521
522 static regexp *p_head; /* list of all regexps */
523 static bool need_filebuf; /* some regexes are multi-line */
524
525 static struct option longopts[] =
526 {
527 { "append", no_argument, NULL, 'a' },
528 { "packages-only", no_argument, &packages_only, TRUE },
529 { "c++", no_argument, NULL, 'C' },
530 { "declarations", no_argument, &declarations, TRUE },
531 { "no-line-directive", no_argument, &no_line_directive, TRUE },
532 { "no-duplicates", no_argument, &no_duplicates, TRUE },
533 { "help", no_argument, NULL, 'h' },
534 { "help", no_argument, NULL, 'H' },
535 { "ignore-indentation", no_argument, NULL, 'I' },
536 { "language", required_argument, NULL, 'l' },
537 { "members", no_argument, &members, TRUE },
538 { "no-members", no_argument, &members, FALSE },
539 { "output", required_argument, NULL, 'o' },
540 { "regex", required_argument, NULL, 'r' },
541 { "no-regex", no_argument, NULL, 'R' },
542 { "ignore-case-regex", required_argument, NULL, 'c' },
543 { "parse-stdin", required_argument, NULL, STDIN },
544 { "version", no_argument, NULL, 'V' },
545
546 #if CTAGS /* Ctags options */
547 { "backward-search", no_argument, NULL, 'B' },
548 { "cxref", no_argument, NULL, 'x' },
549 { "defines", no_argument, NULL, 'd' },
550 { "globals", no_argument, &globals, TRUE },
551 { "typedefs", no_argument, NULL, 't' },
552 { "typedefs-and-c++", no_argument, NULL, 'T' },
553 { "update", no_argument, NULL, 'u' },
554 { "vgrind", no_argument, NULL, 'v' },
555 { "no-warn", no_argument, NULL, 'w' },
556
557 #else /* Etags options */
558 { "no-defines", no_argument, NULL, 'D' },
559 { "no-globals", no_argument, &globals, FALSE },
560 { "include", required_argument, NULL, 'i' },
561 #endif
562 { NULL }
563 };
564
565 static compressor compressors[] =
566 {
567 { "z", "gzip -d -c"},
568 { "Z", "gzip -d -c"},
569 { "gz", "gzip -d -c"},
570 { "GZ", "gzip -d -c"},
571 { "bz2", "bzip2 -d -c" },
572 { NULL }
573 };
574
575 /*
576 * Language stuff.
577 */
578
/* Ada code: file name suffixes (NULL-terminated) and help text.  */
static char *Ada_suffixes [] =
{
  "ads",
  "adb",
  "ada",
  NULL
};
static char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
598
/* Assembly code: file name suffixes (NULL-terminated) and help text.  */
static char *Asm_suffixes [] =
{
  "a",		/* Unix assembler */
  "asm",	/* Microcontroller assembly */
  "def",	/* BSO/Tasking definition includes */
  "inc",	/* Microcontroller include files */
  "ins",	/* Microcontroller include files */
  "s",		/* Unix assembler */
  "sa",		/* Unix assembler */
  "S",		/* cpp-processed Unix assembler */
  "src",	/* BSO/Tasking C compiler output */
  NULL
};
static char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
614
615
/* Suffixes handled by default_C_entries.  Files named .c and .h can be
   considered C++, if the --c++ flag was given, or if the `class' or
   `template' keywords are met inside the file; that is why
   default_C_entries is called for these.  */
static char *default_C_suffixes [] =
{
  "c",
  "h",
  NULL
};

/* Help text for C; the wording depends on whether this is Ctags or
   Etags.  */
static char default_C_help [] =
#if CTAGS
  /* C help for Ctags.  */
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'."
#else
  /* C help for Etags.  */
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'."
#endif /* C help for Ctags and Etags */
  ;
641
/* C++ code: file name suffixes (NULL-terminated) and help text.  */
static char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",		/* Objective C++ */
  "pdb",	/* Postscript with C syntax */
  NULL
};
static char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";
655
/* Java code: file name suffixes (NULL-terminated) and help text.  */
static char *Cjava_suffixes [] =
{
  "java",
  NULL
};
static char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
661
662
/* Cobol code: file name suffixes (NULL-terminated) and help text.  */
static char *Cobol_suffixes [] =
{
  "COB",
  "cob",
  NULL
};
static char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";
668
/* C* code: file name suffixes (NULL-terminated).  */
static char *Cstar_suffixes [] =
{
  "cs",
  "hs",
  NULL
};
671
/* Erlang code: file name suffixes (NULL-terminated) and help text.  */
static char *Erlang_suffixes [] =
{
  "erl",
  "hrl",
  NULL
};
static char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
677
/* Forth code: file name suffixes (NULL-terminated) and help text.
   Like every other suffix table in this file, Forth_suffixes has
   internal linkage.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
683
/* Fortran code: file name suffixes (NULL-terminated) and help text.  */
static char *Fortran_suffixes [] =
{
  "F",
  "f",
  "f90",
  "for",
  NULL
};
static char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";
688
/* HTML input: file name suffixes (NULL-terminated) and help text.  */
static char *HTML_suffixes [] =
{
  "htm",
  "html",
  "shtml",
  NULL
};
static char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";
695
/* Lisp code: file name suffixes (NULL-terminated) and help text.  */
static char *Lisp_suffixes [] =
{
  "cl",
  "clisp",
  "el",
  "l",
  "lisp",
  "LSP",
  "lsp",
  "ml",
  NULL
};
static char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";
703
/* Lua scripts: file name suffixes (NULL-terminated) and help text.  */
static char *Lua_suffixes [] =
{
  "lua",
  "LUA",
  NULL
};
static char Lua_help [] =
  "In Lua scripts, all functions are tags.";
708
/* Makefiles: complete file names (NULL-terminated) and help text.  */
static char *Makefile_filenames [] =
{
  "Makefile",
  "makefile",
  "GNUMakefile",
  "Makefile.in",
  "Makefile.am",
  NULL
};
static char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";
714
/* Objective C code: file name suffixes (NULL-terminated) and help text.  */
static char *Objc_suffixes [] =
{
  "lm",		/* Objective lex file */
  "m",		/* Objective C file */
  NULL
};
static char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";
724
/* Pascal code: file name suffixes (NULL-terminated) and help text.  */
static char *Pascal_suffixes [] =
{
  "p",
  "pas",
  NULL
};
static char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
730 /* " // this is for working around an Emacs highlighting bug... */
731
/* Perl code: file name suffixes and interpreter names (both
   NULL-terminated) and help text.  */
static char *Perl_suffixes [] =
{
  "pl",
  "pm",
  NULL
};
static char *Perl_interpreters [] =
{
  "perl",
  "@PERL@",
  NULL
};
static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";
742
/* PHP code: file name suffixes (NULL-terminated) and help text.  */
static char *PHP_suffixes [] =
{
  "php",
  "php3",
  "php4",
  NULL
};
static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";
748
/* File name suffixes (NULL-terminated) of the files handled as plain C.  */
static char *plain_C_suffixes [] =
{
  "pc",		/* Pro*C file */
  NULL
};
752
/* PostScript code: file name suffixes (NULL-terminated) and help text.  */
static char *PS_suffixes [] =
{
  "ps",
  "psw",	/* .psw is for PSWrap */
  NULL
};
static char PS_help [] =
  "In PostScript code, the tags are the functions.";
757
/* Prolog code: file name suffixes (NULL-terminated) and help text.  */
static char *Prolog_suffixes [] =
{
  "prolog",
  NULL
};
static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";
763
/* Python code: file name suffixes (NULL-terminated) and help text.  */
static char *Python_suffixes [] =
{
  "py",
  NULL
};
static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";
769
/* Scheme code: file name suffixes (NULL-terminated) and help text.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static char *Scheme_suffixes [] =
{
  "oak",
  "sch",
  "scheme",
  "SCM",
  "scm",
  "SM",
  "sm",
  "ss",
  "t",
  NULL
};
static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
777
/* TeX and LaTeX text: file name suffixes (NULL-terminated) and help
   text.  */
static char *TeX_suffixes [] =
{
  "bib",
  "clo",
  "cls",
  "ltx",
  "sty",
  "TeX",
  "tex",
  NULL
};
static char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";
790
791
/* Texinfo files: file name suffixes (NULL-terminated) and help text.  */
static char *Texinfo_suffixes [] =
{
  "texi",
  "texinfo",
  "txi",
  NULL
};
static char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";
796
/* Bison or Yacc input: file name suffixes (NULL-terminated) and help
   text.  */
static char *Yacc_suffixes [] =
{
  "y",
  "y++",
  "ym",		/* .ym is Objective yacc file */
  "yxx",
  "yy",
  NULL
};
static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
804
/* Help text shown for the pseudo-language `auto'.  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";
808
/* Help text listed for the pseudo-language "none" in the language
   table. */
static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Placeholder help text used by language table entries that have no
   description of their own. */
static char no_lang_help [] =
  "No detailed help available for this language.";
815
816
817 /*
818 * Table of languages.
819 *
820 * It is ok for a given function to be listed under more than one
821 * name. I just didn't.
822 */
823
824 static language lang_names [] =
825 {
826 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
827 { "asm", Asm_help, Asm_labels, Asm_suffixes },
828 { "c", default_C_help, default_C_entries, default_C_suffixes },
829 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
830 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
831 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
832 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
833 { "forth", Forth_help, Forth_words, Forth_suffixes },
834 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
835 { "html", HTML_help, HTML_labels, HTML_suffixes },
836 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
837 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
838 { "lua", Lua_help, Lua_functions, Lua_suffixes },
839 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
840 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
841 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
842 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
843 { "php", PHP_help, PHP_functions, PHP_suffixes },
844 { "postscript",PS_help, PS_functions, PS_suffixes },
845 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
846 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
847 { "python", Python_help, Python_functions, Python_suffixes },
848 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
849 { "tex", TeX_help, TeX_commands, TeX_suffixes },
850 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
851 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
852 { "auto", auto_help }, /* default guessing scheme */
853 { "none", none_help, just_read_file }, /* regexp matching only */
854 { NULL } /* end of list */
855 };
856
857 \f
858 static void
859 print_language_names ()
860 {
861 language *lang;
862 char **name, **ext;
863
864 puts ("\nThese are the currently supported languages, along with the\n\
865 default file names and dot suffixes:");
866 for (lang = lang_names; lang->name != NULL; lang++)
867 {
868 printf (" %-*s", 10, lang->name);
869 if (lang->filenames != NULL)
870 for (name = lang->filenames; *name != NULL; name++)
871 printf (" %s", *name);
872 if (lang->suffixes != NULL)
873 for (ext = lang->suffixes; *ext != NULL; ext++)
874 printf (" .%s", *ext);
875 puts ("");
876 }
877 puts ("where `auto' means use default language for files based on file\n\
878 name suffix, and `none' means only do regexp processing on files.\n\
879 If no language is specified and no matching suffix is found,\n\
880 the first line of the file is read for a sharp-bang (#!) sequence\n\
881 followed by the name of an interpreter. If no such sequence is found,\n\
882 Fortran is tried first; if no tags are found, C is tried next.\n\
883 When parsing any C file, a \"class\" or \"template\" keyword\n\
884 switches to C++.");
885 puts ("Compressed files are supported using gzip and bzip2.\n\
886 \n\
887 For detailed help on a given language use, for example,\n\
888 etags --help --lang=ada.");
889 }
890
891 #ifndef EMACS_NAME
892 # define EMACS_NAME "standalone"
893 #endif
894 #ifndef VERSION
895 # define VERSION "17.38.1.3"
896 #endif
897 static void
898 print_version ()
899 {
900 /* Makes it easier to update automatically. */
901 char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
902
903 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
904 puts (emacs_copyright);
905 puts ("This program is distributed under the terms in ETAGS.README");
906
907 exit (EXIT_SUCCESS);
908 }
909
910 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
911 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
912 #endif
913
914 static void
915 print_help (argbuffer)
916 argument *argbuffer;
917 {
918 bool help_for_lang = FALSE;
919
920 for (; argbuffer->arg_type != at_end; argbuffer++)
921 if (argbuffer->arg_type == at_language)
922 {
923 if (help_for_lang)
924 puts ("");
925 puts (argbuffer->lang->help);
926 help_for_lang = TRUE;
927 }
928
929 if (help_for_lang)
930 exit (EXIT_SUCCESS);
931
932 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
933 \n\
934 These are the options accepted by %s.\n", progname, progname);
935 if (NO_LONG_OPTIONS)
936 puts ("WARNING: long option names do not work with this executable,\n\
937 as it is not linked with GNU getopt.");
938 else
939 puts ("You may use unambiguous abbreviations for the long option names.");
940 puts (" A - as file name means read names from stdin (one per line).\n\
941 Absolute names are stored in the output file as they are.\n\
942 Relative ones are stored relative to the output file's directory.\n");
943
944 puts ("-a, --append\n\
945 Append tag entries to existing tags file.");
946
947 puts ("--packages-only\n\
948 For Ada files, only generate tags for packages.");
949
950 if (CTAGS)
951 puts ("-B, --backward-search\n\
952 Write the search commands for the tag entries using '?', the\n\
953 backward-search command instead of '/', the forward-search command.");
954
955 /* This option is mostly obsolete, because etags can now automatically
956 detect C++. Retained for backward compatibility and for debugging and
957 experimentation. In principle, we could want to tag as C++ even
958 before any "class" or "template" keyword.
959 puts ("-C, --c++\n\
960 Treat files whose name suffix defaults to C language as C++ files.");
961 */
962
963 puts ("--declarations\n\
964 In C and derived languages, create tags for function declarations,");
965 if (CTAGS)
966 puts ("\tand create tags for extern variables if --globals is used.");
967 else
968 puts
969 ("\tand create tags for extern variables unless --no-globals is used.");
970
971 if (CTAGS)
972 puts ("-d, --defines\n\
973 Create tag entries for C #define constants and enum constants, too.");
974 else
975 puts ("-D, --no-defines\n\
976 Don't create tag entries for C #define constants and enum constants.\n\
977 This makes the tags file smaller.");
978
979 if (!CTAGS)
980 puts ("-i FILE, --include=FILE\n\
981 Include a note in tag file indicating that, when searching for\n\
982 a tag, one should also consult the tags file FILE after\n\
983 checking the current file.");
984
985 puts ("-l LANG, --language=LANG\n\
986 Force the following files to be considered as written in the\n\
987 named language up to the next --language=LANG option.");
988
989 if (CTAGS)
990 puts ("--globals\n\
991 Create tag entries for global variables in some languages.");
992 else
993 puts ("--no-globals\n\
994 Do not create tag entries for global variables in some\n\
995 languages. This makes the tags file smaller.");
996
997 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
998 puts ("--no-line-directive\n\
999 Ignore #line preprocessor directives in C and derived languages.");
1000
1001 if (CTAGS)
1002 puts ("--members\n\
1003 Create tag entries for members of structures in some languages.");
1004 else
1005 puts ("--no-members\n\
1006 Do not create tag entries for members of structures\n\
1007 in some languages.");
1008
1009 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1010 Make a tag for each line matching a regular expression pattern\n\
1011 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1012 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1013 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1014 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1015 puts (" If TAGNAME/ is present, the tags created are named.\n\
1016 For example Tcl named tags can be created with:\n\
1017 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1018 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1019 `m' means to allow multi-line matches, `s' implies `m' and\n\
1020 causes dot to match any character, including newline.");
1021
1022 puts ("-R, --no-regex\n\
1023 Don't create tags from regexps for the following files.");
1024
1025 puts ("-I, --ignore-indentation\n\
1026 In C and C++ do not assume that a closing brace in the first\n\
1027 column is the final brace of a function or structure definition.");
1028
1029 puts ("-o FILE, --output=FILE\n\
1030 Write the tags to FILE.");
1031
1032 puts ("--parse-stdin=NAME\n\
1033 Read from standard input and record tags as belonging to file NAME.");
1034
1035 if (CTAGS)
1036 {
1037 puts ("-t, --typedefs\n\
1038 Generate tag entries for C and Ada typedefs.");
1039 puts ("-T, --typedefs-and-c++\n\
1040 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1041 and C++ member functions.");
1042 }
1043
1044 if (CTAGS)
1045 puts ("-u, --update\n\
1046 Update the tag entries for the given files, leaving tag\n\
1047 entries for other files in place. Currently, this is\n\
1048 implemented by deleting the existing entries for the given\n\
1049 files and then rewriting the new entries at the end of the\n\
1050 tags file. It is often faster to simply rebuild the entire\n\
1051 tag file than to use this.");
1052
1053 if (CTAGS)
1054 {
1055 puts ("-v, --vgrind\n\
1056 Print on the standard output an index of items intended for\n\
1057 human consumption, similar to the output of vgrind. The index\n\
1058 is sorted, and gives the page number of each item.");
1059
1060 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1061 puts ("-w, --no-duplicates\n\
1062 Do not create duplicate tag entries, for compatibility with\n\
1063 traditional ctags.");
1064
1065 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066 puts ("-w, --no-warn\n\
1067 Suppress warning messages about duplicate tag entries.");
1068
1069 puts ("-x, --cxref\n\
1070 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1071 The output uses line numbers instead of page numbers, but\n\
1072 beyond that the differences are cosmetic; try both to see\n\
1073 which you like.");
1074 }
1075
1076 puts ("-V, --version\n\
1077 Print the version of the program.\n\
1078 -h, --help\n\
1079 Print this help message.\n\
1080 Followed by one or more `--language' options prints detailed\n\
1081 help about tag generation for the specified languages.");
1082
1083 print_language_names ();
1084
1085 puts ("");
1086 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1087
1088 exit (EXIT_SUCCESS);
1089 }
1090
1091 \f
1092 int
1093 main (argc, argv)
1094 int argc;
1095 char *argv[];
1096 {
1097 int i;
1098 unsigned int nincluded_files;
1099 char **included_files;
1100 argument *argbuffer;
1101 int current_arg, file_count;
1102 linebuffer filename_lb;
1103 bool help_asked = FALSE;
1104 char *optstring;
1105 int opt;
1106
1107
1108 #ifdef DOS_NT
1109 _fmode = O_BINARY; /* all of files are treated as binary files */
1110 #endif /* DOS_NT */
1111
1112 progname = argv[0];
1113 nincluded_files = 0;
1114 included_files = xnew (argc, char *);
1115 current_arg = 0;
1116 file_count = 0;
1117
1118 /* Allocate enough no matter what happens. Overkill, but each one
1119 is small. */
1120 argbuffer = xnew (argc, argument);
1121
1122 /*
1123 * Always find typedefs and structure tags.
1124 * Also default to find macro constants, enum constants, struct
1125 * members and global variables. Do it for both etags and ctags.
1126 */
1127 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1128 globals = members = TRUE;
1129
1130 /* When the optstring begins with a '-' getopt_long does not rearrange the
1131 non-options arguments to be at the end, but leaves them alone. */
1132 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1133 "ac:Cf:Il:o:r:RSVhH",
1134 (CTAGS) ? "BxdtTuvw" : "Di:");
1135
1136 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1137 switch (opt)
1138 {
1139 case 0:
1140 /* If getopt returns 0, then it has already processed a
1141 long-named option. We should do nothing. */
1142 break;
1143
1144 case 1:
1145 /* This means that a file name has been seen. Record it. */
1146 argbuffer[current_arg].arg_type = at_filename;
1147 argbuffer[current_arg].what = optarg;
1148 ++current_arg;
1149 ++file_count;
1150 break;
1151
1152 case STDIN:
1153 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1154 argbuffer[current_arg].arg_type = at_stdin;
1155 argbuffer[current_arg].what = optarg;
1156 ++current_arg;
1157 ++file_count;
1158 if (parsing_stdin)
1159 fatal ("cannot parse standard input more than once", (char *)NULL);
1160 parsing_stdin = TRUE;
1161 break;
1162
1163 /* Common options. */
1164 case 'a': append_to_tagfile = TRUE; break;
1165 case 'C': cplusplus = TRUE; break;
1166 case 'f': /* for compatibility with old makefiles */
1167 case 'o':
1168 if (tagfile)
1169 {
1170 error ("-o option may only be given once.", (char *)NULL);
1171 suggest_asking_for_help ();
1172 /* NOTREACHED */
1173 }
1174 tagfile = optarg;
1175 break;
1176 case 'I':
1177 case 'S': /* for backward compatibility */
1178 ignoreindent = TRUE;
1179 break;
1180 case 'l':
1181 {
1182 language *lang = get_language_from_langname (optarg);
1183 if (lang != NULL)
1184 {
1185 argbuffer[current_arg].lang = lang;
1186 argbuffer[current_arg].arg_type = at_language;
1187 ++current_arg;
1188 }
1189 }
1190 break;
1191 case 'c':
1192 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1193 optarg = concat (optarg, "i", ""); /* memory leak here */
1194 /* FALLTHRU */
1195 case 'r':
1196 argbuffer[current_arg].arg_type = at_regexp;
1197 argbuffer[current_arg].what = optarg;
1198 ++current_arg;
1199 break;
1200 case 'R':
1201 argbuffer[current_arg].arg_type = at_regexp;
1202 argbuffer[current_arg].what = NULL;
1203 ++current_arg;
1204 break;
1205 case 'V':
1206 print_version ();
1207 break;
1208 case 'h':
1209 case 'H':
1210 help_asked = TRUE;
1211 break;
1212
1213 /* Etags options */
1214 case 'D': constantypedefs = FALSE; break;
1215 case 'i': included_files[nincluded_files++] = optarg; break;
1216
1217 /* Ctags options. */
1218 case 'B': searchar = '?'; break;
1219 case 'd': constantypedefs = TRUE; break;
1220 case 't': typedefs = TRUE; break;
1221 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1222 case 'u': update = TRUE; break;
1223 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1224 case 'x': cxref_style = TRUE; break;
1225 case 'w': no_warnings = TRUE; break;
1226 default:
1227 suggest_asking_for_help ();
1228 /* NOTREACHED */
1229 }
1230
1231 /* No more options. Store the rest of arguments. */
1232 for (; optind < argc; optind++)
1233 {
1234 argbuffer[current_arg].arg_type = at_filename;
1235 argbuffer[current_arg].what = argv[optind];
1236 ++current_arg;
1237 ++file_count;
1238 }
1239
1240 argbuffer[current_arg].arg_type = at_end;
1241
1242 if (help_asked)
1243 print_help (argbuffer);
1244 /* NOTREACHED */
1245
1246 if (nincluded_files == 0 && file_count == 0)
1247 {
1248 error ("no input files specified.", (char *)NULL);
1249 suggest_asking_for_help ();
1250 /* NOTREACHED */
1251 }
1252
1253 if (tagfile == NULL)
1254 tagfile = CTAGS ? "tags" : "TAGS";
1255 cwd = etags_getcwd (); /* the current working directory */
1256 if (cwd[strlen (cwd) - 1] != '/')
1257 {
1258 char *oldcwd = cwd;
1259 cwd = concat (oldcwd, "/", "");
1260 free (oldcwd);
1261 }
1262 /* Relative file names are made relative to the current directory. */
1263 if (streq (tagfile, "-")
1264 || strneq (tagfile, "/dev/", 5))
1265 tagfiledir = cwd;
1266 else
1267 {
1268 canonicalize_filename (tagfile);
1269 tagfiledir = absolute_dirname (tagfile, cwd);
1270 }
1271
1272 init (); /* set up boolean "functions" */
1273
1274 linebuffer_init (&lb);
1275 linebuffer_init (&filename_lb);
1276 linebuffer_init (&filebuf);
1277 linebuffer_init (&token_name);
1278
1279 if (!CTAGS)
1280 {
1281 if (streq (tagfile, "-"))
1282 {
1283 tagf = stdout;
1284 #ifdef DOS_NT
1285 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1286 doesn't take effect until after `stdout' is already open). */
1287 if (!isatty (fileno (stdout)))
1288 setmode (fileno (stdout), O_BINARY);
1289 #endif /* DOS_NT */
1290 }
1291 else
1292 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1293 if (tagf == NULL)
1294 pfatal (tagfile);
1295 }
1296
1297 /*
1298 * Loop through files finding functions.
1299 */
1300 for (i = 0; i < current_arg; i++)
1301 {
1302 static language *lang; /* non-NULL if language is forced */
1303 char *this_file;
1304
1305 switch (argbuffer[i].arg_type)
1306 {
1307 case at_language:
1308 lang = argbuffer[i].lang;
1309 break;
1310 case at_regexp:
1311 analyse_regex (argbuffer[i].what);
1312 break;
1313 case at_filename:
1314 this_file = argbuffer[i].what;
1315 /* Input file named "-" means read file names from stdin
1316 (one per line) and use them. */
1317 if (streq (this_file, "-"))
1318 {
1319 if (parsing_stdin)
1320 fatal ("cannot parse standard input AND read file names from it",
1321 (char *)NULL);
1322 while (readline_internal (&filename_lb, stdin) > 0)
1323 process_file_name (filename_lb.buffer, lang);
1324 }
1325 else
1326 process_file_name (this_file, lang);
1327 break;
1328 case at_stdin:
1329 this_file = argbuffer[i].what;
1330 process_file (stdin, this_file, lang);
1331 break;
1332 }
1333 }
1334
1335 free_regexps ();
1336 free (lb.buffer);
1337 free (filebuf.buffer);
1338 free (token_name.buffer);
1339
1340 if (!CTAGS || cxref_style)
1341 {
1342 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1343 put_entries (nodehead);
1344 free_tree (nodehead);
1345 nodehead = NULL;
1346 if (!CTAGS)
1347 {
1348 fdesc *fdp;
1349
1350 /* Output file entries that have no tags. */
1351 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1352 if (!fdp->written)
1353 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1354
1355 while (nincluded_files-- > 0)
1356 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1357
1358 if (fclose (tagf) == EOF)
1359 pfatal (tagfile);
1360 }
1361
1362 exit (EXIT_SUCCESS);
1363 }
1364
1365 /* From here on, we are in (CTAGS && !cxref_style) */
1366 if (update)
1367 {
1368 char cmd[BUFSIZ];
1369 for (i = 0; i < current_arg; ++i)
1370 {
1371 switch (argbuffer[i].arg_type)
1372 {
1373 case at_filename:
1374 case at_stdin:
1375 break;
1376 default:
1377 continue; /* the for loop */
1378 }
1379 sprintf (cmd,
1380 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1381 tagfile, argbuffer[i].what, tagfile);
1382 if (system (cmd) != EXIT_SUCCESS)
1383 fatal ("failed to execute shell command", (char *)NULL);
1384 }
1385 append_to_tagfile = TRUE;
1386 }
1387
1388 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1389 if (tagf == NULL)
1390 pfatal (tagfile);
1391 put_entries (nodehead); /* write all the tags (CTAGS) */
1392 free_tree (nodehead);
1393 nodehead = NULL;
1394 if (fclose (tagf) == EOF)
1395 pfatal (tagfile);
1396
1397 if (CTAGS)
1398 if (append_to_tagfile || update)
1399 {
1400 char cmd[2*BUFSIZ+20];
1401 /* Maybe these should be used:
1402 setenv ("LC_COLLATE", "C", 1);
1403 setenv ("LC_ALL", "C", 1); */
1404 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1405 exit (system (cmd));
1406 }
1407 return EXIT_SUCCESS;
1408 }
1409
1410
1411 /*
1412 * Return a compressor given the file name. If EXTPTR is non-zero,
1413 * return a pointer into FILE where the compressor-specific
1414 * extension begins. If no compressor is found, NULL is returned
1415 * and EXTPTR is not significant.
1416 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1417 */
1418 static compressor *
1419 get_compressor_from_suffix (file, extptr)
1420 char *file;
1421 char **extptr;
1422 {
1423 compressor *compr;
1424 char *slash, *suffix;
1425
1426 /* File has been processed by canonicalize_filename,
1427 so we don't need to consider backslashes on DOS_NT. */
1428 slash = etags_strrchr (file, '/');
1429 suffix = etags_strrchr (file, '.');
1430 if (suffix == NULL || suffix < slash)
1431 return NULL;
1432 if (extptr != NULL)
1433 *extptr = suffix;
1434 suffix += 1;
1435 /* Let those poor souls who live with DOS 8+3 file name limits get
1436 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1437 Only the first do loop is run if not MSDOS */
1438 do
1439 {
1440 for (compr = compressors; compr->suffix != NULL; compr++)
1441 if (streq (compr->suffix, suffix))
1442 return compr;
1443 if (!MSDOS)
1444 break; /* do it only once: not really a loop */
1445 if (extptr != NULL)
1446 *extptr = ++suffix;
1447 } while (*suffix != '\0');
1448 return NULL;
1449 }
1450
1451
1452
1453 /*
1454 * Return a language given the name.
1455 */
1456 static language *
1457 get_language_from_langname (name)
1458 const char *name;
1459 {
1460 language *lang;
1461
1462 if (name == NULL)
1463 error ("empty language name", (char *)NULL);
1464 else
1465 {
1466 for (lang = lang_names; lang->name != NULL; lang++)
1467 if (streq (name, lang->name))
1468 return lang;
1469 error ("unknown language \"%s\"", name);
1470 }
1471
1472 return NULL;
1473 }
1474
1475
1476 /*
1477 * Return a language given the interpreter name.
1478 */
1479 static language *
1480 get_language_from_interpreter (interpreter)
1481 char *interpreter;
1482 {
1483 language *lang;
1484 char **iname;
1485
1486 if (interpreter == NULL)
1487 return NULL;
1488 for (lang = lang_names; lang->name != NULL; lang++)
1489 if (lang->interpreters != NULL)
1490 for (iname = lang->interpreters; *iname != NULL; iname++)
1491 if (streq (*iname, interpreter))
1492 return lang;
1493
1494 return NULL;
1495 }
1496
1497
1498
1499 /*
1500 * Return a language given the file name.
1501 */
1502 static language *
1503 get_language_from_filename (file, case_sensitive)
1504 char *file;
1505 bool case_sensitive;
1506 {
1507 language *lang;
1508 char **name, **ext, *suffix;
1509
1510 /* Try whole file name first. */
1511 for (lang = lang_names; lang->name != NULL; lang++)
1512 if (lang->filenames != NULL)
1513 for (name = lang->filenames; *name != NULL; name++)
1514 if ((case_sensitive)
1515 ? streq (*name, file)
1516 : strcaseeq (*name, file))
1517 return lang;
1518
1519 /* If not found, try suffix after last dot. */
1520 suffix = etags_strrchr (file, '.');
1521 if (suffix == NULL)
1522 return NULL;
1523 suffix += 1;
1524 for (lang = lang_names; lang->name != NULL; lang++)
1525 if (lang->suffixes != NULL)
1526 for (ext = lang->suffixes; *ext != NULL; ext++)
1527 if ((case_sensitive)
1528 ? streq (*ext, suffix)
1529 : strcaseeq (*ext, suffix))
1530 return lang;
1531 return NULL;
1532 }
1533
1534 \f
1535 /*
1536 * This routine is called on each file argument.
1537 */
1538 static void
1539 process_file_name (file, lang)
1540 char *file;
1541 language *lang;
1542 {
1543 struct stat stat_buf;
1544 FILE *inf;
1545 fdesc *fdp;
1546 compressor *compr;
1547 char *compressed_name, *uncompressed_name;
1548 char *ext, *real_name;
1549 int retval;
1550
1551 canonicalize_filename (file);
1552 if (streq (file, tagfile) && !streq (tagfile, "-"))
1553 {
1554 error ("skipping inclusion of %s in self.", file);
1555 return;
1556 }
1557 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1558 {
1559 compressed_name = NULL;
1560 real_name = uncompressed_name = savestr (file);
1561 }
1562 else
1563 {
1564 real_name = compressed_name = savestr (file);
1565 uncompressed_name = savenstr (file, ext - file);
1566 }
1567
1568 /* If the canonicalized uncompressed name
1569 has already been dealt with, skip it silently. */
1570 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1571 {
1572 assert (fdp->infname != NULL);
1573 if (streq (uncompressed_name, fdp->infname))
1574 goto cleanup;
1575 }
1576
1577 if (stat (real_name, &stat_buf) != 0)
1578 {
1579 /* Reset real_name and try with a different name. */
1580 real_name = NULL;
1581 if (compressed_name != NULL) /* try with the given suffix */
1582 {
1583 if (stat (uncompressed_name, &stat_buf) == 0)
1584 real_name = uncompressed_name;
1585 }
1586 else /* try all possible suffixes */
1587 {
1588 for (compr = compressors; compr->suffix != NULL; compr++)
1589 {
1590 compressed_name = concat (file, ".", compr->suffix);
1591 if (stat (compressed_name, &stat_buf) != 0)
1592 {
1593 if (MSDOS)
1594 {
1595 char *suf = compressed_name + strlen (file);
1596 size_t suflen = strlen (compr->suffix) + 1;
1597 for ( ; suf[1]; suf++, suflen--)
1598 {
1599 memmove (suf, suf + 1, suflen);
1600 if (stat (compressed_name, &stat_buf) == 0)
1601 {
1602 real_name = compressed_name;
1603 break;
1604 }
1605 }
1606 if (real_name != NULL)
1607 break;
1608 } /* MSDOS */
1609 free (compressed_name);
1610 compressed_name = NULL;
1611 }
1612 else
1613 {
1614 real_name = compressed_name;
1615 break;
1616 }
1617 }
1618 }
1619 if (real_name == NULL)
1620 {
1621 perror (file);
1622 goto cleanup;
1623 }
1624 } /* try with a different name */
1625
1626 if (!S_ISREG (stat_buf.st_mode))
1627 {
1628 error ("skipping %s: it is not a regular file.", real_name);
1629 goto cleanup;
1630 }
1631 if (real_name == compressed_name)
1632 {
1633 char *cmd = concat (compr->command, " ", real_name);
1634 inf = (FILE *) popen (cmd, "r");
1635 free (cmd);
1636 }
1637 else
1638 inf = fopen (real_name, "r");
1639 if (inf == NULL)
1640 {
1641 perror (real_name);
1642 goto cleanup;
1643 }
1644
1645 process_file (inf, uncompressed_name, lang);
1646
1647 if (real_name == compressed_name)
1648 retval = pclose (inf);
1649 else
1650 retval = fclose (inf);
1651 if (retval < 0)
1652 pfatal (file);
1653
1654 cleanup:
1655 free (compressed_name);
1656 free (uncompressed_name);
1657 last_node = NULL;
1658 curfdp = NULL;
1659 return;
1660 }
1661
1662 static void
1663 process_file (fh, fn, lang)
1664 FILE *fh;
1665 char *fn;
1666 language *lang;
1667 {
1668 static const fdesc emptyfdesc;
1669 fdesc *fdp;
1670
1671 /* Create a new input file description entry. */
1672 fdp = xnew (1, fdesc);
1673 *fdp = emptyfdesc;
1674 fdp->next = fdhead;
1675 fdp->infname = savestr (fn);
1676 fdp->lang = lang;
1677 fdp->infabsname = absolute_filename (fn, cwd);
1678 fdp->infabsdir = absolute_dirname (fn, cwd);
1679 if (filename_is_absolute (fn))
1680 {
1681 /* An absolute file name. Canonicalize it. */
1682 fdp->taggedfname = absolute_filename (fn, NULL);
1683 }
1684 else
1685 {
1686 /* A file name relative to cwd. Make it relative
1687 to the directory of the tags file. */
1688 fdp->taggedfname = relative_filename (fn, tagfiledir);
1689 }
1690 fdp->usecharno = TRUE; /* use char position when making tags */
1691 fdp->prop = NULL;
1692 fdp->written = FALSE; /* not written on tags file yet */
1693
1694 fdhead = fdp;
1695 curfdp = fdhead; /* the current file description */
1696
1697 find_entries (fh);
1698
1699 /* If not Ctags, and if this is not metasource and if it contained no #line
1700 directives, we can write the tags and free all nodes pointing to
1701 curfdp. */
1702 if (!CTAGS
1703 && curfdp->usecharno /* no #line directives in this file */
1704 && !curfdp->lang->metasource)
1705 {
1706 node *np, *prev;
1707
1708 /* Look for the head of the sublist relative to this file. See add_node
1709 for the structure of the node tree. */
1710 prev = NULL;
1711 for (np = nodehead; np != NULL; prev = np, np = np->left)
1712 if (np->fdp == curfdp)
1713 break;
1714
1715 /* If we generated tags for this file, write and delete them. */
1716 if (np != NULL)
1717 {
1718 /* This is the head of the last sublist, if any. The following
1719 instructions depend on this being true. */
1720 assert (np->left == NULL);
1721
1722 assert (fdhead == curfdp);
1723 assert (last_node->fdp == curfdp);
1724 put_entries (np); /* write tags for file curfdp->taggedfname */
1725 free_tree (np); /* remove the written nodes */
1726 if (prev == NULL)
1727 nodehead = NULL; /* no nodes left */
1728 else
1729 prev->left = NULL; /* delete the pointer to the sublist */
1730 }
1731 }
1732 }
1733
1734 /*
1735 * This routine sets up the boolean pseudo-functions which work
1736 * by setting boolean flags dependent upon the corresponding character.
1737 * Every char which is NOT in that string is not a white char. Therefore,
1738 * all of the array "_wht" is set to FALSE, and then the elements
1739 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1740 * of a char is TRUE if it is the string "white", else FALSE.
1741 */
1742 static void
1743 init ()
1744 {
1745 register char *sp;
1746 register int i;
1747
1748 for (i = 0; i < CHARS; i++)
1749 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1750 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1751 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1752 notinname('\0') = notinname('\n');
1753 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1754 begtoken('\0') = begtoken('\n');
1755 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1756 intoken('\0') = intoken('\n');
1757 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1758 endtoken('\0') = endtoken('\n');
1759 }
1760
1761 /*
1762 * This routine opens the specified file and calls the function
1763 * which finds the function and type definitions.
1764 */
1765 static void
1766 find_entries (inf)
1767 FILE *inf;
1768 {
1769 char *cp;
1770 language *lang = curfdp->lang;
1771 Lang_function *parser = NULL;
1772
1773 /* If user specified a language, use it. */
1774 if (lang != NULL && lang->function != NULL)
1775 {
1776 parser = lang->function;
1777 }
1778
1779 /* Else try to guess the language given the file name. */
1780 if (parser == NULL)
1781 {
1782 lang = get_language_from_filename (curfdp->infname, TRUE);
1783 if (lang != NULL && lang->function != NULL)
1784 {
1785 curfdp->lang = lang;
1786 parser = lang->function;
1787 }
1788 }
1789
1790 /* Else look for sharp-bang as the first two characters. */
1791 if (parser == NULL
1792 && readline_internal (&lb, inf) > 0
1793 && lb.len >= 2
1794 && lb.buffer[0] == '#'
1795 && lb.buffer[1] == '!')
1796 {
1797 char *lp;
1798
1799 /* Set lp to point at the first char after the last slash in the
1800 line or, if no slashes, at the first nonblank. Then set cp to
1801 the first successive blank and terminate the string. */
1802 lp = etags_strrchr (lb.buffer+2, '/');
1803 if (lp != NULL)
1804 lp += 1;
1805 else
1806 lp = skip_spaces (lb.buffer + 2);
1807 cp = skip_non_spaces (lp);
1808 *cp = '\0';
1809
1810 if (strlen (lp) > 0)
1811 {
1812 lang = get_language_from_interpreter (lp);
1813 if (lang != NULL && lang->function != NULL)
1814 {
1815 curfdp->lang = lang;
1816 parser = lang->function;
1817 }
1818 }
1819 }
1820
1821 /* We rewind here, even if inf may be a pipe. We fail if the
1822 length of the first line is longer than the pipe block size,
1823 which is unlikely. */
1824 rewind (inf);
1825
1826 /* Else try to guess the language given the case insensitive file name. */
1827 if (parser == NULL)
1828 {
1829 lang = get_language_from_filename (curfdp->infname, FALSE);
1830 if (lang != NULL && lang->function != NULL)
1831 {
1832 curfdp->lang = lang;
1833 parser = lang->function;
1834 }
1835 }
1836
1837 /* Else try Fortran or C. */
1838 if (parser == NULL)
1839 {
1840 node *old_last_node = last_node;
1841
1842 curfdp->lang = get_language_from_langname ("fortran");
1843 find_entries (inf);
1844
1845 if (old_last_node == last_node)
1846 /* No Fortran entries found. Try C. */
1847 {
1848 /* We do not tag if rewind fails.
1849 Only the file name will be recorded in the tags file. */
1850 rewind (inf);
1851 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1852 find_entries (inf);
1853 }
1854 return;
1855 }
1856
1857 if (!no_line_directive
1858 && curfdp->lang != NULL && curfdp->lang->metasource)
1859 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1860 file, or anyway we parsed a file that is automatically generated from
1861 this one. If this is the case, the bingo.c file contained #line
1862 directives that generated tags pointing to this file. Let's delete
1863 them all before parsing this file, which is the real source. */
1864 {
1865 fdesc **fdpp = &fdhead;
1866 while (*fdpp != NULL)
1867 if (*fdpp != curfdp
1868 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1869 /* We found one of those! We must delete both the file description
1870 and all tags referring to it. */
1871 {
1872 fdesc *badfdp = *fdpp;
1873
1874 /* Delete the tags referring to badfdp->taggedfname
1875 that were obtained from badfdp->infname. */
1876 invalidate_nodes (badfdp, &nodehead);
1877
1878 *fdpp = badfdp->next; /* remove the bad description from the list */
1879 free_fdesc (badfdp);
1880 }
1881 else
1882 fdpp = &(*fdpp)->next; /* advance the list pointer */
1883 }
1884
1885 assert (parser != NULL);
1886
1887 /* Generic initialisations before reading from file. */
1888 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1889
1890 /* Generic initialisations before parsing file with readline. */
1891 lineno = 0; /* reset global line number */
1892 charno = 0; /* reset global char number */
1893 linecharno = 0; /* reset global char number of line start */
1894
1895 parser (inf);
1896
1897 regex_tag_multiline ();
1898 }
1899
1900 \f
1901 /*
1902 * Check whether an implicitly named tag should be created,
1903 * then call `pfnote'.
1904 * NAME is a string that is internally copied by this function.
1905 *
1906 * TAGS format specification
1907 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1908 * The following is explained in some more detail in etc/ETAGS.EBNF.
1909 *
1910 * make_tag creates tags with "implicit tag names" (unnamed tags)
1911 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1912 * 1. NAME does not contain any of the characters in NONAM;
1913 * 2. LINESTART contains name as either a rightmost, or rightmost but
1914 * one character, substring;
1915 * 3. the character, if any, immediately before NAME in LINESTART must
1916 * be a character in NONAM;
1917 * 4. the character, if any, immediately after NAME in LINESTART must
1918 * also be a character in NONAM.
1919 *
1920 * The implementation uses the notinname() macro, which recognises the
1921 * characters stored in the string `nonam'.
1922 * etags.el needs to use the same characters that are in NONAM.
1923 */
1924 static void
1925 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1926 char *name; /* tag name, or NULL if unnamed */
1927 int namelen; /* tag length */
1928 bool is_func; /* tag is a function */
1929 char *linestart; /* start of the line where tag is */
1930 int linelen; /* length of the line where tag is */
1931 int lno; /* line number */
1932 long cno; /* character number */
1933 {
1934 bool named = (name != NULL && namelen > 0);
1935
1936 if (!CTAGS && named) /* maybe set named to false */
1937 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1938 such that etags.el can guess a name from it. */
1939 {
1940 int i;
1941 register char *cp = name;
1942
1943 for (i = 0; i < namelen; i++)
1944 if (notinname (*cp++))
1945 break;
1946 if (i == namelen) /* rule #1 */
1947 {
1948 cp = linestart + linelen - namelen;
1949 if (notinname (linestart[linelen-1]))
1950 cp -= 1; /* rule #4 */
1951 if (cp >= linestart /* rule #2 */
1952 && (cp == linestart
1953 || notinname (cp[-1])) /* rule #3 */
1954 && strneq (name, cp, namelen)) /* rule #2 */
1955 named = FALSE; /* use implicit tag name */
1956 }
1957 }
1958
1959 if (named)
1960 name = savenstr (name, namelen);
1961 else
1962 name = NULL;
1963 pfnote (name, is_func, linestart, linelen, lno, cno);
1964 }
1965
1966 /* Record a tag. */
1967 static void
1968 pfnote (name, is_func, linestart, linelen, lno, cno)
1969 char *name; /* tag name, or NULL if unnamed */
1970 bool is_func; /* tag is a function */
1971 char *linestart; /* start of the line where tag is */
1972 int linelen; /* length of the line where tag is */
1973 int lno; /* line number */
1974 long cno; /* character number */
1975 {
1976 register node *np;
1977
1978 assert (name == NULL || name[0] != '\0');
1979 if (CTAGS && name == NULL)
1980 return;
1981
1982 np = xnew (1, node);
1983
1984 /* If ctags mode, change name "main" to M<thisfilename>. */
1985 if (CTAGS && !cxref_style && streq (name, "main"))
1986 {
1987 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1988 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1989 fp = etags_strrchr (np->name, '.');
1990 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1991 fp[0] = '\0';
1992 }
1993 else
1994 np->name = name;
1995 np->valid = TRUE;
1996 np->been_warned = FALSE;
1997 np->fdp = curfdp;
1998 np->is_func = is_func;
1999 np->lno = lno;
2000 if (np->fdp->usecharno)
2001 /* Our char numbers are 0-base, because of C language tradition?
2002 ctags compatibility? old versions compatibility? I don't know.
2003 Anyway, since emacs's are 1-base we expect etags.el to take care
2004 of the difference. If we wanted to have 1-based numbers, we would
2005 uncomment the +1 below. */
2006 np->cno = cno /* + 1 */ ;
2007 else
2008 np->cno = invalidcharno;
2009 np->left = np->right = NULL;
2010 if (CTAGS && !cxref_style)
2011 {
2012 if (strlen (linestart) < 50)
2013 np->regex = concat (linestart, "$", "");
2014 else
2015 np->regex = savenstr (linestart, 50);
2016 }
2017 else
2018 np->regex = savenstr (linestart, linelen);
2019
2020 add_node (np, &nodehead);
2021 }
2022
2023 /*
2024 * free_tree ()
2025 * recurse on left children, iterate on right children.
2026 */
2027 static void
2028 free_tree (np)
2029 register node *np;
2030 {
2031 while (np)
2032 {
2033 register node *node_right = np->right;
2034 free_tree (np->left);
2035 free (np->name);
2036 free (np->regex);
2037 free (np);
2038 np = node_right;
2039 }
2040 }
2041
2042 /*
2043 * free_fdesc ()
2044 * delete a file description
2045 */
2046 static void
2047 free_fdesc (fdp)
2048 register fdesc *fdp;
2049 {
2050 free (fdp->infname);
2051 free (fdp->infabsname);
2052 free (fdp->infabsdir);
2053 free (fdp->taggedfname);
2054 free (fdp->prop);
2055 free (fdp);
2056 }
2057
2058 /*
2059 * add_node ()
2060 * Adds a node to the tree of nodes. In etags mode, sort by file
2061 * name. In ctags mode, sort by tag name. Make no attempt at
2062 * balancing.
2063 *
2064 * add_node is the only function allowed to add nodes, so it can
2065 * maintain state.
2066 */
2067 static void
2068 add_node (np, cur_node_p)
2069 node *np, **cur_node_p;
2070 {
2071 register int dif;
2072 register node *cur_node = *cur_node_p;
2073
2074 if (cur_node == NULL)
2075 {
2076 *cur_node_p = np;
2077 last_node = np;
2078 return;
2079 }
2080
2081 if (!CTAGS)
2082 /* Etags Mode */
2083 {
2084 /* For each file name, tags are in a linked sublist on the right
2085 pointer. The first tags of different files are a linked list
2086 on the left pointer. last_node points to the end of the last
2087 used sublist. */
2088 if (last_node != NULL && last_node->fdp == np->fdp)
2089 {
2090 /* Let's use the same sublist as the last added node. */
2091 assert (last_node->right == NULL);
2092 last_node->right = np;
2093 last_node = np;
2094 }
2095 else if (cur_node->fdp == np->fdp)
2096 {
2097 /* Scanning the list we found the head of a sublist which is
2098 good for us. Let's scan this sublist. */
2099 add_node (np, &cur_node->right);
2100 }
2101 else
2102 /* The head of this sublist is not good for us. Let's try the
2103 next one. */
2104 add_node (np, &cur_node->left);
2105 } /* if ETAGS mode */
2106
2107 else
2108 {
2109 /* Ctags Mode */
2110 dif = strcmp (np->name, cur_node->name);
2111
2112 /*
2113 * If this tag name matches an existing one, then
2114 * do not add the node, but maybe print a warning.
2115 */
2116 if (no_duplicates && !dif)
2117 {
2118 if (np->fdp == cur_node->fdp)
2119 {
2120 if (!no_warnings)
2121 {
2122 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2123 np->fdp->infname, lineno, np->name);
2124 fprintf (stderr, "Second entry ignored\n");
2125 }
2126 }
2127 else if (!cur_node->been_warned && !no_warnings)
2128 {
2129 fprintf
2130 (stderr,
2131 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2132 np->fdp->infname, cur_node->fdp->infname, np->name);
2133 cur_node->been_warned = TRUE;
2134 }
2135 return;
2136 }
2137
2138 /* Actually add the node */
2139 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2140 } /* if CTAGS mode */
2141 }
2142
2143 /*
2144 * invalidate_nodes ()
2145 * Scan the node tree and invalidate all nodes pointing to the
2146 * given file description (CTAGS case) or free them (ETAGS case).
2147 */
2148 static void
2149 invalidate_nodes (badfdp, npp)
2150 fdesc *badfdp;
2151 node **npp;
2152 {
2153 node *np = *npp;
2154
2155 if (np == NULL)
2156 return;
2157
2158 if (CTAGS)
2159 {
2160 if (np->left != NULL)
2161 invalidate_nodes (badfdp, &np->left);
2162 if (np->fdp == badfdp)
2163 np->valid = FALSE;
2164 if (np->right != NULL)
2165 invalidate_nodes (badfdp, &np->right);
2166 }
2167 else
2168 {
2169 assert (np->fdp != NULL);
2170 if (np->fdp == badfdp)
2171 {
2172 *npp = np->left; /* detach the sublist from the list */
2173 np->left = NULL; /* isolate it */
2174 free_tree (np); /* free it */
2175 invalidate_nodes (badfdp, npp);
2176 }
2177 else
2178 invalidate_nodes (badfdp, &np->left);
2179 }
2180 }
2181
2182 \f
2183 static int total_size_of_entries __P((node *));
2184 static int number_len __P((long));
2185
/* Length of a non-negative number's decimal representation:
   one digit, plus one more for every further factor of ten.  */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2196
2197 /*
2198 * Return total number of characters that put_entries will output for
2199 * the nodes in the linked list at the right of the specified node.
2200 * This count is irrelevant with etags.el since emacs 19.34 at least,
2201 * but is still supplied for backward compatibility.
2202 */
2203 static int
2204 total_size_of_entries (np)
2205 register node *np;
2206 {
2207 register int total = 0;
2208
2209 for (; np != NULL; np = np->right)
2210 if (np->valid)
2211 {
2212 total += strlen (np->regex) + 1; /* pat\177 */
2213 if (np->name != NULL)
2214 total += strlen (np->name) + 1; /* name\001 */
2215 total += number_len ((long) np->lno) + 1; /* lno, */
2216 if (np->cno != invalidcharno) /* cno */
2217 total += number_len (np->cno);
2218 total += 1; /* newline */
2219 }
2220
2221 return total;
2222 }
2223
2224 static void
2225 put_entries (np)
2226 register node *np;
2227 {
2228 register char *sp;
2229 static fdesc *fdp = NULL;
2230
2231 if (np == NULL)
2232 return;
2233
2234 /* Output subentries that precede this one */
2235 if (CTAGS)
2236 put_entries (np->left);
2237
2238 /* Output this entry */
2239 if (np->valid)
2240 {
2241 if (!CTAGS)
2242 {
2243 /* Etags mode */
2244 if (fdp != np->fdp)
2245 {
2246 fdp = np->fdp;
2247 fprintf (tagf, "\f\n%s,%d\n",
2248 fdp->taggedfname, total_size_of_entries (np));
2249 fdp->written = TRUE;
2250 }
2251 fputs (np->regex, tagf);
2252 fputc ('\177', tagf);
2253 if (np->name != NULL)
2254 {
2255 fputs (np->name, tagf);
2256 fputc ('\001', tagf);
2257 }
2258 fprintf (tagf, "%d,", np->lno);
2259 if (np->cno != invalidcharno)
2260 fprintf (tagf, "%ld", np->cno);
2261 fputs ("\n", tagf);
2262 }
2263 else
2264 {
2265 /* Ctags mode */
2266 if (np->name == NULL)
2267 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2268
2269 if (cxref_style)
2270 {
2271 if (vgrind_style)
2272 fprintf (stdout, "%s %s %d\n",
2273 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2274 else
2275 fprintf (stdout, "%-16s %3d %-16s %s\n",
2276 np->name, np->lno, np->fdp->taggedfname, np->regex);
2277 }
2278 else
2279 {
2280 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2281
2282 if (np->is_func)
2283 { /* function or #define macro with args */
2284 putc (searchar, tagf);
2285 putc ('^', tagf);
2286
2287 for (sp = np->regex; *sp; sp++)
2288 {
2289 if (*sp == '\\' || *sp == searchar)
2290 putc ('\\', tagf);
2291 putc (*sp, tagf);
2292 }
2293 putc (searchar, tagf);
2294 }
2295 else
2296 { /* anything else; text pattern inadequate */
2297 fprintf (tagf, "%d", np->lno);
2298 }
2299 putc ('\n', tagf);
2300 }
2301 }
2302 } /* if this node contains a valid tag */
2303
2304 /* Output subentries that follow this one */
2305 put_entries (np->right);
2306 if (!CTAGS)
2307 put_entries (np->left);
2308 }
2309
2310 \f
/* C extensions.
   The low twelve bits (C_EXT) identify the C dialect.  C_STAR and
   C_JAVA both include the C_PLPL bit, which is why the keyword table
   uses (C_JAVA & ~C_PLPL) for Java-only keywords.  */
#define C_EXT   0x00fff         /* mask of the C extension bits */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2319
/*
 * The C symbol tables.
 * sym_type classifies the keywords listed in the gperf table; st_none
 * is the result for any token that is not in the table.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2334
2335 static unsigned int hash __P((const char *, unsigned int));
2336 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2337 static enum sym_type C_symtype __P((char *, int, int));
2338
2339 /* Feed stuff between (but not including) %[ and %] lines to:
2340 gperf -m 5
2341 %[
2342 %compare-strncmp
2343 %enum
2344 %struct-type
2345 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2346 %%
2347 if, 0, st_C_ignore
2348 for, 0, st_C_ignore
2349 while, 0, st_C_ignore
2350 switch, 0, st_C_ignore
2351 return, 0, st_C_ignore
2352 __attribute__, 0, st_C_attribute
2353 GTY, 0, st_C_attribute
2354 @interface, 0, st_C_objprot
2355 @protocol, 0, st_C_objprot
2356 @implementation,0, st_C_objimpl
2357 @end, 0, st_C_objend
2358 import, (C_JAVA & ~C_PLPL), st_C_ignore
2359 package, (C_JAVA & ~C_PLPL), st_C_ignore
2360 friend, C_PLPL, st_C_ignore
2361 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2362 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2363 interface, (C_JAVA & ~C_PLPL), st_C_struct
2364 class, 0, st_C_class
2365 namespace, C_PLPL, st_C_struct
2366 domain, C_STAR, st_C_struct
2367 union, 0, st_C_struct
2368 struct, 0, st_C_struct
2369 extern, 0, st_C_extern
2370 enum, 0, st_C_enum
2371 typedef, 0, st_C_typedef
2372 define, 0, st_C_define
2373 undef, 0, st_C_define
2374 operator, C_PLPL, st_C_operator
2375 template, 0, st_C_template
2376 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2377 DEFUN, 0, st_C_gnumacro
2378 SYSCALL, 0, st_C_gnumacro
2379 ENTRY, 0, st_C_gnumacro
2380 PSEUDO, 0, st_C_gnumacro
2381 # These are defined inside C functions, so currently they are not met.
2382 # EXFUN used in glibc, DEFVAR_* in emacs.
2383 #EXFUN, 0, st_C_gnumacro
2384 #DEFVAR_, 0, st_C_gnumacro
2385 %]
2386 and replace lines between %< and %> with its output, then:
2387 - remove the #if characterset check
2388 - make in_word_set static and not inline. */
2389 /*%<*/
2390 /* C code produced by gperf version 3.0.1 */
2391 /* Command-line: gperf -m 5 */
2392 /* Computed positions: -k'2-3' */
2393
2394 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2395 /* maximum key range = 33, duplicates = 0 */
2396
/* Hash function produced by gperf for the C keyword table (see the
   instructions above the generated section before regenerating it).
   The value is LEN plus the association value of the second character
   and, for words longer than two characters, of the third one too.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  switch (hval)
    {
      default:
        /* Three or more characters: the third one counts too.  */
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2451
2452 static struct C_stab_entry *
2453 in_word_set (str, len)
2454 register const char *str;
2455 register unsigned int len;
2456 {
2457 enum
2458 {
2459 TOTAL_KEYWORDS = 33,
2460 MIN_WORD_LENGTH = 2,
2461 MAX_WORD_LENGTH = 15,
2462 MIN_HASH_VALUE = 2,
2463 MAX_HASH_VALUE = 34
2464 };
2465
2466 static struct C_stab_entry wordlist[] =
2467 {
2468 {""}, {""},
2469 {"if", 0, st_C_ignore},
2470 {"GTY", 0, st_C_attribute},
2471 {"@end", 0, st_C_objend},
2472 {"union", 0, st_C_struct},
2473 {"define", 0, st_C_define},
2474 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2475 {"template", 0, st_C_template},
2476 {"operator", C_PLPL, st_C_operator},
2477 {"@interface", 0, st_C_objprot},
2478 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2479 {"friend", C_PLPL, st_C_ignore},
2480 {"typedef", 0, st_C_typedef},
2481 {"return", 0, st_C_ignore},
2482 {"@implementation",0, st_C_objimpl},
2483 {"@protocol", 0, st_C_objprot},
2484 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2485 {"extern", 0, st_C_extern},
2486 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2487 {"struct", 0, st_C_struct},
2488 {"domain", C_STAR, st_C_struct},
2489 {"switch", 0, st_C_ignore},
2490 {"enum", 0, st_C_enum},
2491 {"for", 0, st_C_ignore},
2492 {"namespace", C_PLPL, st_C_struct},
2493 {"class", 0, st_C_class},
2494 {"while", 0, st_C_ignore},
2495 {"undef", 0, st_C_define},
2496 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2497 {"__attribute__", 0, st_C_attribute},
2498 {"SYSCALL", 0, st_C_gnumacro},
2499 {"ENTRY", 0, st_C_gnumacro},
2500 {"PSEUDO", 0, st_C_gnumacro},
2501 {"DEFUN", 0, st_C_gnumacro}
2502 };
2503
2504 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2505 {
2506 register int key = hash (str, len);
2507
2508 if (key <= MAX_HASH_VALUE && key >= 0)
2509 {
2510 register const char *s = wordlist[key].name;
2511
2512 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2513 return &wordlist[key];
2514 }
2515 }
2516 return 0;
2517 }
2518 /*%>*/
2519
2520 static enum sym_type
2521 C_symtype (str, len, c_ext)
2522 char *str;
2523 int len;
2524 int c_ext;
2525 {
2526 register struct C_stab_entry *se = in_word_set (str, len);
2527
2528 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2529 return st_none;
2530 return se->type;
2531 }
2532
2533 \f
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */
2558
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2572
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2586
/*
 * When objdef is different from onone, objtag is the name of the class.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2602
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2622
2623
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  /* Location of the token text.  */
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */

  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* do not create a tag; the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2646
2647 /*
2648 * Variables and functions for dealing with nested structures.
2649 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2650 */
2651 static void pushclass_above __P((int, char *, int));
2652 static void popclass_above __P((int));
2653 static void write_classname __P((linebuffer *, char *qualifier));
2654
/* Stack for nested declaration tags.  cname[] and bracelev[] are
   parallel arrays: entry I holds the name of a nested class-like
   construct and the brace level associated with it.  */
static struct
{
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum).  */
#define nestlev         (cstack.nl)

/* After struct keyword or in struct body, not inside a nested function.  */
#define instruct        (structdef == snone && nestlev > 0                 \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2666
2667 static void
2668 pushclass_above (bracelev, str, len)
2669 int bracelev;
2670 char *str;
2671 int len;
2672 {
2673 int nl;
2674
2675 popclass_above (bracelev);
2676 nl = cstack.nl;
2677 if (nl >= cstack.size)
2678 {
2679 int size = cstack.size *= 2;
2680 xrnew (cstack.cname, size, char *);
2681 xrnew (cstack.bracelev, size, int);
2682 }
2683 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2684 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2685 cstack.bracelev[nl] = bracelev;
2686 cstack.nl = nl + 1;
2687 }
2688
2689 static void
2690 popclass_above (bracelev)
2691 int bracelev;
2692 {
2693 int nl;
2694
2695 for (nl = cstack.nl - 1;
2696 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2697 nl--)
2698 {
2699 free (cstack.cname[nl]);
2700 cstack.nl = nl;
2701 }
2702 }
2703
2704 static void
2705 write_classname (cn, qualifier)
2706 linebuffer *cn;
2707 char *qualifier;
2708 {
2709 int i, len;
2710 int qlen = strlen (qualifier);
2711
2712 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2713 {
2714 len = 0;
2715 cn->len = 0;
2716 cn->buffer[0] = '\0';
2717 }
2718 else
2719 {
2720 len = strlen (cstack.cname[0]);
2721 linebuffer_setlen (cn, len);
2722 strcpy (cn->buffer, cstack.cname[0]);
2723 }
2724 for (i = 1; i < cstack.nl; i++)
2725 {
2726 char *s;
2727 int slen;
2728
2729 s = cstack.cname[i];
2730 if (s == NULL)
2731 continue;
2732 slen = strlen (s);
2733 len += slen + qlen;
2734 linebuffer_setlen (cn, len);
2735 strncat (cn->buffer, qualifier, qlen);
2736 strncat (cn->buffer, s, slen);
2737 }
2738 }
2739
2740 \f
2741 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2742 static void make_C_tag __P((bool));
2743
2744 /*
2745 * consider_token ()
2746 * checks to see if the current token is at the start of a
2747 * function or variable, or corresponds to a typedef, or
2748 * is a struct/union/enum tag, or #define, or an enum constant.
2749 *
2750 * *IS_FUNC gets TRUE if the token is a function or #define macro
2751 * with args. C_EXTP points to which language we are looking at.
2752 *
2753 * Globals
2754 * fvdef IN OUT
2755 * structdef IN OUT
2756 * definedef IN OUT
2757 * typdef IN OUT
2758 * objdef IN OUT
2759 */
2760
2761 static bool
2762 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2763 register char *str; /* IN: token pointer */
2764 register int len; /* IN: token length */
2765 register int c; /* IN: first char after the token */
2766 int *c_extp; /* IN, OUT: C extensions mask */
2767 int bracelev; /* IN: brace level */
2768 int parlev; /* IN: parenthesis level */
2769 bool *is_func_or_var; /* OUT: function or variable found */
2770 {
2771 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2772 structtype is the type of the preceding struct-like keyword, and
2773 structbracelev is the brace level where it has been seen. */
2774 static enum sym_type structtype;
2775 static int structbracelev;
2776 static enum sym_type toktype;
2777
2778
2779 toktype = C_symtype (str, len, *c_extp);
2780
2781 /*
2782 * Skip __attribute__
2783 */
2784 if (toktype == st_C_attribute)
2785 {
2786 inattribute = TRUE;
2787 return FALSE;
2788 }
2789
2790 /*
2791 * Advance the definedef state machine.
2792 */
2793 switch (definedef)
2794 {
2795 case dnone:
2796 /* We're not on a preprocessor line. */
2797 if (toktype == st_C_gnumacro)
2798 {
2799 fvdef = fdefunkey;
2800 return FALSE;
2801 }
2802 break;
2803 case dsharpseen:
2804 if (toktype == st_C_define)
2805 {
2806 definedef = ddefineseen;
2807 }
2808 else
2809 {
2810 definedef = dignorerest;
2811 }
2812 return FALSE;
2813 case ddefineseen:
2814 /*
2815 * Make a tag for any macro, unless it is a constant
2816 * and constantypedefs is FALSE.
2817 */
2818 definedef = dignorerest;
2819 *is_func_or_var = (c == '(');
2820 if (!*is_func_or_var && !constantypedefs)
2821 return FALSE;
2822 else
2823 return TRUE;
2824 case dignorerest:
2825 return FALSE;
2826 default:
2827 error ("internal error: definedef value.", (char *)NULL);
2828 }
2829
2830 /*
2831 * Now typedefs
2832 */
2833 switch (typdef)
2834 {
2835 case tnone:
2836 if (toktype == st_C_typedef)
2837 {
2838 if (typedefs)
2839 typdef = tkeyseen;
2840 fvextern = FALSE;
2841 fvdef = fvnone;
2842 return FALSE;
2843 }
2844 break;
2845 case tkeyseen:
2846 switch (toktype)
2847 {
2848 case st_none:
2849 case st_C_class:
2850 case st_C_struct:
2851 case st_C_enum:
2852 typdef = ttypeseen;
2853 }
2854 break;
2855 case ttypeseen:
2856 if (structdef == snone && fvdef == fvnone)
2857 {
2858 fvdef = fvnameseen;
2859 return TRUE;
2860 }
2861 break;
2862 case tend:
2863 switch (toktype)
2864 {
2865 case st_C_class:
2866 case st_C_struct:
2867 case st_C_enum:
2868 return FALSE;
2869 }
2870 return TRUE;
2871 }
2872
2873 switch (toktype)
2874 {
2875 case st_C_javastruct:
2876 if (structdef == stagseen)
2877 structdef = scolonseen;
2878 return FALSE;
2879 case st_C_template:
2880 case st_C_class:
2881 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2882 && bracelev == 0
2883 && definedef == dnone && structdef == snone
2884 && typdef == tnone && fvdef == fvnone)
2885 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2886 if (toktype == st_C_template)
2887 break;
2888 /* FALLTHRU */
2889 case st_C_struct:
2890 case st_C_enum:
2891 if (parlev == 0
2892 && fvdef != vignore
2893 && (typdef == tkeyseen
2894 || (typedefs_or_cplusplus && structdef == snone)))
2895 {
2896 structdef = skeyseen;
2897 structtype = toktype;
2898 structbracelev = bracelev;
2899 if (fvdef == fvnameseen)
2900 fvdef = fvnone;
2901 }
2902 return FALSE;
2903 }
2904
2905 if (structdef == skeyseen)
2906 {
2907 structdef = stagseen;
2908 return TRUE;
2909 }
2910
2911 if (typdef != tnone)
2912 definedef = dnone;
2913
2914 /* Detect Objective C constructs. */
2915 switch (objdef)
2916 {
2917 case onone:
2918 switch (toktype)
2919 {
2920 case st_C_objprot:
2921 objdef = oprotocol;
2922 return FALSE;
2923 case st_C_objimpl:
2924 objdef = oimplementation;
2925 return FALSE;
2926 }
2927 break;
2928 case oimplementation:
2929 /* Save the class tag for functions or variables defined inside. */
2930 objtag = savenstr (str, len);
2931 objdef = oinbody;
2932 return FALSE;
2933 case oprotocol:
2934 /* Save the class tag for categories. */
2935 objtag = savenstr (str, len);
2936 objdef = otagseen;
2937 *is_func_or_var = TRUE;
2938 return TRUE;
2939 case oparenseen:
2940 objdef = ocatseen;
2941 *is_func_or_var = TRUE;
2942 return TRUE;
2943 case oinbody:
2944 break;
2945 case omethodsign:
2946 if (parlev == 0)
2947 {
2948 fvdef = fvnone;
2949 objdef = omethodtag;
2950 linebuffer_setlen (&token_name, len);
2951 strncpy (token_name.buffer, str, len);
2952 token_name.buffer[len] = '\0';
2953 return TRUE;
2954 }
2955 return FALSE;
2956 case omethodcolon:
2957 if (parlev == 0)
2958 objdef = omethodparm;
2959 return FALSE;
2960 case omethodparm:
2961 if (parlev == 0)
2962 {
2963 fvdef = fvnone;
2964 objdef = omethodtag;
2965 linebuffer_setlen (&token_name, token_name.len + len);
2966 strncat (token_name.buffer, str, len);
2967 return TRUE;
2968 }
2969 return FALSE;
2970 case oignore:
2971 if (toktype == st_C_objend)
2972 {
2973 /* Memory leakage here: the string pointed by objtag is
2974 never released, because many tests would be needed to
2975 avoid breaking on incorrect input code. The amount of
2976 memory leaked here is the sum of the lengths of the
2977 class tags.
2978 free (objtag); */
2979 objdef = onone;
2980 }
2981 return FALSE;
2982 }
2983
2984 /* A function, variable or enum constant? */
2985 switch (toktype)
2986 {
2987 case st_C_extern:
2988 fvextern = TRUE;
2989 switch (fvdef)
2990 {
2991 case finlist:
2992 case flistseen:
2993 case fignore:
2994 case vignore:
2995 break;
2996 default:
2997 fvdef = fvnone;
2998 }
2999 return FALSE;
3000 case st_C_ignore:
3001 fvextern = FALSE;
3002 fvdef = vignore;
3003 return FALSE;
3004 case st_C_operator:
3005 fvdef = foperator;
3006 *is_func_or_var = TRUE;
3007 return TRUE;
3008 case st_none:
3009 if (constantypedefs
3010 && structdef == snone
3011 && structtype == st_C_enum && bracelev > structbracelev)
3012 return TRUE; /* enum constant */
3013 switch (fvdef)
3014 {
3015 case fdefunkey:
3016 if (bracelev > 0)
3017 break;
3018 fvdef = fdefunname; /* GNU macro */
3019 *is_func_or_var = TRUE;
3020 return TRUE;
3021 case fvnone:
3022 switch (typdef)
3023 {
3024 case ttypeseen:
3025 return FALSE;
3026 case tnone:
3027 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3028 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3029 {
3030 fvdef = vignore;
3031 return FALSE;
3032 }
3033 break;
3034 }
3035 /* FALLTHRU */
3036 case fvnameseen:
3037 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3038 {
3039 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3040 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3041 fvdef = foperator;
3042 *is_func_or_var = TRUE;
3043 return TRUE;
3044 }
3045 if (bracelev > 0 && !instruct)
3046 break;
3047 fvdef = fvnameseen; /* function or variable */
3048 *is_func_or_var = TRUE;
3049 return TRUE;
3050 }
3051 break;
3052 }
3053
3054 return FALSE;
3055 }
3056
3057 \f
3058 /*
3059 * C_entries often keeps pointers to tokens or lines which are older than
3060 * the line currently read. By keeping two line buffers, and switching
3061 * them at end of line, it is possible to use those pointers.
3062 */
3063 static struct
3064 {
3065 long linepos; /* value of charno saved just before the line was read */
3066 linebuffer lb; /* the line itself */
3067 } lbs[2];
3068
/* The macros below are meant to be expanded inside C_entries: they use
   its locals curndx, newndx, c_ext, lp, quotednl, savetoken and inf. */

/* True when the line being scanned is also the one marked as new. */
3069 #define current_lb_is_new (newndx == curndx)
/* Make the other element of lbs the current one (curndx is 0 or 1). */
3070 #define switch_line_buffers() (curndx = 1 - curndx)
3071
/* Shorthands for the line buffer and saved position of the current
   and of the new slot of lbs. */
3072 #define curlb (lbs[curndx].lb)
3073 #define newlb (lbs[newndx].lb)
3074 #define curlinepos (lbs[curndx].linepos)
3075 #define newlinepos (lbs[newndx].linepos)
3076
/* Tests on the language bits carried by c_ext. */
3077 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3078 #define cplpl (c_ext & C_PLPL)
3079 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3080
/* Read the next input line into the current buffer, remembering charno
   as its position, restart scanning at its first character, clear
   quotednl and mark the current buffer as the new one.  The value of
   definedef is left alone. */
3081 #define CNL_SAVE_DEFINEDEF() \
3082 do { \
3083 curlinepos = charno; \
3084 readline (&curlb, inf); \
3085 lp = curlb.buffer; \
3086 quotednl = FALSE; \
3087 newndx = curndx; \
3088 } while (0)
3089
/* Like CNL_SAVE_DEFINEDEF, but additionally put back the token saved in
   savetoken (if it is valid) and reset definedef to dnone. */
3090 #define CNL() \
3091 do { \
3092 CNL_SAVE_DEFINEDEF(); \
3093 if (savetoken.valid) \
3094 { \
3095 token = savetoken; \
3096 savetoken.valid = FALSE; \
3097 } \
3098 definedef = dnone; \
3099 } while (0)
3100
3101
3102 static void
3103 make_C_tag (isfun)
3104 bool isfun;
3105 {
3106 /* This function is never called when token.valid is FALSE, but
3107 we must protect against invalid input or internal errors. */
3108 if (token.valid)
3109 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3110 token.offset+token.length+1, token.lineno, token.linepos);
3111 else if (DEBUG)
3112 { /* this branch is optimised away if !DEBUG */
3113 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3114 token_name.len + 17, isfun, token.line,
3115 token.offset+token.length+1, token.lineno, token.linepos);
3116 error ("INVALID TOKEN", NULL);
3117 }
3118
3119 token.valid = FALSE;
3120 }
3121
3122
3123 /*
3124 * C_entries ()
3125 * This routine finds functions, variables, typedefs,
3126 * #define's, enum constants and struct/union/enum definitions in
3127 * C syntax and adds them to the list.
3128 */
/* C_EXT carries the language flags that are tested through the plainc,
   cplpl and cjava macros and through the YACC and C_AUTO bits; when
   C_AUTO is set and a `::' is met inside an identifier, the local copy
   is switched to C++.  INF is the stream the lines are read from.

   The input is scanned one character at a time.  Comments, string and
   character literals and bracketed text are skipped first; identifiers
   are then handed to consider_token, and the punctuation that follows
   a token decides, together with the fvdef, typdef, structdef,
   definedef and objdef state variables, whether make_C_tag is called.
   Both line buffers are freed before returning. */
3129 static void
3130 C_entries (c_ext, inf)
3131 int c_ext; /* extension of C */
3132 FILE *inf; /* input file */
3133 {
3134 register char c; /* latest char read; '\0' for end of line */
3135 register char *lp; /* pointer one beyond the character `c' */
3136 int curndx, newndx; /* indices for current and new lb */
3137 register int tokoff; /* offset in line of start of current token */
3138 register int toklen; /* length of current token */
3139 char *qualifier; /* string used to qualify names */
3140 int qlen; /* length of qualifier */
3141 int bracelev; /* current brace level */
3142 int bracketlev; /* current bracket level */
3143 int parlev; /* current parenthesis level */
3144 int attrparlev; /* __attribute__ parenthesis level */
3145 int templatelev; /* current template level */
3146 int typdefbracelev; /* bracelev where a typedef struct body begun */
3147 bool incomm, inquote, inchar, quotednl, midtoken;
3148 bool yacc_rules; /* in the rules part of a yacc file */
3149 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3150
3151
/* Set up the two line buffers and, the first time through, allocate
   the arrays of the class stack. */
3152 linebuffer_init (&lbs[0].lb);
3153 linebuffer_init (&lbs[1].lb);
3154 if (cstack.size == 0)
3155 {
3156 cstack.size = (DEBUG) ? 1 : 4;
3157 cstack.nl = 0;
3158 cstack.cname = xnew (cstack.size, char *);
3159 cstack.bracelev = xnew (cstack.size, int);
3160 }
3161
/* Start on an empty line: the first character fetched is then '\0',
   which the switch at the bottom of the loop answers by reading a
   line. */
3162 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3163 curndx = newndx = 0;
3164 lp = curlb.buffer;
3165 *lp = 0;
3166
/* Reset the whole parser state. */
3167 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3168 structdef = snone; definedef = dnone; objdef = onone;
3169 yacc_rules = FALSE;
3170 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3171 token.valid = savetoken.valid = FALSE;
3172 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Separator put between a class name and a member name. */
3173 if (cjava)
3174 { qualifier = "."; qlen = 1; }
3175 else
3176 { qualifier = "::"; qlen = 2; }
3177
3178
/* Main loop: one character per iteration. */
3179 while (!feof (inf))
3180 {
3181 c = *lp++;
3182 if (c == '\\')
3183 {
3184 /* If we are at the end of the line, the next character is a
3185 '\0'; do not skip it, because it is what tells us
3186 to read the next line. */
3187 if (*lp == '\0')
3188 {
3189 quotednl = TRUE;
3190 continue;
3191 }
3192 lp++;
3193 c = ' ';
3194 }
/* Inside a block comment: only its end and the end of line matter. */
3195 else if (incomm)
3196 {
3197 switch (c)
3198 {
3199 case '*':
3200 if (*lp == '/')
3201 {
3202 c = *lp++;
3203 incomm = FALSE;
3204 }
3205 break;
3206 case '\0':
3207 /* Newlines inside comments do not end macro definitions in
3208 traditional cpp. */
3209 CNL_SAVE_DEFINEDEF ();
3210 break;
3211 }
3212 continue;
3213 }
/* Inside a string literal. */
3214 else if (inquote)
3215 {
3216 switch (c)
3217 {
3218 case '"':
3219 inquote = FALSE;
3220 break;
3221 case '\0':
3222 /* Newlines inside strings do not end macro definitions
3223 in traditional cpp, even though compilers don't
3224 usually accept them. */
3225 CNL_SAVE_DEFINEDEF ();
3226 break;
3227 }
3228 continue;
3229 }
/* Inside a character literal; it is closed by a quote or, at the
   latest, by the end of the line. */
3230 else if (inchar)
3231 {
3232 switch (c)
3233 {
3234 case '\0':
3235 /* Hmmm, something went wrong. */
3236 CNL ();
3237 /* FALLTHRU */
3238 case '\'':
3239 inchar = FALSE;
3240 break;
3241 }
3242 continue;
3243 }
/* Inside square brackets: skip everything; only a closing bracket
   lowers the level (an opening bracket met here is not counted). */
3244 else if (bracketlev > 0)
3245 {
3246 switch (c)
3247 {
3248 case ']':
3249 if (--bracketlev > 0)
3250 continue;
3251 break;
3252 case '\0':
3253 CNL_SAVE_DEFINEDEF ();
3254 break;
3255 }
3256 continue;
3257 }
/* Ordinary text: detect the start of literals, comments, yacc
   section marks, preprocessor lines and brackets. */
3258 else switch (c)
3259 {
3260 case '"':
3261 inquote = TRUE;
3262 if (inattribute)
3263 break;
3264 switch (fvdef)
3265 {
3266 case fdefunkey:
3267 case fstartlist:
3268 case finlist:
3269 case fignore:
3270 case vignore:
3271 break;
3272 default:
3273 fvextern = FALSE;
3274 fvdef = fvnone;
3275 }
3276 continue;
3277 case '\'':
3278 inchar = TRUE;
3279 if (inattribute)
3280 break;
3281 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3282 {
3283 fvextern = FALSE;
3284 fvdef = fvnone;
3285 }
3286 continue;
3287 case '/':
3288 if (*lp == '*')
3289 {
3290 incomm = TRUE;
3291 lp++;
3292 c = ' ';
3293 }
/* A double slash is treated as the end of the line, whatever the
   language. */
3294 else if (/* cplpl && */ *lp == '/')
3295 {
3296 c = '\0';
3297 }
3298 break;
3299 case '%':
3300 if ((c_ext & YACC) && *lp == '%')
3301 {
3302 /* Entering or exiting rules section in yacc file. */
3303 lp++;
3304 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3305 typdef = tnone; structdef = snone;
3306 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3307 bracelev = 0;
3308 yacc_rules = !yacc_rules;
3309 continue;
3310 }
3311 else
3312 break;
3313 case '#':
3314 if (definedef == dnone)
3315 {
3316 char *cp;
3317 bool cpptoken = TRUE;
3318
3319 /* Look back on this line. If all blanks, or nonblanks
3320 followed by an end of comment, this is a preprocessor
3321 token. */
3322 for (cp = newlb.buffer; cp < lp-1; cp++)
3323 if (!iswhite (*cp))
3324 {
3325 if (*cp == '*' && *(cp+1) == '/')
3326 {
3327 cp++;
3328 cpptoken = TRUE;
3329 }
3330 else
3331 cpptoken = FALSE;
3332 }
3333 if (cpptoken)
3334 definedef = dsharpseen;
3335 } /* if (definedef == dnone) */
3336 continue;
3337 case '[':
3338 bracketlev++;
3339 continue;
3340 } /* switch (c) */
3341
3342
3343 /* Consider token only if some involved conditions are satisfied. */
3344 if (typdef != tignore
3345 && definedef != dignorerest
3346 && fvdef != finlist
3347 && templatelev == 0
3348 && (definedef != dnone
3349 || structdef != scolonseen)
3350 && !inattribute)
3351 {
3352 if (midtoken)
3353 {
3354 if (endtoken (c))
3355 {
3356 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3357 /* This handles :: in the middle,
3358 but not at the beginning of an identifier.
3359 Also, space-separated :: is not recognised. */
3360 {
3361 if (c_ext & C_AUTO) /* automatic detection of C++ */
3362 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3363 lp += 2;
3364 toklen += 2;
3365 c = lp[-1];
3366 goto still_in_token;
3367 }
3368 else
3369 {
3370 bool funorvar = FALSE;
3371
/* In yacc rules every token is taken; elsewhere consider_token
   decides and updates the state variables. */
3372 if (yacc_rules
3373 || consider_token (newlb.buffer + tokoff, toklen, c,
3374 &c_ext, bracelev, parlev,
3375 &funorvar))
3376 {
/* After `operator', extend the token over the blanks and the
   operator symbol that follow, up to white space or `('. */
3377 if (fvdef == foperator)
3378 {
3379 char *oldlp = lp;
3380 lp = skip_spaces (lp-1);
3381 if (*lp != '\0')
3382 lp += 1;
3383 while (*lp != '\0'
3384 && !iswhite (*lp) && *lp != '(')
3385 lp += 1;
3386 c = *lp++;
3387 toklen += lp - oldlp;
3388 }
/* Build the tag name in token_name; which form it takes depends on
   the context the token was found in. */
3389 token.named = FALSE;
3390 if (!plainc
3391 && nestlev > 0 && definedef == dnone)
3392 /* in struct body */
3393 {
3394 write_classname (&token_name, qualifier);
3395 linebuffer_setlen (&token_name,
3396 token_name.len+qlen+toklen);
3397 strcat (token_name.buffer, qualifier);
3398 strncat (token_name.buffer,
3399 newlb.buffer + tokoff, toklen);
3400 token.named = TRUE;
3401 }
3402 else if (objdef == ocatseen)
3403 /* Objective C category */
3404 {
3405 int len = strlen (objtag) + 2 + toklen;
3406 linebuffer_setlen (&token_name, len);
3407 strcpy (token_name.buffer, objtag);
3408 strcat (token_name.buffer, "(");
3409 strncat (token_name.buffer,
3410 newlb.buffer + tokoff, toklen);
3411 strcat (token_name.buffer, ")");
3412 token.named = TRUE;
3413 }
3414 else if (objdef == omethodtag
3415 || objdef == omethodparm)
3416 /* Objective C method */
3417 {
3418 token.named = TRUE;
3419 }
3420 else if (fvdef == fdefunname)
3421 /* GNU DEFUN and similar macros */
3422 {
3423 bool defun = (newlb.buffer[tokoff] == 'F');
3424 int off = tokoff;
3425 int len = toklen;
3426
3427 /* Rewrite the tag so that emacs lisp DEFUNs
3428 can be found by their elisp name */
3429 if (defun)
3430 {
3431 off += 1;
3432 len -= 1;
3433 }
3434 linebuffer_setlen (&token_name, len);
3435 strncpy (token_name.buffer,
3436 newlb.buffer + off, len);
3437 token_name.buffer[len] = '\0';
3438 if (defun)
3439 while (--len >= 0)
3440 if (token_name.buffer[len] == '_')
3441 token_name.buffer[len] = '-';
3442 token.named = defun;
3443 }
3444 else
3445 {
3446 linebuffer_setlen (&token_name, toklen);
3447 strncpy (token_name.buffer,
3448 newlb.buffer + tokoff, toklen);
3449 token_name.buffer[toklen] = '\0';
3450 /* Name macros and members. */
3451 token.named = (structdef == stagseen
3452 || typdef == ttypeseen
3453 || typdef == tend
3454 || (funorvar
3455 && definedef == dignorerest)
3456 || (funorvar
3457 && definedef == dnone
3458 && structdef == snone
3459 && bracelev > 0));
3460 }
/* Remember where the token was found. */
3461 token.lineno = lineno;
3462 token.offset = tokoff;
3463 token.length = toklen;
3464 token.line = newlb.buffer;
3465 token.linepos = newlinepos;
3466 token.valid = TRUE;
3467
/* Either keep the token for later, switching buffers so that the
   line it points into is not overwritten by the next read, or make
   the tag at once. */
3468 if (definedef == dnone
3469 && (fvdef == fvnameseen
3470 || fvdef == foperator
3471 || structdef == stagseen
3472 || typdef == tend
3473 || typdef == ttypeseen
3474 || objdef != onone))
3475 {
3476 if (current_lb_is_new)
3477 switch_line_buffers ();
3478 }
3479 else if (definedef != dnone
3480 || fvdef == fdefunname
3481 || instruct)
3482 make_C_tag (funorvar);
3483 }
3484 else /* not yacc and consider_token failed */
3485 {
3486 if (inattribute && fvdef == fignore)
3487 {
3488 /* We have just met __attribute__ after a
3489 function parameter list: do not tag the
3490 function again. */
3491 fvdef = fvnone;
3492 }
3493 }
3494 midtoken = FALSE;
3495 }
3496 } /* if (endtoken (c)) */
3497 else if (intoken (c))
3498 still_in_token:
3499 {
3500 toklen++;
3501 continue;
3502 }
3503 } /* if (midtoken) */
3504 else if (begtoken (c))
3505 {
3506 switch (definedef)
3507 {
3508 case dnone:
3509 switch (fvdef)
3510 {
3511 case fstartlist:
3512 /* This prevents tagging fb in
3513 void (__attribute__((noreturn)) *fb) (void);
3514 Fixing this is not easy and not very important. */
3515 fvdef = finlist;
3516 continue;
3517 case flistseen:
3518 if (plainc || declarations)
3519 {
3520 make_C_tag (TRUE); /* a function */
3521 fvdef = fignore;
3522 }
3523 break;
3524 }
3525 if (structdef == stagseen && !cjava)
3526 {
3527 popclass_above (bracelev);
3528 structdef = snone;
3529 }
3530 break;
3531 case dsharpseen:
3532 savetoken = token;
3533 break;
3534 }
/* Start a new token here; inside yacc rules only a token that
   begins in the first column is started. */
3535 if (!yacc_rules || lp == newlb.buffer + 1)
3536 {
3537 tokoff = lp - 1 - newlb.buffer;
3538 toklen = 1;
3539 midtoken = TRUE;
3540 }
3541 continue;
3542 } /* if (begtoken) */
3543 } /* if must look at token */
3544
3545
3546 /* Detect end of line, colon, comma, semicolon and various braces
3547 after having handled a token.*/
3548 switch (c)
3549 {
3550 case ':':
3551 if (inattribute)
3552 break;
3553 if (yacc_rules && token.offset == 0 && token.valid)
3554 {
3555 make_C_tag (FALSE); /* a yacc function */
3556 break;
3557 }
3558 if (definedef != dnone)
3559 break;
3560 switch (objdef)
3561 {
3562 case otagseen:
3563 objdef = oignore;
3564 make_C_tag (TRUE); /* an Objective C class */
3565 break;
3566 case omethodtag:
3567 case omethodparm:
3568 objdef = omethodcolon;
3569 linebuffer_setlen (&token_name, token_name.len + 1);
3570 strcat (token_name.buffer, ":");
3571 break;
3572 }
3573 if (structdef == stagseen)
3574 {
3575 structdef = scolonseen;
3576 break;
3577 }
3578 /* Should be useless, but may work as a safety net. */
3579 if (cplpl && fvdef == flistseen)
3580 {
3581 make_C_tag (TRUE); /* a function */
3582 fvdef = fignore;
3583 break;
3584 }
3585 break;
3586 case ';':
3587 if (definedef != dnone || inattribute)
3588 break;
3589 switch (typdef)
3590 {
3591 case tend:
3592 case ttypeseen:
3593 make_C_tag (FALSE); /* a typedef */
3594 typdef = tnone;
3595 fvdef = fvnone;
3596 break;
3597 case tnone:
3598 case tinbody:
3599 case tignore:
3600 switch (fvdef)
3601 {
3602 case fignore:
3603 if (typdef == tignore || cplpl)
3604 fvdef = fvnone;
3605 break;
3606 case fvnameseen:
3607 if ((globals && bracelev == 0 && (!fvextern || declarations))
3608 || (members && instruct))
3609 make_C_tag (FALSE); /* a variable */
3610 fvextern = FALSE;
3611 fvdef = fvnone;
3612 token.valid = FALSE;
3613 break;
3614 case flistseen:
3615 if ((declarations
3616 && (cplpl || !instruct)
3617 && (typdef == tnone || (typdef != tignore && instruct)))
3618 || (members
3619 && plainc && instruct))
3620 make_C_tag (TRUE); /* a function */
3621 /* FALLTHRU */
3622 default:
3623 fvextern = FALSE;
3624 fvdef = fvnone;
3625 if (declarations
3626 && cplpl && structdef == stagseen)
3627 make_C_tag (FALSE); /* forward declaration */
3628 else
3629 token.valid = FALSE;
3630 } /* switch (fvdef) */
3631 /* FALLTHRU */
3632 default:
3633 if (!instruct)
3634 typdef = tnone;
3635 }
3636 if (structdef == stagseen)
3637 structdef = snone;
3638 break;
3639 case ',':
3640 if (definedef != dnone || inattribute)
3641 break;
3642 switch (objdef)
3643 {
3644 case omethodtag:
3645 case omethodparm:
3646 make_C_tag (TRUE); /* an Objective C method */
3647 objdef = oinbody;
3648 break;
3649 }
3650 switch (fvdef)
3651 {
3652 case fdefunkey:
3653 case foperator:
3654 case fstartlist:
3655 case finlist:
3656 case fignore:
3657 case vignore:
3658 break;
3659 case fdefunname:
3660 fvdef = fignore;
3661 break;
3662 case fvnameseen:
3663 if (parlev == 0
3664 && ((globals
3665 && bracelev == 0
3666 && templatelev == 0
3667 && (!fvextern || declarations))
3668 || (members && instruct)))
3669 make_C_tag (FALSE); /* a variable */
3670 break;
3671 case flistseen:
3672 if ((declarations && typdef == tnone && !instruct)
3673 || (members && typdef != tignore && instruct))
3674 {
3675 make_C_tag (TRUE); /* a function */
3676 fvdef = fvnameseen;
3677 }
3678 else if (!declarations)
3679 fvdef = fvnone;
3680 token.valid = FALSE;
3681 break;
3682 default:
3683 fvdef = fvnone;
3684 }
3685 if (structdef == stagseen)
3686 structdef = snone;
3687 break;
3688 case ']':
3689 if (definedef != dnone || inattribute)
3690 break;
3691 if (structdef == stagseen)
3692 structdef = snone;
3693 switch (typdef)
3694 {
3695 case ttypeseen:
3696 case tend:
3697 typdef = tignore;
3698 make_C_tag (FALSE); /* a typedef */
3699 break;
3700 case tnone:
3701 case tinbody:
3702 switch (fvdef)
3703 {
3704 case foperator:
3705 case finlist:
3706 case fignore:
3707 case vignore:
3708 break;
3709 case fvnameseen:
3710 if ((members && bracelev == 1)
3711 || (globals && bracelev == 0
3712 && (!fvextern || declarations)))
3713 make_C_tag (FALSE); /* a variable */
3714 /* FALLTHRU */
3715 default:
3716 fvdef = fvnone;
3717 }
3718 break;
3719 }
3720 break;
3721 case '(':
3722 if (inattribute)
3723 {
3724 attrparlev++;
3725 break;
3726 }
3727 if (definedef != dnone)
3728 break;
3729 if (objdef == otagseen && parlev == 0)
3730 objdef = oparenseen;
3731 switch (fvdef)
3732 {
3733 case fvnameseen:
3734 if (typdef == ttypeseen
3735 && *lp != '*'
3736 && !instruct)
3737 {
3738 /* This handles constructs like:
3739 typedef void OperatorFun (int fun); */
3740 make_C_tag (FALSE);
3741 typdef = tignore;
3742 fvdef = fignore;
3743 break;
3744 }
3745 /* FALLTHRU */
3746 case foperator:
3747 fvdef = fstartlist;
3748 break;
3749 case flistseen:
3750 fvdef = finlist;
3751 break;
3752 }
3753 parlev++;
3754 break;
3755 case ')':
3756 if (inattribute)
3757 {
3758 if (--attrparlev == 0)
3759 inattribute = FALSE;
3760 break;
3761 }
3762 if (definedef != dnone)
3763 break;
3764 if (objdef == ocatseen && parlev == 1)
3765 {
3766 make_C_tag (TRUE); /* an Objective C category */
3767 objdef = oignore;
3768 }
3769 if (--parlev == 0)
3770 {
3771 switch (fvdef)
3772 {
3773 case fstartlist:
3774 case finlist:
3775 fvdef = flistseen;
3776 break;
3777 }
3778 if (!instruct
3779 && (typdef == tend
3780 || typdef == ttypeseen))
3781 {
3782 typdef = tignore;
3783 make_C_tag (FALSE); /* a typedef */
3784 }
3785 }
3786 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3787 parlev = 0;
3788 break;
3789 case '{':
3790 if (definedef != dnone)
3791 break;
3792 if (typdef == ttypeseen)
3793 {
3794 /* Whenever typdef is set to tinbody (currently only
3795 here), typdefbracelev should be set to bracelev. */
3796 typdef = tinbody;
3797 typdefbracelev = bracelev;
3798 }
3799 switch (fvdef)
3800 {
3801 case flistseen:
3802 make_C_tag (TRUE); /* a function */
3803 /* FALLTHRU */
3804 case fignore:
3805 fvdef = fvnone;
3806 break;
3807 case fvnone:
3808 switch (objdef)
3809 {
3810 case otagseen:
3811 make_C_tag (TRUE); /* an Objective C class */
3812 objdef = oignore;
3813 break;
3814 case omethodtag:
3815 case omethodparm:
3816 make_C_tag (TRUE); /* an Objective C method */
3817 objdef = oinbody;
3818 break;
3819 default:
3820 /* Neutralize `extern "C" {' grot. */
3821 if (bracelev == 0 && structdef == snone && nestlev == 0
3822 && typdef == tnone)
3823 bracelev = -1;
3824 }
3825 break;
3826 }
3827 switch (structdef)
3828 {
3829 case skeyseen: /* unnamed struct */
3830 pushclass_above (bracelev, NULL, 0);
3831 structdef = snone;
3832 break;
3833 case stagseen: /* named struct or enum */
3834 case scolonseen: /* a class */
3835 pushclass_above (bracelev,token.line+token.offset, token.length);
3836 structdef = snone;
3837 make_C_tag (FALSE); /* a struct or enum */
3838 break;
3839 }
3840 bracelev += 1;
3841 break;
3842 case '*':
3843 if (definedef != dnone)
3844 break;
3845 if (fvdef == fstartlist)
3846 {
3847 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3848 token.valid = FALSE;
3849 }
3850 break;
3851 case '}':
3852 if (definedef != dnone)
3853 break;
3854 bracelev -= 1;
3855 if (!ignoreindent && lp == newlb.buffer + 1)
3856 {
3857 if (bracelev != 0)
3858 token.valid = FALSE; /* unexpected value, token unreliable */
3859 bracelev = 0; /* reset brace level if first column */
3860 parlev = 0; /* also reset paren level, just in case... */
3861 }
3862 else if (bracelev < 0)
3863 {
3864 token.valid = FALSE; /* something gone amiss, token unreliable */
3865 bracelev = 0;
3866 }
3867 if (bracelev == 0 && fvdef == vignore)
3868 fvdef = fvnone; /* end of function */
3869 popclass_above (bracelev);
3870 structdef = snone;
3871 /* Only if typdef == tinbody is typdefbracelev significant. */
3872 if (typdef == tinbody && bracelev <= typdefbracelev)
3873 {
3874 assert (bracelev == typdefbracelev);
3875 typdef = tend;
3876 }
3877 break;
3878 case '=':
3879 if (definedef != dnone)
3880 break;
3881 switch (fvdef)
3882 {
3883 case foperator:
3884 case finlist:
3885 case fignore:
3886 case vignore:
3887 break;
3888 case fvnameseen:
3889 if ((members && bracelev == 1)
3890 || (globals && bracelev == 0 && (!fvextern || declarations)))
3891 make_C_tag (FALSE); /* a variable */
3892 /* FALLTHRU */
3893 default:
3894 fvdef = vignore;
3895 }
3896 break;
3897 case '<':
3898 if (cplpl
3899 && (structdef == stagseen || fvdef == fvnameseen))
3900 {
3901 templatelev++;
3902 break;
3903 }
3904 goto resetfvdef;
3905 case '>':
3906 if (templatelev > 0)
3907 {
3908 templatelev--;
3909 break;
3910 }
3911 goto resetfvdef;
3912 case '+':
3913 case '-':
3914 if (objdef == oinbody && bracelev == 0)
3915 {
3916 objdef = omethodsign;
3917 break;
3918 }
3919 /* FALLTHRU */
3920 resetfvdef:
3921 case '#': case '~': case '&': case '%': case '/':
3922 case '|': case '^': case '!': case '.': case '?':
3923 if (definedef != dnone)
3924 break;
3925 /* These surely cannot follow a function tag in C. */
3926 switch (fvdef)
3927 {
3928 case foperator:
3929 case finlist:
3930 case fignore:
3931 case vignore:
3932 break;
3933 default:
3934 fvdef = fvnone;
3935 }
3936 break;
3937 case '\0':
3938 if (objdef == otagseen)
3939 {
3940 make_C_tag (TRUE); /* an Objective C class */
3941 objdef = oignore;
3942 }
3943 /* If a macro spans multiple lines don't reset its state. */
3944 if (quotednl)
3945 CNL_SAVE_DEFINEDEF ();
3946 else
3947 CNL ();
3948 break;
3949 } /* switch (c) */
3950
3951 } /* while not eof */
3952
/* Release the storage of both line buffers. */
3953 free (lbs[0].lb.buffer);
3954 free (lbs[1].lb.buffer);
3955 }
3956
3957 /*
3958 * Process either a C++ file or a C file depending on the setting
3959 * of a global flag.
3960 */
3961 static void
3962 default_C_entries (inf)
3963 FILE *inf;
3964 {
3965 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3966 }
3967
/* Parse INF with C_entries with no language extension flag set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3975
3976 /* Always do C++. */
3977 static void
3978 Cplusplus_entries (inf)
3979 FILE *inf;
3980 {
3981 C_entries (C_PLPL, inf);
3982 }
3983
3984 /* Always do Java. */
3985 static void
3986 Cjava_entries (inf)
3987 FILE *inf;
3988 {
3989 C_entries (C_JAVA, inf);
3990 }
3991
3992 /* Always do C*. */
3993 static void
3994 Cstar_entries (inf)
3995 FILE *inf;
3996 {
3997 C_entries (C_STAR, inf);
3998 }
3999
4000 /* Always do Yacc. */
4001 static void
4002 Yacc_entries (inf)
4003 FILE *inf;
4004 {
4005 C_entries (YACC, inf);
4006 }
4007
4008 \f
4009 /* Useful macros. */
/* LOOP_ON_INPUT_LINES expands to the header of a `for' loop; the
   statement that follows it is the loop body.  Before each pass, and
   only while end of file has not been reached on FILE_POINTER, a line
   is read into LINE_BUFFER and CHAR_POINTER is set to its start. */
4010 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4011 for (; /* loop initialization */ \
4012 !feof (file_pointer) /* loop test */ \
4013 && /* instructions at start of loop */ \
4014 (readline (&line_buffer, file_pointer), \
4015 char_pointer = line_buffer.buffer, \
4016 TRUE); \
4017 )
4018
/* LOOKING_AT is true when CP points at the keyword KW followed by a
   character for which notinname holds; in that case CP is advanced
   past the keyword and any spaces after it.  CP is evaluated more
   than once and is assigned to, so it must be a plain lvalue. */
4019 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4020 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4021 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4022 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4023 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4024
4025 /* Similar to LOOKING_AT but does not use notinname, does not skip */
/* spaces, and compares with strncaseeq; on a match CP is left just
   after the keyword. */
4026 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4027 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4028 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4029 && ((cp) += sizeof(kw)-1)) /* skip the keyword */
4030
4031 /*
4032 * Read a file, but do no processing. This is used to do regexp
4033 * matching on files that have no language defined.
4034 */
4035 static void
4036 just_read_file (inf)
4037 FILE *inf;
4038 {
4039 register char *dummy;
4040
4041 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4042 continue;
4043 }
4044
4045 \f
4046 /* Fortran parsing */
4047
4048 static void F_takeprec __P((void));
4049 static void F_getit __P((FILE *));
4050
4051 static void
4052 F_takeprec ()
4053 {
4054 dbp = skip_spaces (dbp);
4055 if (*dbp != '*')
4056 return;
4057 dbp++;
4058 dbp = skip_spaces (dbp);
4059 if (strneq (dbp, "(*)", 3))
4060 {
4061 dbp += 3;
4062 return;
4063 }
4064 if (!ISDIGIT (*dbp))
4065 {
4066 --dbp; /* force failure */
4067 return;
4068 }
4069 do
4070 dbp++;
4071 while (ISDIGIT (*dbp));
4072 }
4073
4074 static void
4075 F_getit (inf)
4076 FILE *inf;
4077 {
4078 register char *cp;
4079
4080 dbp = skip_spaces (dbp);
4081 if (*dbp == '\0')
4082 {
4083 readline (&lb, inf);
4084 dbp = lb.buffer;
4085 if (dbp[5] != '&')
4086 return;
4087 dbp += 6;
4088 dbp = skip_spaces (dbp);
4089 }
4090 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4091 return;
4092 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4093 continue;
4094 make_tag (dbp, cp-dbp, TRUE,
4095 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4096 }
4097
4098
4099 static void
4100 Fortran_functions (inf)
4101 FILE *inf;
4102 {
4103 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4104 {
4105 if (*dbp == '%')
4106 dbp++; /* Ratfor escape to fortran */
4107 dbp = skip_spaces (dbp);
4108 if (*dbp == '\0')
4109 continue;
4110 switch (lowcase (*dbp))
4111 {
4112 case 'i':
4113 if (nocase_tail ("integer"))
4114 F_takeprec ();
4115 break;
4116 case 'r':
4117 if (nocase_tail ("real"))
4118 F_takeprec ();
4119 break;
4120 case 'l':
4121 if (nocase_tail ("logical"))
4122 F_takeprec ();
4123 break;
4124 case 'c':
4125 if (nocase_tail ("complex") || nocase_tail ("character"))
4126 F_takeprec ();
4127 break;
4128 case 'd':
4129 if (nocase_tail ("double"))
4130 {
4131 dbp = skip_spaces (dbp);
4132 if (*dbp == '\0')
4133 continue;
4134 if (nocase_tail ("precision"))
4135 break;
4136 continue;
4137 }
4138 break;
4139 }
4140 dbp = skip_spaces (dbp);
4141 if (*dbp == '\0')
4142 continue;
4143 switch (lowcase (*dbp))
4144 {
4145 case 'f':
4146 if (nocase_tail ("function"))
4147 F_getit (inf);
4148 continue;
4149 case 's':
4150 if (nocase_tail ("subroutine"))
4151 F_getit (inf);
4152 continue;
4153 case 'e':
4154 if (nocase_tail ("entry"))
4155 F_getit (inf);
4156 continue;
4157 case 'b':
4158 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4159 {
4160 dbp = skip_spaces (dbp);
4161 if (*dbp == '\0') /* assume un-named */
4162 make_tag ("blockdata", 9, TRUE,
4163 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4164 else
4165 F_getit (inf); /* look for name */
4166 }
4167 continue;
4168 }
4169 }
4170 }
4171
4172 \f
4173 /*
4174 * Ada parsing
4175 * Original code by
4176 * Philippe Waroquiers (1998)
4177 */
4178
4179 static void Ada_getit __P((FILE *, char *));
4180
4181 /* Once we are positioned after an "interesting" keyword, let's get
4182 the real tag value necessary. */
4183 static void
4184 Ada_getit (inf, name_qualifier)
4185 FILE *inf;
4186 char *name_qualifier;
4187 {
4188 register char *cp;
4189 char *name;
4190 char c;
4191
4192 while (!feof (inf))
4193 {
4194 dbp = skip_spaces (dbp);
4195 if (*dbp == '\0'
4196 || (dbp[0] == '-' && dbp[1] == '-'))
4197 {
4198 readline (&lb, inf);
4199 dbp = lb.buffer;
4200 }
4201 switch (lowcase(*dbp))
4202 {
4203 case 'b':
4204 if (nocase_tail ("body"))
4205 {
4206 /* Skipping body of procedure body or package body or ....
4207 resetting qualifier to body instead of spec. */
4208 name_qualifier = "/b";
4209 continue;
4210 }
4211 break;
4212 case 't':
4213 /* Skipping type of task type or protected type ... */
4214 if (nocase_tail ("type"))
4215 continue;
4216 break;
4217 }
4218 if (*dbp == '"')
4219 {
4220 dbp += 1;
4221 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4222 continue;
4223 }
4224 else
4225 {
4226 dbp = skip_spaces (dbp);
4227 for (cp = dbp;
4228 (*cp != '\0'
4229 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4230 cp++)
4231 continue;
4232 if (cp == dbp)
4233 return;
4234 }
4235 c = *cp;
4236 *cp = '\0';
4237 name = concat (dbp, name_qualifier, "");
4238 *cp = c;
4239 make_tag (name, strlen (name), TRUE,
4240 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4241 free (name);
4242 if (c == '"')
4243 dbp = cp + 1;
4244 return;
4245 }
4246 }
4247
4248 static void
4249 Ada_funcs (inf)
4250 FILE *inf;
4251 {
4252 bool inquote = FALSE;
4253 bool skip_till_semicolumn = FALSE;
4254
4255 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4256 {
4257 while (*dbp != '\0')
4258 {
4259 /* Skip a string i.e. "abcd". */
4260 if (inquote || (*dbp == '"'))
4261 {
4262 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4263 if (dbp != NULL)
4264 {
4265 inquote = FALSE;
4266 dbp += 1;
4267 continue; /* advance char */
4268 }
4269 else
4270 {
4271 inquote = TRUE;
4272 break; /* advance line */
4273 }
4274 }
4275
4276 /* Skip comments. */
4277 if (dbp[0] == '-' && dbp[1] == '-')
4278 break; /* advance line */
4279
4280 /* Skip character enclosed in single quote i.e. 'a'
4281 and skip single quote starting an attribute i.e. 'Image. */
4282 if (*dbp == '\'')
4283 {
4284 dbp++ ;
4285 if (*dbp != '\0')
4286 dbp++;
4287 continue;
4288 }
4289
4290 if (skip_till_semicolumn)
4291 {
4292 if (*dbp == ';')
4293 skip_till_semicolumn = FALSE;
4294 dbp++;
4295 continue; /* advance char */
4296 }
4297
4298 /* Search for beginning of a token. */
4299 if (!begtoken (*dbp))
4300 {
4301 dbp++;
4302 continue; /* advance char */
4303 }
4304
4305 /* We are at the beginning of a token. */
4306 switch (lowcase(*dbp))
4307 {
4308 case 'f':
4309 if (!packages_only && nocase_tail ("function"))
4310 Ada_getit (inf, "/f");
4311 else
4312 break; /* from switch */
4313 continue; /* advance char */
4314 case 'p':
4315 if (!packages_only && nocase_tail ("procedure"))
4316 Ada_getit (inf, "/p");
4317 else if (nocase_tail ("package"))
4318 Ada_getit (inf, "/s");
4319 else if (nocase_tail ("protected")) /* protected type */
4320 Ada_getit (inf, "/t");
4321 else
4322 break; /* from switch */
4323 continue; /* advance char */
4324
4325 case 'u':
4326 if (typedefs && !packages_only && nocase_tail ("use"))
4327 {
4328 /* when tagging types, avoid tagging use type Pack.Typename;
4329 for this, we will skip everything till a ; */
4330 skip_till_semicolumn = TRUE;
4331 continue; /* advance char */
4332 }
4333
4334 case 't':
4335 if (!packages_only && nocase_tail ("task"))
4336 Ada_getit (inf, "/k");
4337 else if (typedefs && !packages_only && nocase_tail ("type"))
4338 {
4339 Ada_getit (inf, "/t");
4340 while (*dbp != '\0')
4341 dbp += 1;
4342 }
4343 else
4344 break; /* from switch */
4345 continue; /* advance char */
4346 }
4347
4348 /* Look for the end of the token. */
4349 while (!endtoken (*dbp))
4350 dbp++;
4351
4352 } /* advance char */
4353 } /* advance line */
4354 }
4355
4356 \f
4357 /*
4358 * Unix and microcontroller assembly tag handling
4359 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4360 * Idea by Bob Weiner, Motorola Inc. (1994)
4361 */
4362 static void
4363 Asm_labels (inf)
4364 FILE *inf;
4365 {
4366 register char *cp;
4367
4368 LOOP_ON_INPUT_LINES (inf, lb, cp)
4369 {
4370 /* If first char is alphabetic or one of [_.$], test for colon
4371 following identifier. */
4372 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4373 {
4374 /* Read past label. */
4375 cp++;
4376 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4377 cp++;
4378 if (*cp == ':' || iswhite (*cp))
4379 /* Found end of label, so copy it and add it to the table. */
4380 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4381 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4382 }
4383 }
4384 }
4385
4386 \f
4387 /*
4388 * Perl support
4389 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4390 * Perl variable names: /^(my|local).../
4391 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4392 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4393 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4394 */
4395 static void
4396 Perl_functions (inf)
4397 FILE *inf;
4398 {
4399 char *package = savestr ("main"); /* current package name */
4400 register char *cp;
4401
4402 LOOP_ON_INPUT_LINES (inf, lb, cp)
4403 {
4404 cp = skip_spaces (cp);
4405
4406 if (LOOKING_AT (cp, "package"))
4407 {
4408 free (package);
4409 get_tag (cp, &package);
4410 }
4411 else if (LOOKING_AT (cp, "sub"))
4412 {
4413 char *pos;
4414 char *sp = cp;
4415
4416 while (!notinname (*cp))
4417 cp++;
4418 if (cp == sp)
4419 continue; /* nothing found */
4420 if ((pos = etags_strchr (sp, ':')) != NULL
4421 && pos < cp && pos[1] == ':')
4422 /* The name is already qualified. */
4423 make_tag (sp, cp - sp, TRUE,
4424 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4425 else
4426 /* Qualify it. */
4427 {
4428 char savechar, *name;
4429
4430 savechar = *cp;
4431 *cp = '\0';
4432 name = concat (package, "::", sp);
4433 *cp = savechar;
4434 make_tag (name, strlen(name), TRUE,
4435 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4436 free (name);
4437 }
4438 }
4439 else if (globals) /* only if we are tagging global vars */
4440 {
4441 /* Skip a qualifier, if any. */
4442 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4443 /* After "my" or "local", but before any following paren or space. */
4444 char *varstart = cp;
4445
4446 if (qual /* should this be removed? If yes, how? */
4447 && (*cp == '$' || *cp == '@' || *cp == '%'))
4448 {
4449 varstart += 1;
4450 do
4451 cp++;
4452 while (ISALNUM (*cp) || *cp == '_');
4453 }
4454 else if (qual)
4455 {
4456 /* Should be examining a variable list at this point;
4457 could insist on seeing an open parenthesis. */
4458 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4459 cp++;
4460 }
4461 else
4462 continue;
4463
4464 make_tag (varstart, cp - varstart, FALSE,
4465 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4466 }
4467 }
4468 free (package);
4469 }
4470
4471
4472 /*
4473 * Python support
4474 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4475 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4476 * More ideas by seb bacon <seb@jamkit.com> (2002)
4477 */
4478 static void
4479 Python_functions (inf)
4480 FILE *inf;
4481 {
4482 register char *cp;
4483
4484 LOOP_ON_INPUT_LINES (inf, lb, cp)
4485 {
4486 cp = skip_spaces (cp);
4487 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4488 {
4489 char *name = cp;
4490 while (!notinname (*cp) && *cp != ':')
4491 cp++;
4492 make_tag (name, cp - name, TRUE,
4493 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4494 }
4495 }
4496 }
4497
4498 \f
4499 /*
4500 * PHP support
4501 * Look for:
4502 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4503 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4504 * - /^[ \t]*define\(\"[^\"]+/
4505 * Only with --members:
4506 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4507 * Idea by Diez B. Roggisch (2001)
4508 */
4509 static void
4510 PHP_functions (inf)
4511 FILE *inf;
4512 {
4513 register char *cp, *name;
4514 bool search_identifier = FALSE;
4515
4516 LOOP_ON_INPUT_LINES (inf, lb, cp)
4517 {
4518 cp = skip_spaces (cp);
4519 name = cp;
4520 if (search_identifier
4521 && *cp != '\0')
4522 {
4523 while (!notinname (*cp))
4524 cp++;
4525 make_tag (name, cp - name, TRUE,
4526 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4527 search_identifier = FALSE;
4528 }
4529 else if (LOOKING_AT (cp, "function"))
4530 {
4531 if(*cp == '&')
4532 cp = skip_spaces (cp+1);
4533 if(*cp != '\0')
4534 {
4535 name = cp;
4536 while (!notinname (*cp))
4537 cp++;
4538 make_tag (name, cp - name, TRUE,
4539 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4540 }
4541 else
4542 search_identifier = TRUE;
4543 }
4544 else if (LOOKING_AT (cp, "class"))
4545 {
4546 if (*cp != '\0')
4547 {
4548 name = cp;
4549 while (*cp != '\0' && !iswhite (*cp))
4550 cp++;
4551 make_tag (name, cp - name, FALSE,
4552 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4553 }
4554 else
4555 search_identifier = TRUE;
4556 }
4557 else if (strneq (cp, "define", 6)
4558 && (cp = skip_spaces (cp+6))
4559 && *cp++ == '('
4560 && (*cp == '"' || *cp == '\''))
4561 {
4562 char quote = *cp++;
4563 name = cp;
4564 while (*cp != quote && *cp != '\0')
4565 cp++;
4566 make_tag (name, cp - name, FALSE,
4567 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4568 }
4569 else if (members
4570 && LOOKING_AT (cp, "var")
4571 && *cp == '$')
4572 {
4573 name = cp;
4574 while (!notinname(*cp))
4575 cp++;
4576 make_tag (name, cp - name, FALSE,
4577 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4578 }
4579 }
4580 }
4581
4582 \f
4583 /*
4584 * Cobol tag functions
4585 * We could look for anything that could be a paragraph name.
4586 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4587 * Idea by Corny de Souza (1993)
4588 */
4589 static void
4590 Cobol_paragraphs (inf)
4591 FILE *inf;
4592 {
4593 register char *bp, *ep;
4594
4595 LOOP_ON_INPUT_LINES (inf, lb, bp)
4596 {
4597 if (lb.len < 9)
4598 continue;
4599 bp += 8;
4600
4601 /* If eoln, compiler option or comment ignore whole line. */
4602 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4603 continue;
4604
4605 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4606 continue;
4607 if (*ep++ == '.')
4608 make_tag (bp, ep - bp, TRUE,
4609 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4610 }
4611 }
4612
4613 \f
4614 /*
4615 * Makefile support
4616 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4617 */
4618 static void
4619 Makefile_targets (inf)
4620 FILE *inf;
4621 {
4622 register char *bp;
4623
4624 LOOP_ON_INPUT_LINES (inf, lb, bp)
4625 {
4626 if (*bp == '\t' || *bp == '#')
4627 continue;
4628 while (*bp != '\0' && *bp != '=' && *bp != ':')
4629 bp++;
4630 if (*bp == ':' || (globals && *bp == '='))
4631 {
4632 /* We should detect if there is more than one tag, but we do not.
4633 We just skip initial and final spaces. */
4634 char * namestart = skip_spaces (lb.buffer);
4635 while (--bp > namestart)
4636 if (!notinname (*bp))
4637 break;
4638 make_tag (namestart, bp - namestart + 1, TRUE,
4639 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4640 }
4641 }
4642 }
4643
4644 \f
4645 /*
4646 * Pascal parsing
4647 * Original code by Mosur K. Mohan (1989)
4648 *
4649 * Locates tags for procedures & functions. Doesn't do any type- or
4650 * var-definitions. It does look for the keyword "extern" or
4651 * "forward" immediately following the procedure statement; if found,
4652 * the tag is skipped.
4653 */
4654 static void
4655 Pascal_functions (inf)
4656 FILE *inf;
4657 {
4658 linebuffer tline; /* mostly copied from C_entries */
4659 long save_lcno;
4660 int save_lineno, namelen, taglen;
4661 char c, *name;
4662
4663 bool /* each of these flags is TRUE if: */
4664 incomment, /* point is inside a comment */
4665 inquote, /* point is inside '..' string */
4666 get_tagname, /* point is after PROCEDURE/FUNCTION
4667 keyword, so next item = potential tag */
4668 found_tag, /* point is after a potential tag */
4669 inparms, /* point is within parameter-list */
4670 verify_tag; /* point has passed the parm-list, so the
4671 next token will determine whether this
4672 is a FORWARD/EXTERN to be ignored, or
4673 whether it is a real tag */
4674
4675 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4676 name = NULL; /* keep compiler quiet */
4677 dbp = lb.buffer;
4678 *dbp = '\0';
4679 linebuffer_init (&tline);
4680
4681 incomment = inquote = FALSE;
4682 found_tag = FALSE; /* have a proc name; check if extern */
4683 get_tagname = FALSE; /* found "procedure" keyword */
4684 inparms = FALSE; /* found '(' after "proc" */
4685 verify_tag = FALSE; /* check if "extern" is ahead */
4686
4687
4688 while (!feof (inf)) /* long main loop to get next char */
4689 {
4690 c = *dbp++;
4691 if (c == '\0') /* if end of line */
4692 {
4693 readline (&lb, inf);
4694 dbp = lb.buffer;
4695 if (*dbp == '\0')
4696 continue;
4697 if (!((found_tag && verify_tag)
4698 || get_tagname))
4699 c = *dbp++; /* only if don't need *dbp pointing
4700 to the beginning of the name of
4701 the procedure or function */
4702 }
4703 if (incomment)
4704 {
4705 if (c == '}') /* within { } comments */
4706 incomment = FALSE;
4707 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4708 {
4709 dbp++;
4710 incomment = FALSE;
4711 }
4712 continue;
4713 }
4714 else if (inquote)
4715 {
4716 if (c == '\'')
4717 inquote = FALSE;
4718 continue;
4719 }
4720 else
4721 switch (c)
4722 {
4723 case '\'':
4724 inquote = TRUE; /* found first quote */
4725 continue;
4726 case '{': /* found open { comment */
4727 incomment = TRUE;
4728 continue;
4729 case '(':
4730 if (*dbp == '*') /* found open (* comment */
4731 {
4732 incomment = TRUE;
4733 dbp++;
4734 }
4735 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4736 inparms = TRUE;
4737 continue;
4738 case ')': /* end of parms list */
4739 if (inparms)
4740 inparms = FALSE;
4741 continue;
4742 case ';':
4743 if (found_tag && !inparms) /* end of proc or fn stmt */
4744 {
4745 verify_tag = TRUE;
4746 break;
4747 }
4748 continue;
4749 }
4750 if (found_tag && verify_tag && (*dbp != ' '))
4751 {
4752 /* Check if this is an "extern" declaration. */
4753 if (*dbp == '\0')
4754 continue;
4755 if (lowcase (*dbp == 'e'))
4756 {
4757 if (nocase_tail ("extern")) /* superfluous, really! */
4758 {
4759 found_tag = FALSE;
4760 verify_tag = FALSE;
4761 }
4762 }
4763 else if (lowcase (*dbp) == 'f')
4764 {
4765 if (nocase_tail ("forward")) /* check for forward reference */
4766 {
4767 found_tag = FALSE;
4768 verify_tag = FALSE;
4769 }
4770 }
4771 if (found_tag && verify_tag) /* not external proc, so make tag */
4772 {
4773 found_tag = FALSE;
4774 verify_tag = FALSE;
4775 make_tag (name, namelen, TRUE,
4776 tline.buffer, taglen, save_lineno, save_lcno);
4777 continue;
4778 }
4779 }
4780 if (get_tagname) /* grab name of proc or fn */
4781 {
4782 char *cp;
4783
4784 if (*dbp == '\0')
4785 continue;
4786
4787 /* Find block name. */
4788 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4789 continue;
4790
4791 /* Save all values for later tagging. */
4792 linebuffer_setlen (&tline, lb.len);
4793 strcpy (tline.buffer, lb.buffer);
4794 save_lineno = lineno;
4795 save_lcno = linecharno;
4796 name = tline.buffer + (dbp - lb.buffer);
4797 namelen = cp - dbp;
4798 taglen = cp - lb.buffer + 1;
4799
4800 dbp = cp; /* set dbp to e-o-token */
4801 get_tagname = FALSE;
4802 found_tag = TRUE;
4803 continue;
4804
4805 /* And proceed to check for "extern". */
4806 }
4807 else if (!incomment && !inquote && !found_tag)
4808 {
4809 /* Check for proc/fn keywords. */
4810 switch (lowcase (c))
4811 {
4812 case 'p':
4813 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4814 get_tagname = TRUE;
4815 continue;
4816 case 'f':
4817 if (nocase_tail ("unction"))
4818 get_tagname = TRUE;
4819 continue;
4820 }
4821 }
4822 } /* while not eof */
4823
4824 free (tline.buffer);
4825 }
4826
4827 \f
4828 /*
4829 * Lisp tag functions
4830 * look for (def or (DEF, quote or QUOTE
4831 */
4832
4833 static void L_getit __P((void));
4834
4835 static void
4836 L_getit ()
4837 {
4838 if (*dbp == '\'') /* Skip prefix quote */
4839 dbp++;
4840 else if (*dbp == '(')
4841 {
4842 dbp++;
4843 /* Try to skip "(quote " */
4844 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4845 /* Ok, then skip "(" before name in (defstruct (foo)) */
4846 dbp = skip_spaces (dbp);
4847 }
4848 get_tag (dbp, NULL);
4849 }
4850
4851 static void
4852 Lisp_functions (inf)
4853 FILE *inf;
4854 {
4855 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4856 {
4857 if (dbp[0] != '(')
4858 continue;
4859
4860 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4861 {
4862 dbp = skip_non_spaces (dbp);
4863 dbp = skip_spaces (dbp);
4864 L_getit ();
4865 }
4866 else
4867 {
4868 /* Check for (foo::defmumble name-defined ... */
4869 do
4870 dbp++;
4871 while (!notinname (*dbp) && *dbp != ':');
4872 if (*dbp == ':')
4873 {
4874 do
4875 dbp++;
4876 while (*dbp == ':');
4877
4878 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4879 {
4880 dbp = skip_non_spaces (dbp);
4881 dbp = skip_spaces (dbp);
4882 L_getit ();
4883 }
4884 }
4885 }
4886 }
4887 }
4888
4889 \f
4890 /*
4891 * Lua script language parsing
4892 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4893 *
4894 * "function" and "local function" are tags if they start at column 1.
4895 */
4896 static void
4897 Lua_functions (inf)
4898 FILE *inf;
4899 {
4900 register char *bp;
4901
4902 LOOP_ON_INPUT_LINES (inf, lb, bp)
4903 {
4904 if (bp[0] != 'f' && bp[0] != 'l')
4905 continue;
4906
4907 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4908
4909 if (LOOKING_AT (bp, "function"))
4910 get_tag (bp, NULL);
4911 }
4912 }
4913
4914 \f
4915 /*
4916 * Postscript tags
4917 * Just look for lines where the first character is '/'
4918 * Also look at "defineps" for PSWrap
4919 * Ideas by:
4920 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4921 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4922 */
4923 static void
4924 PS_functions (inf)
4925 FILE *inf;
4926 {
4927 register char *bp, *ep;
4928
4929 LOOP_ON_INPUT_LINES (inf, lb, bp)
4930 {
4931 if (bp[0] == '/')
4932 {
4933 for (ep = bp+1;
4934 *ep != '\0' && *ep != ' ' && *ep != '{';
4935 ep++)
4936 continue;
4937 make_tag (bp, ep - bp, TRUE,
4938 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4939 }
4940 else if (LOOKING_AT (bp, "defineps"))
4941 get_tag (bp, NULL);
4942 }
4943 }
4944
4945 \f
4946 /*
4947 * Forth tags
4948 * Ignore anything after \ followed by space or in ( )
4949 * Look for words defined by :
4950 * Look for constant, code, create, defer, value, and variable
4951 * OBP extensions: Look for buffer:, field,
4952 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4953 */
4954 static void
4955 Forth_words (inf)
4956 FILE *inf;
4957 {
4958 register char *bp;
4959
4960 LOOP_ON_INPUT_LINES (inf, lb, bp)
4961 while ((bp = skip_spaces (bp))[0] != '\0')
4962 if (bp[0] == '\\' && iswhite(bp[1]))
4963 break; /* read next line */
4964 else if (bp[0] == '(' && iswhite(bp[1]))
4965 do /* skip to ) or eol */
4966 bp++;
4967 while (*bp != ')' && *bp != '\0');
4968 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4969 || LOOKING_AT_NOCASE (bp, "constant")
4970 || LOOKING_AT_NOCASE (bp, "code")
4971 || LOOKING_AT_NOCASE (bp, "create")
4972 || LOOKING_AT_NOCASE (bp, "defer")
4973 || LOOKING_AT_NOCASE (bp, "value")
4974 || LOOKING_AT_NOCASE (bp, "variable")
4975 || LOOKING_AT_NOCASE (bp, "buffer:")
4976 || LOOKING_AT_NOCASE (bp, "field"))
4977 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4978 else
4979 bp = skip_non_spaces (bp);
4980 }
4981
4982 \f
4983 /*
4984 * Scheme tag functions
4985 * look for (def... xyzzy
4986 * (def... (xyzzy
4987 * (def ... ((...(xyzzy ....
4988 * (set! xyzzy
4989 * Original code by Ken Haase (1985?)
4990 */
4991 static void
4992 Scheme_functions (inf)
4993 FILE *inf;
4994 {
4995 register char *bp;
4996
4997 LOOP_ON_INPUT_LINES (inf, lb, bp)
4998 {
4999 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5000 {
5001 bp = skip_non_spaces (bp+4);
5002 /* Skip over open parens and white space */
5003 while (notinname (*bp))
5004 bp++;
5005 get_tag (bp, NULL);
5006 }
5007 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5008 get_tag (bp, NULL);
5009 }
5010 }
5011
5012 \f
5013 /* Find tags in TeX and LaTeX input files. */
5014
5015 /* TEX_toktab is a table of TeX control sequences that define tags.
5016 * Each entry records one such control sequence.
5017 *
5018 * Original code from who knows whom.
5019 * Ideas by:
5020 * Stefan Monnier (2002)
5021 */
5022
5023 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5024
5025 /* Default set of control sequences to put into TEX_toktab.
5026 The value of environment var TEXTAGS is prepended to this. */
5027 static char *TEX_defenv = "\
5028 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5029 :part:appendix:entry:index:def\
5030 :newcommand:renewcommand:newenvironment:renewenvironment";
5031
5032 static void TEX_mode __P((FILE *));
5033 static void TEX_decode_env __P((char *, char *));
5034
5035 static char TEX_esc = '\\';
5036 static char TEX_opgrp = '{';
5037 static char TEX_clgrp = '}';
5038
5039 /*
5040 * TeX/LaTeX scanning loop.
5041 */
5042 static void
5043 TeX_commands (inf)
5044 FILE *inf;
5045 {
5046 char *cp;
5047 linebuffer *key;
5048
5049 /* Select either \ or ! as escape character. */
5050 TEX_mode (inf);
5051
5052 /* Initialize token table once from environment. */
5053 if (TEX_toktab == NULL)
5054 TEX_decode_env ("TEXTAGS", TEX_defenv);
5055
5056 LOOP_ON_INPUT_LINES (inf, lb, cp)
5057 {
5058 /* Look at each TEX keyword in line. */
5059 for (;;)
5060 {
5061 /* Look for a TEX escape. */
5062 while (*cp++ != TEX_esc)
5063 if (cp[-1] == '\0' || cp[-1] == '%')
5064 goto tex_next_line;
5065
5066 for (key = TEX_toktab; key->buffer != NULL; key++)
5067 if (strneq (cp, key->buffer, key->len))
5068 {
5069 register char *p;
5070 int namelen, linelen;
5071 bool opgrp = FALSE;
5072
5073 cp = skip_spaces (cp + key->len);
5074 if (*cp == TEX_opgrp)
5075 {
5076 opgrp = TRUE;
5077 cp++;
5078 }
5079 for (p = cp;
5080 (!iswhite (*p) && *p != '#' &&
5081 *p != TEX_opgrp && *p != TEX_clgrp);
5082 p++)
5083 continue;
5084 namelen = p - cp;
5085 linelen = lb.len;
5086 if (!opgrp || *p == TEX_clgrp)
5087 {
5088 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5089 p++;
5090 linelen = p - lb.buffer + 1;
5091 }
5092 make_tag (cp, namelen, TRUE,
5093 lb.buffer, linelen, lineno, linecharno);
5094 goto tex_next_line; /* We only tag a line once */
5095 }
5096 }
5097 tex_next_line:
5098 ;
5099 }
5100 }
5101
5102 #define TEX_LESC '\\'
5103 #define TEX_SESC '!'
5104
5105 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5106 chars accordingly. */
5107 static void
5108 TEX_mode (inf)
5109 FILE *inf;
5110 {
5111 int c;
5112
5113 while ((c = getc (inf)) != EOF)
5114 {
5115 /* Skip to next line if we hit the TeX comment char. */
5116 if (c == '%')
5117 while (c != '\n' && c != EOF)
5118 c = getc (inf);
5119 else if (c == TEX_LESC || c == TEX_SESC )
5120 break;
5121 }
5122
5123 if (c == TEX_LESC)
5124 {
5125 TEX_esc = TEX_LESC;
5126 TEX_opgrp = '{';
5127 TEX_clgrp = '}';
5128 }
5129 else
5130 {
5131 TEX_esc = TEX_SESC;
5132 TEX_opgrp = '<';
5133 TEX_clgrp = '>';
5134 }
5135 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5136 No attempt is made to correct the situation. */
5137 rewind (inf);
5138 }
5139
5140 /* Read environment and prepend it to the default string.
5141 Build token table. */
5142 static void
5143 TEX_decode_env (evarname, defenv)
5144 char *evarname;
5145 char *defenv;
5146 {
5147 register char *env, *p;
5148 int i, len;
5149
5150 /* Append default string to environment. */
5151 env = getenv (evarname);
5152 if (!env)
5153 env = defenv;
5154 else
5155 {
5156 char *oldenv = env;
5157 env = concat (oldenv, defenv, "");
5158 }
5159
5160 /* Allocate a token table */
5161 for (len = 1, p = env; p;)
5162 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5163 len++;
5164 TEX_toktab = xnew (len, linebuffer);
5165
5166 /* Unpack environment string into token table. Be careful about */
5167 /* zero-length strings (leading ':', "::" and trailing ':') */
5168 for (i = 0; *env != '\0';)
5169 {
5170 p = etags_strchr (env, ':');
5171 if (!p) /* End of environment string. */
5172 p = env + strlen (env);
5173 if (p - env > 0)
5174 { /* Only non-zero strings. */
5175 TEX_toktab[i].buffer = savenstr (env, p - env);
5176 TEX_toktab[i].len = p - env;
5177 i++;
5178 }
5179 if (*p)
5180 env = p + 1;
5181 else
5182 {
5183 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5184 TEX_toktab[i].len = 0;
5185 break;
5186 }
5187 }
5188 }
5189
5190 \f
5191 /* Texinfo support. Dave Love, Mar. 2000. */
5192 static void
5193 Texinfo_nodes (inf)
5194 FILE * inf;
5195 {
5196 char *cp, *start;
5197 LOOP_ON_INPUT_LINES (inf, lb, cp)
5198 if (LOOKING_AT (cp, "@node"))
5199 {
5200 start = cp;
5201 while (*cp != '\0' && *cp != ',')
5202 cp++;
5203 make_tag (start, cp - start, TRUE,
5204 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5205 }
5206 }
5207
5208 \f
5209 /*
5210 * HTML support.
5211 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5212 * Contents of <a name=xxx> are tags with name xxx.
5213 *
5214 * Francesco Potortì, 2002.
5215 */
5216 static void
5217 HTML_labels (inf)
5218 FILE * inf;
5219 {
5220 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5221 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5222 bool intag = FALSE; /* inside an html tag, looking for ID= */
5223 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5224 char *end;
5225
5226
5227 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5228
5229 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5230 for (;;) /* loop on the same line */
5231 {
5232 if (skiptag) /* skip HTML tag */
5233 {
5234 while (*dbp != '\0' && *dbp != '>')
5235 dbp++;
5236 if (*dbp == '>')
5237 {
5238 dbp += 1;
5239 skiptag = FALSE;
5240 continue; /* look on the same line */
5241 }
5242 break; /* go to next line */
5243 }
5244
5245 else if (intag) /* look for "name=" or "id=" */
5246 {
5247 while (*dbp != '\0' && *dbp != '>'
5248 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5249 dbp++;
5250 if (*dbp == '\0')
5251 break; /* go to next line */
5252 if (*dbp == '>')
5253 {
5254 dbp += 1;
5255 intag = FALSE;
5256 continue; /* look on the same line */
5257 }
5258 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5259 || LOOKING_AT_NOCASE (dbp, "id="))
5260 {
5261 bool quoted = (dbp[0] == '"');
5262
5263 if (quoted)
5264 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5265 continue;
5266 else
5267 for (end = dbp; *end != '\0' && intoken (*end); end++)
5268 continue;
5269 linebuffer_setlen (&token_name, end - dbp);
5270 strncpy (token_name.buffer, dbp, end - dbp);
5271 token_name.buffer[end - dbp] = '\0';
5272
5273 dbp = end;
5274 intag = FALSE; /* we found what we looked for */
5275 skiptag = TRUE; /* skip to the end of the tag */
5276 getnext = TRUE; /* then grab the text */
5277 continue; /* look on the same line */
5278 }
5279 dbp += 1;
5280 }
5281
5282 else if (getnext) /* grab next tokens and tag them */
5283 {
5284 dbp = skip_spaces (dbp);
5285 if (*dbp == '\0')
5286 break; /* go to next line */
5287 if (*dbp == '<')
5288 {
5289 intag = TRUE;
5290 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5291 continue; /* look on the same line */
5292 }
5293
5294 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5295 continue;
5296 make_tag (token_name.buffer, token_name.len, TRUE,
5297 dbp, end - dbp, lineno, linecharno);
5298 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5299 getnext = FALSE;
5300 break; /* go to next line */
5301 }
5302
5303 else /* look for an interesting HTML tag */
5304 {
5305 while (*dbp != '\0' && *dbp != '<')
5306 dbp++;
5307 if (*dbp == '\0')
5308 break; /* go to next line */
5309 intag = TRUE;
5310 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5311 {
5312 inanchor = TRUE;
5313 continue; /* look on the same line */
5314 }
5315 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5316 || LOOKING_AT_NOCASE (dbp, "<h1>")
5317 || LOOKING_AT_NOCASE (dbp, "<h2>")
5318 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5319 {
5320 intag = FALSE;
5321 getnext = TRUE;
5322 continue; /* look on the same line */
5323 }
5324 dbp += 1;
5325 }
5326 }
5327 }
5328
5329 \f
5330 /*
5331 * Prolog support
5332 *
5333 * Assumes that the predicate or rule starts at column 0.
5334 * Only the first clause of a predicate or rule is added.
5335 * Original code by Sunichirou Sugou (1989)
5336 * Rewritten by Anders Lindgren (1996)
5337 */
5338 static int prolog_pr __P((char *, char *));
5339 static void prolog_skip_comment __P((linebuffer *, FILE *));
5340 static int prolog_atom __P((char *, int));
5341
5342 static void
5343 Prolog_functions (inf)
5344 FILE *inf;
5345 {
5346 char *cp, *last;
5347 int len;
5348 int allocated;
5349
5350 allocated = 0;
5351 len = 0;
5352 last = NULL;
5353
5354 LOOP_ON_INPUT_LINES (inf, lb, cp)
5355 {
5356 if (cp[0] == '\0') /* Empty line */
5357 continue;
5358 else if (iswhite (cp[0])) /* Not a predicate */
5359 continue;
5360 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5361 prolog_skip_comment (&lb, inf);
5362 else if ((len = prolog_pr (cp, last)) > 0)
5363 {
5364 /* Predicate or rule. Store the function name so that we
5365 only generate a tag for the first clause. */
5366 if (last == NULL)
5367 last = xnew(len + 1, char);
5368 else if (len + 1 > allocated)
5369 xrnew (last, len + 1, char);
5370 allocated = len + 1;
5371 strncpy (last, cp, len);
5372 last[len] = '\0';
5373 }
5374 }
5375 free (last);
5376 }
5377
5378
5379 static void
5380 prolog_skip_comment (plb, inf)
5381 linebuffer *plb;
5382 FILE *inf;
5383 {
5384 char *cp;
5385
5386 do
5387 {
5388 for (cp = plb->buffer; *cp != '\0'; cp++)
5389 if (cp[0] == '*' && cp[1] == '/')
5390 return;
5391 readline (plb, inf);
5392 }
5393 while (!feof(inf));
5394 }
5395
5396 /*
5397 * A predicate or rule definition is added if it matches:
5398 * <beginning of line><Prolog Atom><whitespace>(
5399 * or <beginning of line><Prolog Atom><whitespace>:-
5400 *
5401 * It is added to the tags database if it doesn't match the
5402 * name of the previous clause header.
5403 *
5404 * Return the size of the name of the predicate or rule, or 0 if no
5405 * header was found.
5406 */
5407 static int
5408 prolog_pr (s, last)
5409 char *s;
5410 char *last; /* Name of last clause. */
5411 {
5412 int pos;
5413 int len;
5414
5415 pos = prolog_atom (s, 0);
5416 if (pos < 1)
5417 return 0;
5418
5419 len = pos;
5420 pos = skip_spaces (s + pos) - s;
5421
5422 if ((s[pos] == '.'
5423 || (s[pos] == '(' && (pos += 1))
5424 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5425 && (last == NULL /* save only the first clause */
5426 || len != (int)strlen (last)
5427 || !strneq (s, last, len)))
5428 {
5429 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5430 return len;
5431 }
5432 else
5433 return 0;
5434 }
5435
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || (*p == '_'))
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM(*p) || (*p == '_'));
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the quote that closes it.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* that was the closing quote */
        p++;                    /* a doubled quote stands for one */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        /* A backslash takes the next character with it.  */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5494
5495 \f
5496 /*
5497 * Support for Erlang
5498 *
5499 * Generates tags for functions, defines, and records.
5500 * Assumes that Erlang functions start at column 0.
5501 * Original code by Anders Lindgren (1996)
5502 */
5503 static int erlang_func __P((char *, char *));
5504 static void erlang_attribute __P((char *));
5505 static int erlang_atom __P((char *));
5506
5507 static void
5508 Erlang_functions (inf)
5509 FILE *inf;
5510 {
5511 char *cp, *last;
5512 int len;
5513 int allocated;
5514
5515 allocated = 0;
5516 len = 0;
5517 last = NULL;
5518
5519 LOOP_ON_INPUT_LINES (inf, lb, cp)
5520 {
5521 if (cp[0] == '\0') /* Empty line */
5522 continue;
5523 else if (iswhite (cp[0])) /* Not function nor attribute */
5524 continue;
5525 else if (cp[0] == '%') /* comment */
5526 continue;
5527 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5528 continue;
5529 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5530 {
5531 erlang_attribute (cp);
5532 if (last != NULL)
5533 {
5534 free (last);
5535 last = NULL;
5536 }
5537 }
5538 else if ((len = erlang_func (cp, last)) > 0)
5539 {
5540 /*
5541 * Function. Store the function name so that we only
5542 * generates a tag for the first clause.
5543 */
5544 if (last == NULL)
5545 last = xnew (len + 1, char);
5546 else if (len + 1 > allocated)
5547 xrnew (last, len + 1, char);
5548 allocated = len + 1;
5549 strncpy (last, cp, len);
5550 last[len] = '\0';
5551 }
5552 }
5553 free (last);
5554 }
5555
5556
5557 /*
5558 * A function definition is added if it matches:
5559 * <beginning of line><Erlang Atom><whitespace>(
5560 *
5561 * It is added to the tags database if it doesn't match the
5562 * name of the previous clause header.
5563 *
5564 * Return the size of the name of the function, or 0 if no function
5565 * was found.
5566 */
5567 static int
5568 erlang_func (s, last)
5569 char *s;
5570 char *last; /* Name of last clause. */
5571 {
5572 int pos;
5573 int len;
5574
5575 pos = erlang_atom (s);
5576 if (pos < 1)
5577 return 0;
5578
5579 len = pos;
5580 pos = skip_spaces (s + pos) - s;
5581
5582 /* Save only the first clause. */
5583 if (s[pos++] == '('
5584 && (last == NULL
5585 || len != (int)strlen (last)
5586 || !strneq (s, last, len)))
5587 {
5588 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5589 return len;
5590 }
5591
5592 return 0;
5593 }
5594
5595
5596 /*
5597 * Handle attributes. Currently, tags are generated for defines
5598 * and records.
5599 *
5600 * They are on the form:
5601 * -define(foo, bar).
5602 * -define(Foo(M, N), M+N).
5603 * -record(graph, {vtab = notable, cyclic = true}).
5604 */
5605 static void
5606 erlang_attribute (s)
5607 char *s;
5608 {
5609 char *cp = s;
5610
5611 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5612 && *cp++ == '(')
5613 {
5614 int len = erlang_atom (skip_spaces (cp));
5615 if (len > 0)
5616 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5617 }
5618 return;
5619 }
5620
5621
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, and also when a quoted atom is not closed
 * on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }
  else if (*p == '\'')
    {
      /* The atom is quoted: go to the closing quote.  A backslash
         takes the next character with it.  */
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              p++;
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      p++;                      /* count the closing quote as well */
    }

  return p - s;
}
5650
5651 \f
5652 static char *scan_separators __P((char *));
5653 static void add_regex __P((char *, language *));
5654 static char *substitute __P((char *, char *, struct re_registers *));
5655
5656 /*
5657 * Take a string like "/blah/" and turn it into "blah", verifying
5658 * that the first and last characters are the same, and handling
5659 * quoted separator characters. Actually, stops on the occurrence of
5660 * an unquoted separator. Also process \t, \n, etc. and turn into
5661 * appropriate characters. Works in place. Null terminates name string.
5662 * Returns pointer to terminating separator, or NULL for
5663 * unterminated regexps.
5664 */
5665 static char *
5666 scan_separators (name)
5667 char *name;
5668 {
5669 char sep = name[0];
5670 char *copyto = name;
5671 bool quoted = FALSE;
5672
5673 for (++name; *name != '\0'; ++name)
5674 {
5675 if (quoted)
5676 {
5677 switch (*name)
5678 {
5679 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5680 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5681 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5682 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5683 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5684 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5685 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5686 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5687 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5688 default:
5689 if (*name == sep)
5690 *copyto++ = sep;
5691 else
5692 {
5693 /* Something else is quoted, so preserve the quote. */
5694 *copyto++ = '\\';
5695 *copyto++ = *name;
5696 }
5697 break;
5698 }
5699 quoted = FALSE;
5700 }
5701 else if (*name == '\\')
5702 quoted = TRUE;
5703 else if (*name == sep)
5704 break;
5705 else
5706 *copyto++ = *name;
5707 }
5708 if (*name != sep)
5709 name = NULL; /* signal unterminated regexp */
5710
5711 /* Terminate copied string. */
5712 *copyto = '\0';
5713 return name;
5714 }
5715
5716 /* Look at the argument of --regex or --no-regex and do the right
5717 thing. Same for each line of a regexp file. */
5718 static void
5719 analyse_regex (regex_arg)
5720 char *regex_arg;
5721 {
5722 if (regex_arg == NULL)
5723 {
5724 free_regexps (); /* --no-regex: remove existing regexps */
5725 return;
5726 }
5727
5728 /* A real --regexp option or a line in a regexp file. */
5729 switch (regex_arg[0])
5730 {
5731 /* Comments in regexp file or null arg to --regex. */
5732 case '\0':
5733 case ' ':
5734 case '\t':
5735 break;
5736
5737 /* Read a regex file. This is recursive and may result in a
5738 loop, which will stop when the file descriptors are exhausted. */
5739 case '@':
5740 {
5741 FILE *regexfp;
5742 linebuffer regexbuf;
5743 char *regexfile = regex_arg + 1;
5744
5745 /* regexfile is a file containing regexps, one per line. */
5746 regexfp = fopen (regexfile, "r");
5747 if (regexfp == NULL)
5748 {
5749 pfatal (regexfile);
5750 return;
5751 }
5752 linebuffer_init (&regexbuf);
5753 while (readline_internal (&regexbuf, regexfp) > 0)
5754 analyse_regex (regexbuf.buffer);
5755 free (regexbuf.buffer);
5756 fclose (regexfp);
5757 }
5758 break;
5759
5760 /* Regexp to be used for a specific language only. */
5761 case '{':
5762 {
5763 language *lang;
5764 char *lang_name = regex_arg + 1;
5765 char *cp;
5766
5767 for (cp = lang_name; *cp != '}'; cp++)
5768 if (*cp == '\0')
5769 {
5770 error ("unterminated language name in regex: %s", regex_arg);
5771 return;
5772 }
5773 *cp++ = '\0';
5774 lang = get_language_from_langname (lang_name);
5775 if (lang == NULL)
5776 return;
5777 add_regex (cp, lang);
5778 }
5779 break;
5780
5781 /* Regexp to be used for any language. */
5782 default:
5783 add_regex (regex_arg, NULL);
5784 break;
5785 }
5786 }
5787
5788 /* Separate the regexp pattern, compile it,
5789 and care for optional name and modifiers. */
5790 static void
5791 add_regex (regexp_pattern, lang)
5792 char *regexp_pattern;
5793 language *lang;
5794 {
5795 static struct re_pattern_buffer zeropattern;
5796 char sep, *pat, *name, *modifiers;
5797 const char *err;
5798 struct re_pattern_buffer *patbuf;
5799 regexp *rp;
5800 bool
5801 force_explicit_name = TRUE, /* do not use implicit tag names */
5802 ignore_case = FALSE, /* case is significant */
5803 multi_line = FALSE, /* matches are done one line at a time */
5804 single_line = FALSE; /* dot does not match newline */
5805
5806
5807 if (strlen(regexp_pattern) < 3)
5808 {
5809 error ("null regexp", (char *)NULL);
5810 return;
5811 }
5812 sep = regexp_pattern[0];
5813 name = scan_separators (regexp_pattern);
5814 if (name == NULL)
5815 {
5816 error ("%s: unterminated regexp", regexp_pattern);
5817 return;
5818 }
5819 if (name[1] == sep)
5820 {
5821 error ("null name for regexp \"%s\"", regexp_pattern);
5822 return;
5823 }
5824 modifiers = scan_separators (name);
5825 if (modifiers == NULL) /* no terminating separator --> no name */
5826 {
5827 modifiers = name;
5828 name = "";
5829 }
5830 else
5831 modifiers += 1; /* skip separator */
5832
5833 /* Parse regex modifiers. */
5834 for (; modifiers[0] != '\0'; modifiers++)
5835 switch (modifiers[0])
5836 {
5837 case 'N':
5838 if (modifiers == name)
5839 error ("forcing explicit tag name but no name, ignoring", NULL);
5840 force_explicit_name = TRUE;
5841 break;
5842 case 'i':
5843 ignore_case = TRUE;
5844 break;
5845 case 's':
5846 single_line = TRUE;
5847 /* FALLTHRU */
5848 case 'm':
5849 multi_line = TRUE;
5850 need_filebuf = TRUE;
5851 break;
5852 default:
5853 {
5854 char wrongmod [2];
5855 wrongmod[0] = modifiers[0];
5856 wrongmod[1] = '\0';
5857 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5858 }
5859 break;
5860 }
5861
5862 patbuf = xnew (1, struct re_pattern_buffer);
5863 *patbuf = zeropattern;
5864 if (ignore_case)
5865 {
5866 static char lc_trans[CHARS];
5867 int i;
5868 for (i = 0; i < CHARS; i++)
5869 lc_trans[i] = lowcase (i);
5870 patbuf->translate = lc_trans; /* translation table to fold case */
5871 }
5872
5873 if (multi_line)
5874 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5875 else
5876 pat = regexp_pattern;
5877
5878 if (single_line)
5879 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5880 else
5881 re_set_syntax (RE_SYNTAX_EMACS);
5882
5883 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5884 if (multi_line)
5885 free (pat);
5886 if (err != NULL)
5887 {
5888 error ("%s while compiling pattern", err);
5889 return;
5890 }
5891
5892 rp = p_head;
5893 p_head = xnew (1, regexp);
5894 p_head->pattern = savestr (regexp_pattern);
5895 p_head->p_next = rp;
5896 p_head->lang = lang;
5897 p_head->pat = patbuf;
5898 p_head->name = savestr (name);
5899 p_head->error_signaled = FALSE;
5900 p_head->force_explicit_name = force_explicit_name;
5901 p_head->ignore_case = ignore_case;
5902 p_head->multi_line = multi_line;
5903 }
5904
5905 /*
5906 * Do the substitutions indicated by the regular expression and
5907 * arguments.
5908 */
5909 static char *
5910 substitute (in, out, regs)
5911 char *in, *out;
5912 struct re_registers *regs;
5913 {
5914 char *result, *t;
5915 int size, dig, diglen;
5916
5917 result = NULL;
5918 size = strlen (out);
5919
5920 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5921 if (out[size - 1] == '\\')
5922 fatal ("pattern error in \"%s\"", out);
5923 for (t = etags_strchr (out, '\\');
5924 t != NULL;
5925 t = etags_strchr (t + 2, '\\'))
5926 if (ISDIGIT (t[1]))
5927 {
5928 dig = t[1] - '0';
5929 diglen = regs->end[dig] - regs->start[dig];
5930 size += diglen - 2;
5931 }
5932 else
5933 size -= 1;
5934
5935 /* Allocate space and do the substitutions. */
5936 assert (size >= 0);
5937 result = xnew (size + 1, char);
5938
5939 for (t = result; *out != '\0'; out++)
5940 if (*out == '\\' && ISDIGIT (*++out))
5941 {
5942 dig = *out - '0';
5943 diglen = regs->end[dig] - regs->start[dig];
5944 strncpy (t, in + regs->start[dig], diglen);
5945 t += diglen;
5946 }
5947 else
5948 *t++ = *out;
5949 *t = '\0';
5950
5951 assert (t <= result + size);
5952 assert (t - result == (int)strlen (result));
5953
5954 return result;
5955 }
5956
5957 /* Deallocate all regexps. */
5958 static void
5959 free_regexps ()
5960 {
5961 regexp *rp;
5962 while (p_head != NULL)
5963 {
5964 rp = p_head->p_next;
5965 free (p_head->pattern);
5966 free (p_head->name);
5967 free (p_head);
5968 p_head = rp;
5969 }
5970 return;
5971 }
5972
/*
 * Reads the whole file as a single string from `filebuf' and looks for
 * multi-line regular expressions, creating tags on matches.
 * readline already dealt with normal regexps.
 *
 * Each multi-line regexp in the `p_head' list is searched for
 * repeatedly through the buffer.  The globals `lineno', `charno' and
 * `linecharno' are reset for every such regexp and advanced to the end
 * of each match.
 *
 * Idea by Ben Wing <ben@666.com> (2002).
 */
static void
regex_tag_multiline ()
{
  char *buffer = filebuf.buffer;
  regexp *rp;
  char *name;

  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      /* Result of the last search; presumably the offset where the
         match starts when it is not negative.  */
      int match = 0;

      if (!rp->multi_line)
        continue;               /* skip normal regexps */

      /* Generic initialisations before parsing file from memory. */
      lineno = 1;               /* reset global line number */
      charno = 0;               /* reset global char number */
      linecharno = 0;           /* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
        continue;

      /* Search again from `charno', which is left at the end of the
         previous match, until a search fails.  */
      while (match >= 0 && match < filebuf.len)
        {
          match = re_search (rp->pat, buffer, filebuf.len, charno,
                             filebuf.len - match, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              /* Reported only once for each regexp.  */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* A match that ends where it starts is empty; going on
                 would not advance, so give up on this regexp.  */
              if (match == rp->regs.end[0])
                {
                  if (!rp->error_signaled)
                    {
                      error ("regexp matches the empty string: \"%s\"",
                             rp->pattern);
                      rp->error_signaled = TRUE;
                    }
                  match = -3;   /* exit from while loop */
                  break;
                }

              /* Match occurred.  Construct a tag. */
              /* Move to the end of the match, keeping the line number
                 and the offset of the line start up to date.  */
              while (charno < rp->regs.end[0])
                if (buffer[charno++] == '\n')
                  lineno++, linecharno = charno;
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (buffer, rp->name, &rp->regs);
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, buffer + linecharno,
                        charno - linecharno + 1, lineno, linecharno);
              else
                /* NOTE(review): NAME is NULL here when the regexp has
                   no name, and strlen would then be applied to a null
                   pointer -- confirm that this branch cannot be taken
                   in that case.  */
                make_tag (name, strlen (name), TRUE, buffer + linecharno,
                          charno - linecharno + 1, lineno, linecharno);
              break;
            }
        }
    }
}
6055
6056 \f
6057 static bool
6058 nocase_tail (cp)
6059 char *cp;
6060 {
6061 register int len = 0;
6062
6063 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6064 cp++, len++;
6065 if (*cp == '\0' && !intoken (dbp[len]))
6066 {
6067 dbp += len;
6068 return TRUE;
6069 }
6070 return FALSE;
6071 }
6072
6073 static void
6074 get_tag (bp, namepp)
6075 register char *bp;
6076 char **namepp;
6077 {
6078 register char *cp = bp;
6079
6080 if (*bp != '\0')
6081 {
6082 /* Go till you get to white space or a syntactic break */
6083 for (cp = bp + 1; !notinname (*cp); cp++)
6084 continue;
6085 make_tag (bp, cp - bp, TRUE,
6086 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6087 }
6088
6089 if (namepp != NULL)
6090 *namepp = savenstr (bp, cp - bp);
6091 }
6092
6093 /*
6094 * Read a line of text from `stream' into `lbp', excluding the
6095 * newline or CR-NL, if any. Return the number of characters read from
6096 * `stream', which is the length of the line including the newline.
6097 *
6098 * On DOS or Windows we do not count the CR character, if any before the
6099 * NL, in the returned length; this mirrors the behavior of Emacs on those
6100 * platforms (for text files, it translates CR-NL to NL as it reads in the
6101 * file).
6102 *
6103 * If multi-line regular expressions are requested, each line read is
6104 * appended to `filebuf'.
6105 */
6106 static long
6107 readline_internal (lbp, stream)
6108 linebuffer *lbp;
6109 register FILE *stream;
6110 {
6111 char *buffer = lbp->buffer;
6112 register char *p = lbp->buffer;
6113 register char *pend;
6114 int chars_deleted;
6115
6116 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6117
6118 for (;;)
6119 {
6120 register int c = getc (stream);
6121 if (p == pend)
6122 {
6123 /* We're at the end of linebuffer: expand it. */
6124 lbp->size *= 2;
6125 xrnew (buffer, lbp->size, char);
6126 p += buffer - lbp->buffer;
6127 pend = buffer + lbp->size;
6128 lbp->buffer = buffer;
6129 }
6130 if (c == EOF)
6131 {
6132 *p = '\0';
6133 chars_deleted = 0;
6134 break;
6135 }
6136 if (c == '\n')
6137 {
6138 if (p > buffer && p[-1] == '\r')
6139 {
6140 p -= 1;
6141 #ifdef DOS_NT
6142 /* Assume CRLF->LF translation will be performed by Emacs
6143 when loading this file, so CRs won't appear in the buffer.
6144 It would be cleaner to compensate within Emacs;
6145 however, Emacs does not know how many CRs were deleted
6146 before any given point in the file. */
6147 chars_deleted = 1;
6148 #else
6149 chars_deleted = 2;
6150 #endif
6151 }
6152 else
6153 {
6154 chars_deleted = 1;
6155 }
6156 *p = '\0';
6157 break;
6158 }
6159 *p++ = c;
6160 }
6161 lbp->len = p - buffer;
6162
6163 if (need_filebuf /* we need filebuf for multi-line regexps */
6164 && chars_deleted > 0) /* not at EOF */
6165 {
6166 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6167 {
6168 /* Expand filebuf. */
6169 filebuf.size *= 2;
6170 xrnew (filebuf.buffer, filebuf.size, char);
6171 }
6172 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6173 filebuf.len += lbp->len;
6174 filebuf.buffer[filebuf.len++] = '\n';
6175 filebuf.buffer[filebuf.len] = '\0';
6176 }
6177
6178 return lbp->len + chars_deleted;
6179 }
6180
/*
 * Like readline_internal, above, but in addition try to match the
 * input line against relevant regular expressions and manage #line
 * directives.
 *
 * The globals `linecharno', `lineno' and `charno' are updated for the
 * line read.  Unless `no_line_directive' is set, a line of the form
 * `#line NUMBER "FILE"' is not returned to the caller: it changes
 * `lineno' and possibly `curfdp', and then the next line is read
 * instead.
 */
static void
readline (lbp, stream)
     linebuffer *lbp;
     FILE *stream;
{
  long result;

  linecharno = charno;          /* update global char number of line start */
  result = readline_internal (lbp, stream); /* read line */
  lineno += 1;                  /* increment global line number */
  charno += result;             /* increment global char number */

  /* Honour #line directives. */
  if (!no_line_directive)
    {
      /* When set, input lines are thrown away until the next #line
         directive or the end of the file.  Being static, it keeps its
         value from one call to the next.  */
      static bool discard_until_line_directive;

      /* Check whether this is a #line directive. */
      if (result > 12 && strneq (lbp->buffer, "#line ", 6))
        {
          unsigned int lno;
          /* Set by %n to the offset just after the opening double
             quote; it stays 0 if sscanf does not get that far.  */
          int start = 0;

          if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
              && start > 0)     /* double quote character found */
            {
              char *endp = lbp->buffer + start;

              /* Look for the closing double quote, skipping the ones
                 that are preceded by a backslash.  */
              while ((endp = etags_strchr (endp, '"')) != NULL
                     && endp[-1] == '\\')
                endp++;
              if (endp != NULL)
                /* Ok, this is a real #line directive.  Let's deal with it. */
                {
                  char *taggedabsname;  /* absolute name of original file */
                  char *taggedfname;    /* name of original file as given */
                  char *name;           /* temp var */

                  discard_until_line_directive = FALSE; /* found it */
                  name = lbp->buffer + start;
                  *endp = '\0';         /* cut the name at the closing quote */
                  canonicalize_filename (name);
                  taggedabsname = absolute_filename (name, tagfiledir);
                  if (filename_is_absolute (name)
                      || filename_is_absolute (curfdp->infname))
                    taggedfname = savestr (taggedabsname);
                  else
                    taggedfname = relative_filename (taggedabsname,tagfiledir);

                  if (streq (curfdp->taggedfname, taggedfname))
                    /* The #line directive is only a line number change.  We
                       deal with this afterwards. */
                    free (taggedfname);
                  else
                    /* The tags following this #line directive should be
                       attributed to taggedfname.  In order to do this, set
                       curfdp accordingly. */
                    {
                      fdesc *fdp; /* file description pointer */

                      /* Go look for a file description already set up for the
                         file indicated in the #line directive.  If there is
                         one, use it from now until the next #line
                         directive. */
                      for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                        if (streq (fdp->infname, curfdp->infname)
                            && streq (fdp->taggedfname, taggedfname))
                          /* If we remove the second test above (after the &&)
                             then all entries pertaining to the same file are
                             coalesced in the tags file.  If we use it, then
                             entries pertaining to the same file but generated
                             from different files (via #line directives) will
                             go into separate sections in the tags file.  These
                             alternatives look equivalent.  The first one
                             destroys some apparently useless information. */
                          {
                            curfdp = fdp;
                            free (taggedfname);
                            break;
                          }
                      /* Else, if we already tagged the real file, skip all
                         input lines until the next #line directive. */
                      if (fdp == NULL) /* not found */
                        for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                          if (streq (fdp->infabsname, taggedabsname))
                            {
                              discard_until_line_directive = TRUE;
                              free (taggedfname);
                              break;
                            }
                      /* Else create a new file description and use that from
                         now on, until the next #line directive. */
                      if (fdp == NULL) /* not found */
                        {
                          fdp = fdhead;
                          fdhead = xnew (1, fdesc);
                          *fdhead = *curfdp; /* copy curr. file description */
                          fdhead->next = fdp;
                          fdhead->infname = savestr (curfdp->infname);
                          fdhead->infabsname = savestr (curfdp->infabsname);
                          fdhead->infabsdir = savestr (curfdp->infabsdir);
                          /* The new description takes over taggedfname,
                             which is therefore not freed here.  */
                          fdhead->taggedfname = taggedfname;
                          fdhead->usecharno = FALSE;
                          fdhead->prop = NULL;
                          fdhead->written = FALSE;
                          curfdp = fdhead;
                        }
                    }
                  free (taggedabsname);
                  /* The recursive call below adds 1 to lineno, so that
                     the line after the directive gets number LNO.  */
                  lineno = lno - 1;
                  readline (lbp, stream);
                  return;
                } /* if a real #line directive */
            } /* if #line is followed by a number */
        } /* if line begins with "#line " */

      /* If we are here, no #line directive was found. */
      if (discard_until_line_directive)
        {
          if (result > 0)
            {
              /* Do a tail recursion on ourselves, thus discarding the contents
                 of the line buffer. */
              readline (lbp, stream);
              return;
            }
          /* End of file. */
          discard_until_line_directive = FALSE;
          return;
        }
    } /* if #line directives should be considered */

  {
    int match;
    regexp *rp;
    char *name;

    /* Match against relevant regexps. */
    if (lbp->len > 0)
      for (rp = p_head; rp != NULL; rp = rp->p_next)
        {
          /* Only use generic regexps or those for the current language.
             Also do not use multiline regexps, which is the job of
             regex_tag_multiline. */
          /* NOTE(review): the language is taken from fdhead here,
             while regex_tag_multiline takes it from curfdp -- confirm
             that the difference is intended.  */
          if ((rp->lang != NULL && rp->lang != fdhead->lang)
              || rp->multi_line)
            continue;

          match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              /* Reported only once for each regexp.  */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            case 0:
              /* Empty string matched. */
              if (!rp->error_signaled)
                {
                  error ("regexp matches the empty string: \"%s\"", rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            default:
              /* Match occurred.  Construct a tag. */
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (lbp->buffer, rp->name, &rp->regs);
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
              else
                /* NOTE(review): NAME is NULL here when the regexp has
                   no name, and strlen would then be applied to a null
                   pointer -- confirm that this branch cannot be taken
                   in that case.  */
                make_tag (name, strlen (name), TRUE,
                          lbp->buffer, match, lineno, linecharno);
              break;
            }
        }
  }
}
6375
6376 \f
/*
 * Return a newly allocated copy of the whole string CP.  The copy
 * is made by savenstr and occupies strlen (CP) + 1 bytes.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6387
6388 /*
6389 * Return a pointer to a space of size LEN+1 allocated with xnew where
6390 * the string CP has been copied for at most the first LEN characters.
6391 */
6392 static char *
6393 savenstr (cp, len)
6394 char *cp;
6395 int len;
6396 {
6397 register char *dp;
6398
6399 dp = xnew (len + 1, char);
6400 strncpy (dp, cp, len);
6401 dp[len] = '\0';
6402 return dp;
6403 }
6404
/*
 * Return a pointer to the last place in the string SP where the
 * character C appears, or NULL if it does not appear.  The
 * terminating null character is examined too, so C may be '\0'.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6426
/*
 * Return a pointer to the first place in the string SP where the
 * character C appears, or NULL if it does not appear.  The
 * terminating null character is examined too, so C may be '\0'.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (; *sp != c; sp++)
    if (*sp == '\0')
      return NULL;              /* reached the end without finding C */

  return (char *) sp;
}
6445
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero if they are equal; otherwise return the difference
 * between the first two characters that differ (after case folding
 * when both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Skip the common prefix.  */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6466
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * Return zero if the first N characters of S1 and S2 are equal, or if
 * the strings are equal and shorter than that; otherwise return the
 * difference between the first two characters that differ (after case
 * folding when both are alphabetic).  The result is zero when N is
 * zero or negative.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N is the number of characters still to be compared.  It is tested
     before anything else, so that reaching the limit always means
     "equal", even when S1 ends exactly at the limit and S2 goes on.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6492
/* Return a pointer to the first character of CP for which `iswhite'
   is false.  The end of the string is not white space, so the result
   never goes past it.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6502
/* Return a pointer to the first character of CP for which `iswhite'
   is true, or to the end of the string if there is none.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6512
/* Print an error message through `error' and terminate the program
   with status EXIT_FAILURE.  S1 and S2 are handed to `error'
   unchanged.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6521
/* Report the last system error with `perror', using S1 as the prefix
   of the message, and terminate the program with status
   EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6529
6530 static void
6531 suggest_asking_for_help ()
6532 {
6533 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6534 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6535 exit (EXIT_FAILURE);
6536 }
6537
6538 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6539 static void
6540 error (s1, s2)
6541 const char *s1, *s2;
6542 {
6543 fprintf (stderr, "%s: ", progname);
6544 fprintf (stderr, s1, s2);
6545 fprintf (stderr, "\n");
6546 }
6547
6548 /* Return a newly-allocated string whose contents
6549 concatenate those of s1, s2, s3. */
6550 static char *
6551 concat (s1, s2, s3)
6552 char *s1, *s2, *s3;
6553 {
6554 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6555 char *result = xnew (len1 + len2 + len3 + 1, char);
6556
6557 strcpy (result, s1);
6558 strcpy (result + len1, s2);
6559 strcpy (result + len1 + len2, s3);
6560 result[len1 + len2 + len3] = '\0';
6561
6562 return result;
6563 }
6564
6565 \f
6566 /* Does the same work as the system V getcwd, but does not need to
6567 guess the buffer size in advance. */
6568 static char *
6569 etags_getcwd ()
6570 {
6571 #ifdef HAVE_GETCWD
6572 int bufsize = 200;
6573 char *path = xnew (bufsize, char);
6574
6575 while (getcwd (path, bufsize) == NULL)
6576 {
6577 if (errno != ERANGE)
6578 pfatal ("getcwd");
6579 bufsize *= 2;
6580 free (path);
6581 path = xnew (bufsize, char);
6582 }
6583
6584 canonicalize_filename (path);
6585 return path;
6586
6587 #else /* not HAVE_GETCWD */
6588 #if MSDOS
6589
6590 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6591
6592 getwd (path);
6593
6594 for (p = path; *p != '\0'; p++)
6595 if (*p == '\\')
6596 *p = '/';
6597 else
6598 *p = lowcase (*p);
6599
6600 return strdup (path);
6601 #else /* not MSDOS */
6602 linebuffer path;
6603 FILE *pipe;
6604
6605 linebuffer_init (&path);
6606 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6607 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6608 pfatal ("pwd");
6609 pclose (pipe);
6610
6611 return path.buffer;
6612 #endif /* not MSDOS */
6613 #endif /* not HAVE_GETCWD */
6614 }
6615
6616 /* Return a newly allocated string containing the file name of FILE
6617 relative to the absolute directory DIR (which should end with a slash). */
6618 static char *
6619 relative_filename (file, dir)
6620 char *file, *dir;
6621 {
6622 char *fp, *dp, *afn, *res;
6623 int i;
6624
6625 /* Find the common root of file and dir (with a trailing slash). */
6626 afn = absolute_filename (file, cwd);
6627 fp = afn;
6628 dp = dir;
6629 while (*fp++ == *dp++)
6630 continue;
6631 fp--, dp--; /* back to the first differing char */
6632 #ifdef DOS_NT
6633 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6634 return afn;
6635 #endif
6636 do /* look at the equal chars until '/' */
6637 fp--, dp--;
6638 while (*fp != '/');
6639
6640 /* Build a sequence of "../" strings for the resulting relative file name. */
6641 i = 0;
6642 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6643 i += 1;
6644 res = xnew (3*i + strlen (fp + 1) + 1, char);
6645 res[0] = '\0';
6646 while (i-- > 0)
6647 strcat (res, "../");
6648
6649 /* Add the file name relative to the common root of file and dir. */
6650 strcat (res, fp + 1);
6651 free (afn);
6652
6653 return res;
6654 }
6655
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   absolute already, it is appended to DIR.  Then the "/dirname/.." and
   "/." components are removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the slash that starts the previous
                 component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow; the count includes the terminating null.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping regions here too.  */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6719
6720 /* Return a newly allocated string containing the absolute
6721 file name of dir where FILE resides given DIR (which should
6722 end with a slash). */
6723 static char *
6724 absolute_dirname (file, dir)
6725 char *file, *dir;
6726 {
6727 char *slashp, *res;
6728 char save;
6729
6730 slashp = etags_strrchr (file, '/');
6731 if (slashp == NULL)
6732 return savestr (dir);
6733 save = slashp[1];
6734 slashp[1] = '\0';
6735 res = absolute_filename (file, dir);
6736 slashp[1] = save;
6737
6738 return res;
6739 }
6740
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, if DOS_NT is
   defined, with a letter followed by a colon and a slash.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6753
/* Upcase DOS drive letter and collapse separators into single slashes.
   Works in place.  The separator is the slash, or the backslash when
   DOS_NT is defined; every run of separators in FN is replaced by a
   single slash.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
  register char* src;           /* next char to read */
  register char* dst;           /* next char to write */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash. */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6783
6784 \f
6785 /* Initialize a linebuffer for use. */
6786 static void
6787 linebuffer_init (lbp)
6788 linebuffer *lbp;
6789 {
6790 lbp->size = (DEBUG) ? 3 : 200;
6791 lbp->buffer = xnew (lbp->size, char);
6792 lbp->buffer[0] = '\0';
6793 lbp->len = 0;
6794 }
6795
6796 /* Set the minimum size of a string contained in a linebuffer. */
6797 static void
6798 linebuffer_setlen (lbp, toksize)
6799 linebuffer *lbp;
6800 int toksize;
6801 {
6802 while (lbp->size <= toksize)
6803 {
6804 lbp->size *= 2;
6805 xrnew (lbp->buffer, lbp->size, char);
6806 }
6807 lbp->len = toksize;
6808 }
6809
6810 /* Like malloc but get fatal error if memory is exhausted. */
6811 static PTR
6812 xmalloc (size)
6813 unsigned int size;
6814 {
6815 PTR result = (PTR) malloc (size);
6816 if (result == NULL)
6817 fatal ("virtual memory exhausted", (char *)NULL);
6818 return result;
6819 }
6820
6821 static PTR
6822 xrealloc (ptr, size)
6823 char *ptr;
6824 unsigned int size;
6825 {
6826 PTR result = (PTR) realloc (ptr, size);
6827 if (result == NULL)
6828 fatal ("virtual memory exhausted", (char *)NULL);
6829 return result;
6830 }
6831
6832 /*
6833 * Local Variables:
6834 * indent-tabs-mode: t
6835 * tab-width: 8
6836 * fill-column: 79
6837 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6838 * c-file-style: "gnu"
6839 * End:
6840 */
6841
6842 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6843 (do not change this comment) */
6844
6845 /* etags.c ends here */