Set copyright year to 2010.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software: you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation, either version 3 of the License, or
40 (at your option) any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program. If not, see <http://www.gnu.org/licenses/>. */
49
50
51 /* NB To comply with the above BSD license, copyright information is
52 reproduced in etc/ETAGS.README. That file should be updated when the
53 above notices are.
54
55 To the best of our knowledge, this code was originally based on the
56 ctags.c distributed with BSD4.2, which was copyrighted by the
57 University of California, as described above. */
58
59
60 /*
61 * Authors:
62 * 1983 Ctags originally by Ken Arnold.
63 * 1984 Fortran added by Jim Kleckner.
64 * 1984 Ed Pelegri-Llopart added C typedefs.
65 * 1985 Emacs TAGS format by Richard Stallman.
66 * 1989 Sam Kendall added C++.
67 * 1992 Joseph B. Wells improved C and C++ parsing.
68 * 1993 Francesco Potortì reorganized C and C++.
69 * 1994 Line-by-line regexp tags by Tom Tromey.
70 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
71 * 2002 #line directives by Francesco Potortì.
72 *
73 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
74 */
75
76 /*
77 * If you want to add support for a new language, start by looking at the LUA
78 * language, which is the simplest. Alternatively, consider distributing etags
79 * together with a configuration file containing regexp definitions for etags.
80 */
81
/* Version identification string compiled into the program.  */
82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
83
/* Boolean constants.  */
84 #define TRUE 1
85 #define FALSE 0
86
/* Normalise DEBUG to TRUE or FALSE so that it can be tested with `#if'
   whether or not it was defined on the command line.  When it was not
   defined, NDEBUG is defined as well, which turns assertions off.  */
87 #ifdef DEBUG
88 # undef DEBUG
89 # define DEBUG TRUE
90 #else
91 # define DEBUG FALSE
92 # define NDEBUG /* disable assert */
93 #endif
94
/* Prototype and generic-pointer compatibility layer.
   __P(args) expands to ARGS where prototypes are usable and to `()'
   otherwise; PTR is the type used for generic pointers.  With config.h
   the two are only defined if config.h did not already provide them.  */
95 #ifdef HAVE_CONFIG_H
96 # include <config.h>
97 /* On some systems, Emacs defines static as nothing for the sake
98 of unexec. We don't want that here since we don't use unexec. */
99 # undef static
100 # ifndef PTR /* for XEmacs */
101 # define PTR void *
102 # endif
103 # ifndef __P /* for XEmacs */
104 # define __P(args) args
105 # endif
106 #else /* no config.h */
/* Standalone build: use __STDC__ (or the Sun compiler's macro) to decide
   whether prototypes, `const' and `void *' are available.  */
107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
108 # define __P(args) args /* use prototypes */
109 # define PTR void * /* for generic pointers */
110 # else /* not standard C */
111 # define __P(args) () /* no prototypes */
112 # define const /* remove const for old compilers' sake */
113 # define PTR long * /* don't use void* */
114 # endif
115 #endif /* !HAVE_CONFIG_H */
116
/* Define _GNU_SOURCE unless the build already did.  */
117 #ifndef _GNU_SOURCE
118 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
119 #endif
120
121 /* WIN32_NATIVE is for XEmacs.
122 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
/* A WIN32_NATIVE build is treated as WINDOWSNT and never as MSDOS.  */
123 #ifdef WIN32_NATIVE
124 # undef MSDOS
125 # undef WINDOWSNT
126 # define WINDOWSNT
127 #endif /* WIN32_NATIVE */
128
/* Normalise MSDOS to TRUE or FALSE so it can be used in expressions.
   On MSDOS the DOS headers are included too and, when there is no
   config.h, DOS_NT is defined and <sys/config.h> is used instead.  */
129 #ifdef MSDOS
130 # undef MSDOS
131 # define MSDOS TRUE
132 # include <fcntl.h>
133 # include <sys/param.h>
134 # include <io.h>
135 # ifndef HAVE_CONFIG_H
136 # define DOS_NT
137 # include <sys/config.h>
138 # endif
139 #else
140 # define MSDOS FALSE
141 #endif /* MSDOS */
142
/* On Windows NT: include the native headers, take MAXPATHLEN from
   _MAX_PATH, and make sure DOS_NT and HAVE_GETCWD are defined.
   Elsewhere: include the standard headers when STDC_HEADERS is defined,
   otherwise declare by hand the library functions and exit codes that
   would have come from <stdlib.h> and <string.h>.  */
143 #ifdef WINDOWSNT
144 # include <stdlib.h>
145 # include <fcntl.h>
146 # include <string.h>
147 # include <direct.h>
148 # include <io.h>
149 # define MAXPATHLEN _MAX_PATH
150 # undef HAVE_NTGUI
151 # undef DOS_NT
152 # define DOS_NT
153 # ifndef HAVE_GETCWD
154 # define HAVE_GETCWD
155 # endif /* undef HAVE_GETCWD */
156 #else /* not WINDOWSNT */
157 # ifdef STDC_HEADERS
158 # include <stdlib.h>
159 # include <string.h>
160 # else /* no standard C headers */
161 extern char *getenv __P((const char *));
162 extern char *strcpy __P((char *, const char *));
163 extern char *strncpy __P((char *, const char *, unsigned long));
164 extern char *strcat __P((char *, const char *));
165 extern char *strncat __P((char *, const char *, unsigned long));
166 extern int strcmp __P((const char *, const char *));
167 extern int strncmp __P((const char *, const char *, unsigned long));
168 extern int system __P((const char *));
169 extern unsigned long strlen __P((const char *));
170 extern void *malloc __P((unsigned long));
171 extern void *realloc __P((void *, unsigned long));
172 extern void exit __P((int));
173 extern void free __P((void *));
174 extern void *memmove __P((void *, const void *, unsigned long));
175 # define EXIT_SUCCESS 0
176 # define EXIT_FAILURE 1
177 # endif
178 #endif /* !WINDOWSNT */
179
/* Without <unistd.h>, declare getcwd by hand when the build says it
   exists (except on Windows NT).  */
180 #ifdef HAVE_UNISTD_H
181 # include <unistd.h>
182 #else
183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
184 extern char *getcwd (char *buf, size_t size);
185 # endif
186 #endif /* HAVE_UNISTD_H */
187
188 #include <stdio.h>
189 #include <ctype.h>
190 #include <errno.h>
/* Declare errno as a plain variable only when <errno.h> did not define
   it as a macro.  */
191 #ifndef errno
192 extern int errno;
193 #endif
194 #include <sys/types.h>
195 #include <sys/stat.h>
196
/* With NDEBUG defined, replace whatever <assert.h> provided with a
   no-op expression.  */
197 #include <assert.h>
198 #ifdef NDEBUG
199 # undef assert /* some systems have a buggy assert.h */
200 # define assert(x) ((void) 0)
201 #endif
202
/* Provide S_ISREG in terms of S_IFMT/S_IFREG when the system headers
   only have the latter.  */
203 #if !defined (S_ISREG) && defined (S_IFREG)
204 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
205 #endif
206
/* Normalise NO_LONG_OPTIONS to TRUE or FALSE.  When long options are
   unavailable, getopt_long is mapped onto plain getopt, dropping the
   long-option arguments, and getopt's variables are declared by hand.  */
207 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
208 # define NO_LONG_OPTIONS TRUE
209 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
210 extern char *optarg;
211 extern int optind, opterr;
212 #else
213 # define NO_LONG_OPTIONS FALSE
214 # include <getopt.h>
215 #endif /* NO_LONG_OPTIONS */
216
/* The notice below is not inside a comment, so a standalone compilation
   on Cygwin stops here until the notice is removed by hand.  */
217 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
218 # ifdef __CYGWIN__ /* compiling on Cygwin */
219 !!! NOTICE !!!
220 the regex.h distributed with Cygwin is not compatible with etags, alas!
221 If you want regular expression support, you should delete this notice and
222 arrange to use the GNU regex.h and regex.c.
223 # endif
224 #endif
225 #include <regex.h>
226
227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
228 Leave it undefined to make the program "etags", which makes emacs-style
229 tag tables and tags typedefs, #defines and struct/union/enum by default. */
/* Whichever way it was given, CTAGS ends up defined as TRUE or FALSE so
   that it can be tested with `#if CTAGS' and used in expressions.  */
230 #ifdef CTAGS
231 # undef CTAGS
232 # define CTAGS TRUE
233 #else
234 # define CTAGS FALSE
235 #endif
236
/*
 * String equality tests.  Each macro yields non-zero when the two
 * strings compare equal:
 *   streq (s, t)          whole strings, via strcmp
 *   strneq (s, t, n)      at most N characters, via strncmp
 *   strcaseeq (s, t)      whole strings, via etags_strcasecmp
 *   strncaseeq (s, t, n)  at most N characters, via etags_strncasecmp
 *
 * Both S and T are handed to the comparison function, so BOTH must be
 * non-NULL; every macro asserts exactly that.  When assertions are
 * enabled the arguments are evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
241
/* Size of the character-class tables: one slot for each value of an
   8-bit character (2^8).  */
#define CHARS 256

/* Reduce X to a table index in [0, CHARS), so that negative values of
   a signed char are mapped to valid indices too.  */
#define CHAR(x)		((unsigned int)(x) % (CHARS))

/* Character-class predicates, answered by table lookup.  */
#define iswhite(c)	(_wht[CHAR(c)])	/* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)])	/* c is not in a name (see nonam) */
#define begtoken(c)	(_btk[CHAR(c)])	/* c can start token (see begtk) */
#define intoken(c)	(_itk[CHAR(c)])	/* c can be in token (see midtk) */
#define endtoken(c)	(_etk[CHAR(c)])	/* c ends tokens (see endtk) */

/* <ctype.h> classification, always fed an index-safe value.  */
#define ISALNUM(c)	isalnum (CHAR(c))
#define ISALPHA(c)	isalpha (CHAR(c))
#define ISDIGIT(c)	isdigit (CHAR(c))
#define ISLOWER(c)	islower (CHAR(c))

/* <ctype.h> case conversion, always fed an index-safe value.  */
#define lowcase(c)	tolower (CHAR(c))
#define upcase(c)	toupper (CHAR(c))
257
258
/*
 * Typed allocation helpers.
 *
 *   Type *xnew (int n, Type);
 *	yields a `Type *' to storage for N objects of Type.
 *   void xrnew (OldPointer, int n, Type);
 *	resizes the storage OldPointer refers to so that it holds N
 *	objects of Type, and assigns the result back to OldPointer.
 *
 * Normally the requests go to xmalloc and xrealloc.  In a DEBUG build
 * chkmalloc.h is included and they go to trace_malloc and trace_realloc
 * instead, together with the requesting file name and line number.
 */
#if !DEBUG
# define xnew(n,Type) \
	((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) \
	((op) = (Type *) xrealloc ((char *) (op), (n) * sizeof (Type)))
#else
# include "chkmalloc.h"
# define xnew(n,Type) \
	((Type *) trace_malloc (__FILE__, __LINE__, (n) * sizeof (Type)))
# define xrnew(op,n,Type) \
	((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#endif
276
/* Truth values are plain ints.  */
#define bool int

/* Type of a language parsing function: it is handed the open input
   stream and returns nothing.  */
typedef void Lang_function __P((FILE *));
280
/* A compression format, identified by file name suffix.  */
typedef struct
{
  /* File name suffix for this compressor.  */
  char *suffix;
  /* Command that takes one argument and decompresses to stdout.  */
  char *command;
} compressor;
286
287 typedef struct
288 {
289 char *name; /* language name */
290 char *help; /* detailed help for the language */
291 Lang_function *function; /* parse function */
292 char **suffixes; /* name suffixes of this language's files */
293 char **filenames; /* names of this language's files */
294 char **interpreters; /* interpreters for this language */
295 bool metasource; /* source used to generate other sources */
296 } language;
297
298 typedef struct fdesc
299 {
300 struct fdesc *next; /* for the linked list */
301 char *infname; /* uncompressed input file name */
302 char *infabsname; /* absolute uncompressed input file name */
303 char *infabsdir; /* absolute dir of input file */
304 char *taggedfname; /* file name to write in tagfile */
305 language *lang; /* language of file */
306 char *prop; /* file properties to write in tagfile */
307 bool usecharno; /* etags tags shall contain char number */
308 bool written; /* entry written in the tags file */
309 } fdesc;
310
311 typedef struct node_st
312 { /* sorting structure */
313 struct node_st *left, *right; /* left and right sons */
314 fdesc *fdp; /* description of file to whom tag belongs */
315 char *name; /* tag name */
316 char *regex; /* search regexp */
317 bool valid; /* write this tag on the tag file */
318 bool is_func; /* function tag: use regexp in CTAGS mode */
319 bool been_warned; /* warning already given for duplicated tag */
320 int lno; /* line number tag is on */
321 long cno; /* character number line starts on */
322 } node;
323
/*
 * A `linebuffer' holds one line of text.  `readline_internal' fills it
 * from a stream and copes with lines of any length.
 *   size    is the size of BUFFER;
 *   len     is the length of the string in BUFFER after readline has
 *	     read it;
 *   buffer  is the storage itself.
 */
typedef struct
{
  long size;
  int len;
  char *buffer;
} linebuffer;
337
338 /* Used to support mixing of --lang and file names. */
339 typedef struct
340 {
341 enum {
342 at_language, /* a language specification */
343 at_regexp, /* a regular expression */
344 at_filename, /* a file name */
345 at_stdin, /* read from stdin here */
346 at_end /* stop parsing the list */
347 } arg_type; /* argument type */
348 language *lang; /* language associated with the argument */
349 char *what; /* the argument itself */
350 } argument;
351
352 /* Structure defining a regular expression. */
353 typedef struct regexp
354 {
355 struct regexp *p_next; /* pointer to next in list */
356 language *lang; /* if set, use only for this language */
357 char *pattern; /* the regexp pattern */
358 char *name; /* tag name */
359 struct re_pattern_buffer *pat; /* the compiled pattern */
360 struct re_registers regs; /* re registers */
361 bool error_signaled; /* already signaled for this regexp */
362 bool force_explicit_name; /* do not allow implict tag name */
363 bool ignore_case; /* ignore case when matching */
364 bool multi_line; /* do a multi-line match on the whole file */
365 } regexp;
366
367
/*
 * Forward declarations of the language parsing functions.  They are
 * written out in full because many compilers barf on the shorter
 *	Lang_function Ada_funcs;
 */

/* The `..._entries' functions.  C_entries is the only one that takes
   an int (c_ext) in front of the stream.  */
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Yacc_entries __P((FILE *));

/* All the others, in alphabetical order of language.  */
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Forth_words __P((FILE *));
static void Fortran_functions __P((FILE *));
static void HTML_labels __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Lua_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
static void PS_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));

static void just_read_file __P((FILE *));
398
399 static void print_language_names __P((void));
400 static void print_version __P((void));
401 static void print_help __P((argument *));
402 int main __P((int, char **));
403
404 static compressor *get_compressor_from_suffix __P((char *, char **));
405 static language *get_language_from_langname __P((const char *));
406 static language *get_language_from_interpreter __P((char *));
407 static language *get_language_from_filename __P((char *, bool));
408 static void readline __P((linebuffer *, FILE *));
409 static long readline_internal __P((linebuffer *, FILE *));
410 static bool nocase_tail __P((char *));
411 static void get_tag __P((char *, char **));
412
413 static void analyse_regex __P((char *));
414 static void free_regexps __P((void));
415 static void regex_tag_multiline __P((void));
416 static void error __P((const char *, const char *));
417 static void suggest_asking_for_help __P((void));
418 void fatal __P((char *, char *));
419 static void pfatal __P((char *));
420 static void add_node __P((node *, node **));
421
422 static void init __P((void));
423 static void process_file_name __P((char *, language *));
424 static void process_file __P((FILE *, char *, language *));
425 static void find_entries __P((FILE *));
426 static void free_tree __P((node *));
427 static void free_fdesc __P((fdesc *));
428 static void pfnote __P((char *, bool, char *, int, int, long));
429 static void make_tag __P((char *, int, bool, char *, int, int, long));
430 static void invalidate_nodes __P((fdesc *, node **));
431 static void put_entries __P((node *));
432
433 static char *concat __P((char *, char *, char *));
434 static char *skip_spaces __P((char *));
435 static char *skip_non_spaces __P((char *));
436 static char *savenstr __P((char *, int));
437 static char *savestr __P((char *));
438 static char *etags_strchr __P((const char *, int));
439 static char *etags_strrchr __P((const char *, int));
440 static int etags_strcasecmp __P((const char *, const char *));
441 static int etags_strncasecmp __P((const char *, const char *, int));
442 static char *etags_getcwd __P((void));
443 static char *relative_filename __P((char *, char *));
444 static char *absolute_filename __P((char *, char *));
445 static char *absolute_dirname __P((char *, char *));
446 static bool filename_is_absolute __P((char *f));
447 static void canonicalize_filename __P((char *));
448 static void linebuffer_init __P((linebuffer *));
449 static void linebuffer_setlen __P((linebuffer *, int));
450 static PTR xmalloc __P((unsigned int));
451 static PTR xrealloc __P((char *, unsigned int));
452
453 \f
/* Search delimiter: `/' means use /.../ searches.  */
static char searchar = '/';

/* Name this program was invoked with.  */
static char *progname;
/* Current working directory.  */
static char *cwd;

/* The output file, the directory of the tag file, and the ioptr for
   the tags file.  */
static char *tagfile;
static char *tagfiledir;
static FILE *tagf;
461
462 static fdesc *fdhead; /* head of file description list */
463 static fdesc *curfdp; /* current file description */
464 static int lineno; /* line number of current line */
465 static long charno; /* current character number */
466 static long linecharno; /* charno of start of current line */
467 static char *dbp; /* pointer to start of current tag */
468
469 static const int invalidcharno = -1;
470
471 static node *nodehead; /* the head of the binary tree of tags */
472 static node *last_node; /* the last node created */
473
474 static linebuffer lb; /* the current line */
475 static linebuffer filebuf; /* a buffer containing the whole file */
476 static linebuffer token_name; /* a buffer containing a tag name */
477
478 /* boolean "functions" (see init) */
479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
/* The members of each character class, spelled out as strings.  */

/* White chars.  */
static char *white = " \f\t\n\r\v";

/* Chars that are not in a name.  Look at make_tag before modifying!  */
static char *nonam = " \f\t\n\r()=,;";

/* Token ending chars.  */
static char *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?";

/* Token starting chars.  */
static char *begtk =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@";

/* Valid in-token chars.  */
static char *midtk =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
491
492 static bool append_to_tagfile; /* -a: append to tags */
493 /* The next five default to TRUE in C and derived languages. */
494 static bool typedefs; /* -t: create tags for C and Ada typedefs */
495 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
496 /* 0 struct/enum/union decls, and C++ */
497 /* member functions. */
498 static bool constantypedefs; /* -d: create tags for C #define, enum */
499 /* constants and variables. */
500 /* -D: opposite of -d. Default under ctags. */
501 static bool globals; /* create tags for global variables */
502 static bool members; /* create tags for C member variables */
503 static bool declarations; /* --declarations: tag them and extern in C&Co*/
504 static bool no_line_directive; /* ignore #line directives (undocumented) */
505 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
506 static bool update; /* -u: update tags */
507 static bool vgrind_style; /* -v: create vgrind style index output */
508 static bool no_warnings; /* -w: suppress warnings (undocumented) */
509 static bool cxref_style; /* -x: create cxref style output */
510 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
511 static bool ignoreindent; /* -I: ignore indentation in C */
512 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
513
514 /* STDIN is defined in LynxOS system headers */
515 #ifdef STDIN
516 # undef STDIN
517 #endif
518
519 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
520 static bool parsing_stdin; /* --parse-stdin used */
521
522 static regexp *p_head; /* list of all regexps */
523 static bool need_filebuf; /* some regexes are multi-line */
524
/* Long options, in the form getopt_long expects:
   { name, has_arg, flag, val }.
   With a NULL flag getopt_long returns VAL, here mostly a short option
   letter; with a non-NULL flag it stores VAL in the variable pointed
   to instead.  The table is closed by an entry whose name is NULL.
   Which options exist depends on whether this is ctags or etags.
   NOTE(review): "help" is listed twice, with 'h' and with 'H'; since
   both entries have the same name, presumably only the first one can
   ever be selected -- confirm.  */
525 static struct option longopts[] =
526 {
527 { "append", no_argument, NULL, 'a' },
528 { "packages-only", no_argument, &packages_only, TRUE },
529 { "c++", no_argument, NULL, 'C' },
530 { "declarations", no_argument, &declarations, TRUE },
531 { "no-line-directive", no_argument, &no_line_directive, TRUE },
532 { "no-duplicates", no_argument, &no_duplicates, TRUE },
533 { "help", no_argument, NULL, 'h' },
534 { "help", no_argument, NULL, 'H' },
535 { "ignore-indentation", no_argument, NULL, 'I' },
536 { "language", required_argument, NULL, 'l' },
537 { "members", no_argument, &members, TRUE },
538 { "no-members", no_argument, &members, FALSE },
539 { "output", required_argument, NULL, 'o' },
540 { "regex", required_argument, NULL, 'r' },
541 { "no-regex", no_argument, NULL, 'R' },
542 { "ignore-case-regex", required_argument, NULL, 'c' },
543 { "parse-stdin", required_argument, NULL, STDIN },
544 { "version", no_argument, NULL, 'V' },
545
546 #if CTAGS /* Ctags options */
547 { "backward-search", no_argument, NULL, 'B' },
548 { "cxref", no_argument, NULL, 'x' },
549 { "defines", no_argument, NULL, 'd' },
550 { "globals", no_argument, &globals, TRUE },
551 { "typedefs", no_argument, NULL, 't' },
552 { "typedefs-and-c++", no_argument, NULL, 'T' },
553 { "update", no_argument, NULL, 'u' },
554 { "vgrind", no_argument, NULL, 'v' },
555 { "no-warn", no_argument, NULL, 'w' },
556
557 #else /* Etags options */
558 { "no-defines", no_argument, NULL, 'D' },
559 { "no-globals", no_argument, &globals, FALSE },
560 { "include", required_argument, NULL, 'i' },
561 #endif
562 { NULL }
563 };
564
565 static compressor compressors[] =
566 {
567 { "z", "gzip -d -c"},
568 { "Z", "gzip -d -c"},
569 { "gz", "gzip -d -c"},
570 { "GZ", "gzip -d -c"},
571 { "bz2", "bzip2 -d -c" },
572 { NULL }
573 };
574
575 /*
576 * Language stuff.
577 */
578
/* Ada: file name suffixes and help text.  */
static char *Ada_suffixes [] =
{
  "ads",
  "adb",
  "ada",
  NULL
};
static char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";
598
/* Assembly code: file name suffixes and help text.  */
static char *Asm_suffixes [] =
{
  "a",				/* Unix assembler */
  "asm",			/* Microcontroller assembly */
  "def",			/* BSO/Tasking definition includes */
  "inc",			/* Microcontroller include files */
  "ins",			/* Microcontroller include files */
  "s",				/* Unix assembler */
  "sa",				/* Unix assembler */
  "S",				/* cpp-processed Unix assembler */
  "src",			/* BSO/Tasking C compiler output */
  NULL
};
static char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
614
615
/* C: file name suffixes and help text.
   Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the
   file.  That is why default_C_entries is called for these.  */
static char *default_C_suffixes [] =
{
  "c",
  "h",
  NULL
};
#if !CTAGS			/* C help for Etags */
static char default_C_help [] =
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals' and so are struct members unless you specify\n"
  "`--no-members'. Use of `--no-globals', `--no-defines' and\n"
  "`--no-members' can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations'.";
#else				/* C help for Ctags */
static char default_C_help [] =
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'.";
#endif				/* C help for Etags and Ctags */
641
/* C++: file name suffixes and help text.  */
static char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",				/* Objective C++ */
  "pdb",			/* Postscript with C syntax */
  NULL
};
static char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are recognized unless you use the `--no-members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";

/* Java: file name suffixes and help text.  */
static char *Cjava_suffixes [] =
{
  "java",
  NULL
};
static char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
661
662
/* Cobol: file name suffixes and help text.  */
static char *Cobol_suffixes [] =
{
  "COB",
  "cob",
  NULL
};
static char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";

/* C*: file name suffixes (there is no help text).  */
static char *Cstar_suffixes [] =
{
  "cs",
  "hs",
  NULL
};

/* Erlang: file name suffixes and help text.  */
static char *Erlang_suffixes [] =
{
  "erl",
  "hrl",
  NULL
};
static char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
677
/* Forth: file name suffixes and help text.
   Like every other per-language table in this file, the suffix list is
   private to this translation unit, hence `static'.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
  "In Forth code, tags are words defined by `:',\n"
  "constant, code, create, defer, value, variable, buffer:, field.";
683
/* Fortran: file name suffixes and help text.  */
static char *Fortran_suffixes [] =
{
  "F", "f", "f90", "for",
  NULL
};
static char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";

/* HTML: file name suffixes and help text.  */
static char *HTML_suffixes [] =
{
  "htm", "html", "shtml",
  NULL
};
static char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";

/* Lisp: file name suffixes and help text.  */
static char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml",
  NULL
};
static char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";

/* Lua: file name suffixes and help text.  */
static char *Lua_suffixes [] =
{
  "lua", "LUA",
  NULL
};
static char Lua_help [] =
  "In Lua scripts, all functions are tags.";
708
/* Makefiles: these are recognised by whole file name, not by suffix.  */
static char *Makefile_filenames [] =
{
  "Makefile",
  "makefile",
  "GNUMakefile",
  "Makefile.in",
  "Makefile.am",
  NULL
};
static char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* Objective C: file name suffixes and help text.  */
static char *Objc_suffixes [] =
{
  "lm",				/* Objective lex file */
  "m",				/* Objective C file */
  NULL
};
static char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal: file name suffixes and help text.  */
static char *Pascal_suffixes [] =
{
  "p",
  "pas",
  NULL
};
static char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */
731
/* Perl: file name suffixes, interpreter names and help text.  */
static char *Perl_suffixes [] =
{
  "pl",
  "pm",
  NULL
};
static char *Perl_interpreters [] =
{
  "perl",
  "@PERL@",
  NULL
};
static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";

/* PHP: file name suffixes and help text.  */
static char *PHP_suffixes [] =
{
  "php",
  "php3",
  "php4",
  NULL
};
static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. Unless you use\n"
  "the `--no-members' option, vars are tags too.";

/* Plain C: file name suffixes (there is no help text).  */
static char *plain_C_suffixes [] =
{
  "pc",				/* Pro*C file */
  NULL
};

/* PostScript: file name suffixes and help text.  */
static char *PS_suffixes [] =
{
  "ps",
  "psw",			/* .psw is for PSWrap */
  NULL
};
static char PS_help [] =
  "In PostScript code, the tags are the functions.";
757
/* Prolog: file name suffixes and help text.  */
static char *Prolog_suffixes [] =
{
  "prolog",
  NULL
};
static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";

/* Python: file name suffixes and help text.  */
static char *Python_suffixes [] =
{
  "py",
  NULL
};
static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";

/* Scheme: file name suffixes and help text.
   Can't do the `SCM' or `scm' prefix with a version number.  */
static char *Scheme_suffixes [] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t",
  NULL
};
static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";

/* TeX: file name suffixes and help text.  */
static char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex",
  NULL
};
static char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";
790
791
/* Texinfo: file name suffixes and help text.  */
static char *Texinfo_suffixes [] =
{
  "texi",
  "texinfo",
  "txi",
  NULL
};
static char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";

/* Yacc: file name suffixes and help text.  */
static char *Yacc_suffixes [] =
{
  "y",
  "y++",
  "ym",				/* .ym is Objective yacc file */
  "yxx",
  "yy",
  NULL
};
static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
804
/* Help text for the pseudo-language `auto'.  The wording shown to the
   user reads "files based on" (it used to say "files base on").  */
static char auto_help [] =
  "`auto' is not a real language, it indicates to use\n"
  "a default language for files based on file name suffix and file contents.";
808
/* Detailed help text for the pseudo-language `none'.  */
static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text used for languages that have no detailed help of their own.  */
static char no_lang_help [] =
  "No detailed help available for this language.";
815
816
817 /*
818 * Table of languages.
819 *
820 * It is ok for a given function to be listed under more than one
821 * name. I just didn't.
822 */
823
824 static language lang_names [] =
825 {
826 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
827 { "asm", Asm_help, Asm_labels, Asm_suffixes },
828 { "c", default_C_help, default_C_entries, default_C_suffixes },
829 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
830 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
831 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
832 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
833 { "forth", Forth_help, Forth_words, Forth_suffixes },
834 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
835 { "html", HTML_help, HTML_labels, HTML_suffixes },
836 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
837 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
838 { "lua", Lua_help, Lua_functions, Lua_suffixes },
839 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
840 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
841 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
842 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
843 { "php", PHP_help, PHP_functions, PHP_suffixes },
844 { "postscript",PS_help, PS_functions, PS_suffixes },
845 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
846 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
847 { "python", Python_help, Python_functions, Python_suffixes },
848 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
849 { "tex", TeX_help, TeX_commands, TeX_suffixes },
850 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
851 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
852 { "auto", auto_help }, /* default guessing scheme */
853 { "none", none_help, just_read_file }, /* regexp matching only */
854 { NULL } /* end of list */
855 };
856
857 \f
858 static void
859 print_language_names ()
860 {
861 language *lang;
862 char **name, **ext;
863
864 puts ("\nThese are the currently supported languages, along with the\n\
865 default file names and dot suffixes:");
866 for (lang = lang_names; lang->name != NULL; lang++)
867 {
868 printf (" %-*s", 10, lang->name);
869 if (lang->filenames != NULL)
870 for (name = lang->filenames; *name != NULL; name++)
871 printf (" %s", *name);
872 if (lang->suffixes != NULL)
873 for (ext = lang->suffixes; *ext != NULL; ext++)
874 printf (" .%s", *ext);
875 puts ("");
876 }
877 puts ("where `auto' means use default language for files based on file\n\
878 name suffix, and `none' means only do regexp processing on files.\n\
879 If no language is specified and no matching suffix is found,\n\
880 the first line of the file is read for a sharp-bang (#!) sequence\n\
881 followed by the name of an interpreter. If no such sequence is found,\n\
882 Fortran is tried first; if no tags are found, C is tried next.\n\
883 When parsing any C file, a \"class\" or \"template\" keyword\n\
884 switches to C++.");
885 puts ("Compressed files are supported using gzip and bzip2.\n\
886 \n\
887 For detailed help on a given language use, for example,\n\
888 etags --help --lang=ada.");
889 }
890
891 #ifndef EMACS_NAME
892 # define EMACS_NAME "standalone"
893 #endif
894 #ifndef VERSION
895 # define VERSION "17.38.1.4"
896 #endif
897 static void
898 print_version ()
899 {
900 /* Makes it easier to update automatically. */
901 char emacs_copyright[] = "Copyright (C) 2010 Free Software Foundation, Inc.";
902
903 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
904 puts (emacs_copyright);
905 puts ("This program is distributed under the terms in ETAGS.README");
906
907 exit (EXIT_SUCCESS);
908 }
909
910 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
911 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
912 #endif
913
914 static void
915 print_help (argbuffer)
916 argument *argbuffer;
917 {
918 bool help_for_lang = FALSE;
919
920 for (; argbuffer->arg_type != at_end; argbuffer++)
921 if (argbuffer->arg_type == at_language)
922 {
923 if (help_for_lang)
924 puts ("");
925 puts (argbuffer->lang->help);
926 help_for_lang = TRUE;
927 }
928
929 if (help_for_lang)
930 exit (EXIT_SUCCESS);
931
932 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
933 \n\
934 These are the options accepted by %s.\n", progname, progname);
935 if (NO_LONG_OPTIONS)
936 puts ("WARNING: long option names do not work with this executable,\n\
937 as it is not linked with GNU getopt.");
938 else
939 puts ("You may use unambiguous abbreviations for the long option names.");
940 puts (" A - as file name means read names from stdin (one per line).\n\
941 Absolute names are stored in the output file as they are.\n\
942 Relative ones are stored relative to the output file's directory.\n");
943
944 puts ("-a, --append\n\
945 Append tag entries to existing tags file.");
946
947 puts ("--packages-only\n\
948 For Ada files, only generate tags for packages.");
949
950 if (CTAGS)
951 puts ("-B, --backward-search\n\
952 Write the search commands for the tag entries using '?', the\n\
953 backward-search command instead of '/', the forward-search command.");
954
955 /* This option is mostly obsolete, because etags can now automatically
956 detect C++. Retained for backward compatibility and for debugging and
957 experimentation. In principle, we could want to tag as C++ even
958 before any "class" or "template" keyword.
959 puts ("-C, --c++\n\
960 Treat files whose name suffix defaults to C language as C++ files.");
961 */
962
963 puts ("--declarations\n\
964 In C and derived languages, create tags for function declarations,");
965 if (CTAGS)
966 puts ("\tand create tags for extern variables if --globals is used.");
967 else
968 puts
969 ("\tand create tags for extern variables unless --no-globals is used.");
970
971 if (CTAGS)
972 puts ("-d, --defines\n\
973 Create tag entries for C #define constants and enum constants, too.");
974 else
975 puts ("-D, --no-defines\n\
976 Don't create tag entries for C #define constants and enum constants.\n\
977 This makes the tags file smaller.");
978
979 if (!CTAGS)
980 puts ("-i FILE, --include=FILE\n\
981 Include a note in tag file indicating that, when searching for\n\
982 a tag, one should also consult the tags file FILE after\n\
983 checking the current file.");
984
985 puts ("-l LANG, --language=LANG\n\
986 Force the following files to be considered as written in the\n\
987 named language up to the next --language=LANG option.");
988
989 if (CTAGS)
990 puts ("--globals\n\
991 Create tag entries for global variables in some languages.");
992 else
993 puts ("--no-globals\n\
994 Do not create tag entries for global variables in some\n\
995 languages. This makes the tags file smaller.");
996
997 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
998 puts ("--no-line-directive\n\
999 Ignore #line preprocessor directives in C and derived languages.");
1000
1001 if (CTAGS)
1002 puts ("--members\n\
1003 Create tag entries for members of structures in some languages.");
1004 else
1005 puts ("--no-members\n\
1006 Do not create tag entries for members of structures\n\
1007 in some languages.");
1008
1009 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1010 Make a tag for each line matching a regular expression pattern\n\
1011 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1012 files only. REGEXFILE is a file containing one REGEXP per line.\n\
1013 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1014 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
1015 puts (" If TAGNAME/ is present, the tags created are named.\n\
1016 For example Tcl named tags can be created with:\n\
1017 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1018 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1019 `m' means to allow multi-line matches, `s' implies `m' and\n\
1020 causes dot to match any character, including newline.");
1021
1022 puts ("-R, --no-regex\n\
1023 Don't create tags from regexps for the following files.");
1024
1025 puts ("-I, --ignore-indentation\n\
1026 In C and C++ do not assume that a closing brace in the first\n\
1027 column is the final brace of a function or structure definition.");
1028
1029 puts ("-o FILE, --output=FILE\n\
1030 Write the tags to FILE.");
1031
1032 puts ("--parse-stdin=NAME\n\
1033 Read from standard input and record tags as belonging to file NAME.");
1034
1035 if (CTAGS)
1036 {
1037 puts ("-t, --typedefs\n\
1038 Generate tag entries for C and Ada typedefs.");
1039 puts ("-T, --typedefs-and-c++\n\
1040 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1041 and C++ member functions.");
1042 }
1043
1044 if (CTAGS)
1045 puts ("-u, --update\n\
1046 Update the tag entries for the given files, leaving tag\n\
1047 entries for other files in place. Currently, this is\n\
1048 implemented by deleting the existing entries for the given\n\
1049 files and then rewriting the new entries at the end of the\n\
1050 tags file. It is often faster to simply rebuild the entire\n\
1051 tag file than to use this.");
1052
1053 if (CTAGS)
1054 {
1055 puts ("-v, --vgrind\n\
1056 Print on the standard output an index of items intended for\n\
1057 human consumption, similar to the output of vgrind. The index\n\
1058 is sorted, and gives the page number of each item.");
1059
1060 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1061 puts ("-w, --no-duplicates\n\
1062 Do not create duplicate tag entries, for compatibility with\n\
1063 traditional ctags.");
1064
1065 if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066 puts ("-w, --no-warn\n\
1067 Suppress warning messages about duplicate tag entries.");
1068
1069 puts ("-x, --cxref\n\
1070 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1071 The output uses line numbers instead of page numbers, but\n\
1072 beyond that the differences are cosmetic; try both to see\n\
1073 which you like.");
1074 }
1075
1076 puts ("-V, --version\n\
1077 Print the version of the program.\n\
1078 -h, --help\n\
1079 Print this help message.\n\
1080 Followed by one or more `--language' options prints detailed\n\
1081 help about tag generation for the specified languages.");
1082
1083 print_language_names ();
1084
1085 puts ("");
1086 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1087
1088 exit (EXIT_SUCCESS);
1089 }
1090
1091 \f
1092 int
1093 main (argc, argv)
1094 int argc;
1095 char *argv[];
1096 {
1097 int i;
1098 unsigned int nincluded_files;
1099 char **included_files;
1100 argument *argbuffer;
1101 int current_arg, file_count;
1102 linebuffer filename_lb;
1103 bool help_asked = FALSE;
1104 char *optstring;
1105 int opt;
1106
1107
1108 #ifdef DOS_NT
1109 _fmode = O_BINARY; /* all of files are treated as binary files */
1110 #endif /* DOS_NT */
1111
1112 progname = argv[0];
1113 nincluded_files = 0;
1114 included_files = xnew (argc, char *);
1115 current_arg = 0;
1116 file_count = 0;
1117
1118 /* Allocate enough no matter what happens. Overkill, but each one
1119 is small. */
1120 argbuffer = xnew (argc, argument);
1121
1122 /*
1123 * Always find typedefs and structure tags.
1124 * Also default to find macro constants, enum constants, struct
1125 * members and global variables. Do it for both etags and ctags.
1126 */
1127 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1128 globals = members = TRUE;
1129
1130 /* When the optstring begins with a '-' getopt_long does not rearrange the
1131 non-options arguments to be at the end, but leaves them alone. */
1132 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1133 "ac:Cf:Il:o:r:RSVhH",
1134 (CTAGS) ? "BxdtTuvw" : "Di:");
1135
1136 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1137 switch (opt)
1138 {
1139 case 0:
1140 /* If getopt returns 0, then it has already processed a
1141 long-named option. We should do nothing. */
1142 break;
1143
1144 case 1:
1145 /* This means that a file name has been seen. Record it. */
1146 argbuffer[current_arg].arg_type = at_filename;
1147 argbuffer[current_arg].what = optarg;
1148 ++current_arg;
1149 ++file_count;
1150 break;
1151
1152 case STDIN:
1153 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1154 argbuffer[current_arg].arg_type = at_stdin;
1155 argbuffer[current_arg].what = optarg;
1156 ++current_arg;
1157 ++file_count;
1158 if (parsing_stdin)
1159 fatal ("cannot parse standard input more than once", (char *)NULL);
1160 parsing_stdin = TRUE;
1161 break;
1162
1163 /* Common options. */
1164 case 'a': append_to_tagfile = TRUE; break;
1165 case 'C': cplusplus = TRUE; break;
1166 case 'f': /* for compatibility with old makefiles */
1167 case 'o':
1168 if (tagfile)
1169 {
1170 error ("-o option may only be given once.", (char *)NULL);
1171 suggest_asking_for_help ();
1172 /* NOTREACHED */
1173 }
1174 tagfile = optarg;
1175 break;
1176 case 'I':
1177 case 'S': /* for backward compatibility */
1178 ignoreindent = TRUE;
1179 break;
1180 case 'l':
1181 {
1182 language *lang = get_language_from_langname (optarg);
1183 if (lang != NULL)
1184 {
1185 argbuffer[current_arg].lang = lang;
1186 argbuffer[current_arg].arg_type = at_language;
1187 ++current_arg;
1188 }
1189 }
1190 break;
1191 case 'c':
1192 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1193 optarg = concat (optarg, "i", ""); /* memory leak here */
1194 /* FALLTHRU */
1195 case 'r':
1196 argbuffer[current_arg].arg_type = at_regexp;
1197 argbuffer[current_arg].what = optarg;
1198 ++current_arg;
1199 break;
1200 case 'R':
1201 argbuffer[current_arg].arg_type = at_regexp;
1202 argbuffer[current_arg].what = NULL;
1203 ++current_arg;
1204 break;
1205 case 'V':
1206 print_version ();
1207 break;
1208 case 'h':
1209 case 'H':
1210 help_asked = TRUE;
1211 break;
1212
1213 /* Etags options */
1214 case 'D': constantypedefs = FALSE; break;
1215 case 'i': included_files[nincluded_files++] = optarg; break;
1216
1217 /* Ctags options. */
1218 case 'B': searchar = '?'; break;
1219 case 'd': constantypedefs = TRUE; break;
1220 case 't': typedefs = TRUE; break;
1221 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1222 case 'u': update = TRUE; break;
1223 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1224 case 'x': cxref_style = TRUE; break;
1225 case 'w': no_warnings = TRUE; break;
1226 default:
1227 suggest_asking_for_help ();
1228 /* NOTREACHED */
1229 }
1230
1231 /* No more options. Store the rest of arguments. */
1232 for (; optind < argc; optind++)
1233 {
1234 argbuffer[current_arg].arg_type = at_filename;
1235 argbuffer[current_arg].what = argv[optind];
1236 ++current_arg;
1237 ++file_count;
1238 }
1239
1240 argbuffer[current_arg].arg_type = at_end;
1241
1242 if (help_asked)
1243 print_help (argbuffer);
1244 /* NOTREACHED */
1245
1246 if (nincluded_files == 0 && file_count == 0)
1247 {
1248 error ("no input files specified.", (char *)NULL);
1249 suggest_asking_for_help ();
1250 /* NOTREACHED */
1251 }
1252
1253 if (tagfile == NULL)
1254 tagfile = savestr (CTAGS ? "tags" : "TAGS");
1255 cwd = etags_getcwd (); /* the current working directory */
1256 if (cwd[strlen (cwd) - 1] != '/')
1257 {
1258 char *oldcwd = cwd;
1259 cwd = concat (oldcwd, "/", "");
1260 free (oldcwd);
1261 }
1262
1263 /* Compute base directory for relative file names. */
1264 if (streq (tagfile, "-")
1265 || strneq (tagfile, "/dev/", 5))
1266 tagfiledir = cwd; /* relative file names are relative to cwd */
1267 else
1268 {
1269 canonicalize_filename (tagfile);
1270 tagfiledir = absolute_dirname (tagfile, cwd);
1271 }
1272
1273 init (); /* set up boolean "functions" */
1274
1275 linebuffer_init (&lb);
1276 linebuffer_init (&filename_lb);
1277 linebuffer_init (&filebuf);
1278 linebuffer_init (&token_name);
1279
1280 if (!CTAGS)
1281 {
1282 if (streq (tagfile, "-"))
1283 {
1284 tagf = stdout;
1285 #ifdef DOS_NT
1286 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1287 doesn't take effect until after `stdout' is already open). */
1288 if (!isatty (fileno (stdout)))
1289 setmode (fileno (stdout), O_BINARY);
1290 #endif /* DOS_NT */
1291 }
1292 else
1293 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1294 if (tagf == NULL)
1295 pfatal (tagfile);
1296 }
1297
1298 /*
1299 * Loop through files finding functions.
1300 */
1301 for (i = 0; i < current_arg; i++)
1302 {
1303 static language *lang; /* non-NULL if language is forced */
1304 char *this_file;
1305
1306 switch (argbuffer[i].arg_type)
1307 {
1308 case at_language:
1309 lang = argbuffer[i].lang;
1310 break;
1311 case at_regexp:
1312 analyse_regex (argbuffer[i].what);
1313 break;
1314 case at_filename:
1315 this_file = argbuffer[i].what;
1316 /* Input file named "-" means read file names from stdin
1317 (one per line) and use them. */
1318 if (streq (this_file, "-"))
1319 {
1320 if (parsing_stdin)
1321 fatal ("cannot parse standard input AND read file names from it",
1322 (char *)NULL);
1323 while (readline_internal (&filename_lb, stdin) > 0)
1324 process_file_name (filename_lb.buffer, lang);
1325 }
1326 else
1327 process_file_name (this_file, lang);
1328 break;
1329 case at_stdin:
1330 this_file = argbuffer[i].what;
1331 process_file (stdin, this_file, lang);
1332 break;
1333 }
1334 }
1335
1336 free_regexps ();
1337 free (lb.buffer);
1338 free (filebuf.buffer);
1339 free (token_name.buffer);
1340
1341 if (!CTAGS || cxref_style)
1342 {
1343 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1344 put_entries (nodehead);
1345 free_tree (nodehead);
1346 nodehead = NULL;
1347 if (!CTAGS)
1348 {
1349 fdesc *fdp;
1350
1351 /* Output file entries that have no tags. */
1352 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1353 if (!fdp->written)
1354 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1355
1356 while (nincluded_files-- > 0)
1357 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1358
1359 if (fclose (tagf) == EOF)
1360 pfatal (tagfile);
1361 }
1362
1363 exit (EXIT_SUCCESS);
1364 }
1365
1366 /* From here on, we are in (CTAGS && !cxref_style) */
1367 if (update)
1368 {
1369 char cmd[BUFSIZ];
1370 for (i = 0; i < current_arg; ++i)
1371 {
1372 switch (argbuffer[i].arg_type)
1373 {
1374 case at_filename:
1375 case at_stdin:
1376 break;
1377 default:
1378 continue; /* the for loop */
1379 }
1380 sprintf (cmd,
1381 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1382 tagfile, argbuffer[i].what, tagfile);
1383 if (system (cmd) != EXIT_SUCCESS)
1384 fatal ("failed to execute shell command", (char *)NULL);
1385 }
1386 append_to_tagfile = TRUE;
1387 }
1388
1389 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1390 if (tagf == NULL)
1391 pfatal (tagfile);
1392 put_entries (nodehead); /* write all the tags (CTAGS) */
1393 free_tree (nodehead);
1394 nodehead = NULL;
1395 if (fclose (tagf) == EOF)
1396 pfatal (tagfile);
1397
1398 if (CTAGS)
1399 if (append_to_tagfile || update)
1400 {
1401 char cmd[2*BUFSIZ+20];
1402 /* Maybe these should be used:
1403 setenv ("LC_COLLATE", "C", 1);
1404 setenv ("LC_ALL", "C", 1); */
1405 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1406 exit (system (cmd));
1407 }
1408 return EXIT_SUCCESS;
1409 }
1410
1411
1412 /*
1413 * Return a compressor given the file name. If EXTPTR is non-zero,
1414 * return a pointer into FILE where the compressor-specific
1415 * extension begins. If no compressor is found, NULL is returned
1416 * and EXTPTR is not significant.
1417 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1418 */
1419 static compressor *
1420 get_compressor_from_suffix (file, extptr)
1421 char *file;
1422 char **extptr;
1423 {
1424 compressor *compr;
1425 char *slash, *suffix;
1426
1427 /* File has been processed by canonicalize_filename,
1428 so we don't need to consider backslashes on DOS_NT. */
1429 slash = etags_strrchr (file, '/');
1430 suffix = etags_strrchr (file, '.');
1431 if (suffix == NULL || suffix < slash)
1432 return NULL;
1433 if (extptr != NULL)
1434 *extptr = suffix;
1435 suffix += 1;
1436 /* Let those poor souls who live with DOS 8+3 file name limits get
1437 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1438 Only the first do loop is run if not MSDOS */
1439 do
1440 {
1441 for (compr = compressors; compr->suffix != NULL; compr++)
1442 if (streq (compr->suffix, suffix))
1443 return compr;
1444 if (!MSDOS)
1445 break; /* do it only once: not really a loop */
1446 if (extptr != NULL)
1447 *extptr = ++suffix;
1448 } while (*suffix != '\0');
1449 return NULL;
1450 }
1451
1452
1453
1454 /*
1455 * Return a language given the name.
1456 */
1457 static language *
1458 get_language_from_langname (name)
1459 const char *name;
1460 {
1461 language *lang;
1462
1463 if (name == NULL)
1464 error ("empty language name", (char *)NULL);
1465 else
1466 {
1467 for (lang = lang_names; lang->name != NULL; lang++)
1468 if (streq (name, lang->name))
1469 return lang;
1470 error ("unknown language \"%s\"", name);
1471 }
1472
1473 return NULL;
1474 }
1475
1476
1477 /*
1478 * Return a language given the interpreter name.
1479 */
1480 static language *
1481 get_language_from_interpreter (interpreter)
1482 char *interpreter;
1483 {
1484 language *lang;
1485 char **iname;
1486
1487 if (interpreter == NULL)
1488 return NULL;
1489 for (lang = lang_names; lang->name != NULL; lang++)
1490 if (lang->interpreters != NULL)
1491 for (iname = lang->interpreters; *iname != NULL; iname++)
1492 if (streq (*iname, interpreter))
1493 return lang;
1494
1495 return NULL;
1496 }
1497
1498
1499
1500 /*
1501 * Return a language given the file name.
1502 */
1503 static language *
1504 get_language_from_filename (file, case_sensitive)
1505 char *file;
1506 bool case_sensitive;
1507 {
1508 language *lang;
1509 char **name, **ext, *suffix;
1510
1511 /* Try whole file name first. */
1512 for (lang = lang_names; lang->name != NULL; lang++)
1513 if (lang->filenames != NULL)
1514 for (name = lang->filenames; *name != NULL; name++)
1515 if ((case_sensitive)
1516 ? streq (*name, file)
1517 : strcaseeq (*name, file))
1518 return lang;
1519
1520 /* If not found, try suffix after last dot. */
1521 suffix = etags_strrchr (file, '.');
1522 if (suffix == NULL)
1523 return NULL;
1524 suffix += 1;
1525 for (lang = lang_names; lang->name != NULL; lang++)
1526 if (lang->suffixes != NULL)
1527 for (ext = lang->suffixes; *ext != NULL; ext++)
1528 if ((case_sensitive)
1529 ? streq (*ext, suffix)
1530 : strcaseeq (*ext, suffix))
1531 return lang;
1532 return NULL;
1533 }
1534
1535 \f
1536 /*
1537 * This routine is called on each file argument.
1538 */
1539 static void
1540 process_file_name (file, lang)
1541 char *file;
1542 language *lang;
1543 {
1544 struct stat stat_buf;
1545 FILE *inf;
1546 fdesc *fdp;
1547 compressor *compr;
1548 char *compressed_name, *uncompressed_name;
1549 char *ext, *real_name;
1550 int retval;
1551
1552 canonicalize_filename (file);
1553 if (streq (file, tagfile) && !streq (tagfile, "-"))
1554 {
1555 error ("skipping inclusion of %s in self.", file);
1556 return;
1557 }
1558 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1559 {
1560 compressed_name = NULL;
1561 real_name = uncompressed_name = savestr (file);
1562 }
1563 else
1564 {
1565 real_name = compressed_name = savestr (file);
1566 uncompressed_name = savenstr (file, ext - file);
1567 }
1568
1569 /* If the canonicalized uncompressed name
1570 has already been dealt with, skip it silently. */
1571 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1572 {
1573 assert (fdp->infname != NULL);
1574 if (streq (uncompressed_name, fdp->infname))
1575 goto cleanup;
1576 }
1577
1578 if (stat (real_name, &stat_buf) != 0)
1579 {
1580 /* Reset real_name and try with a different name. */
1581 real_name = NULL;
1582 if (compressed_name != NULL) /* try with the given suffix */
1583 {
1584 if (stat (uncompressed_name, &stat_buf) == 0)
1585 real_name = uncompressed_name;
1586 }
1587 else /* try all possible suffixes */
1588 {
1589 for (compr = compressors; compr->suffix != NULL; compr++)
1590 {
1591 compressed_name = concat (file, ".", compr->suffix);
1592 if (stat (compressed_name, &stat_buf) != 0)
1593 {
1594 if (MSDOS)
1595 {
1596 char *suf = compressed_name + strlen (file);
1597 size_t suflen = strlen (compr->suffix) + 1;
1598 for ( ; suf[1]; suf++, suflen--)
1599 {
1600 memmove (suf, suf + 1, suflen);
1601 if (stat (compressed_name, &stat_buf) == 0)
1602 {
1603 real_name = compressed_name;
1604 break;
1605 }
1606 }
1607 if (real_name != NULL)
1608 break;
1609 } /* MSDOS */
1610 free (compressed_name);
1611 compressed_name = NULL;
1612 }
1613 else
1614 {
1615 real_name = compressed_name;
1616 break;
1617 }
1618 }
1619 }
1620 if (real_name == NULL)
1621 {
1622 perror (file);
1623 goto cleanup;
1624 }
1625 } /* try with a different name */
1626
1627 if (!S_ISREG (stat_buf.st_mode))
1628 {
1629 error ("skipping %s: it is not a regular file.", real_name);
1630 goto cleanup;
1631 }
1632 if (real_name == compressed_name)
1633 {
1634 char *cmd = concat (compr->command, " ", real_name);
1635 inf = (FILE *) popen (cmd, "r");
1636 free (cmd);
1637 }
1638 else
1639 inf = fopen (real_name, "r");
1640 if (inf == NULL)
1641 {
1642 perror (real_name);
1643 goto cleanup;
1644 }
1645
1646 process_file (inf, uncompressed_name, lang);
1647
1648 if (real_name == compressed_name)
1649 retval = pclose (inf);
1650 else
1651 retval = fclose (inf);
1652 if (retval < 0)
1653 pfatal (file);
1654
1655 cleanup:
1656 free (compressed_name);
1657 free (uncompressed_name);
1658 last_node = NULL;
1659 curfdp = NULL;
1660 return;
1661 }
1662
1663 static void
1664 process_file (fh, fn, lang)
1665 FILE *fh;
1666 char *fn;
1667 language *lang;
1668 {
1669 static const fdesc emptyfdesc;
1670 fdesc *fdp;
1671
1672 /* Create a new input file description entry. */
1673 fdp = xnew (1, fdesc);
1674 *fdp = emptyfdesc;
1675 fdp->next = fdhead;
1676 fdp->infname = savestr (fn);
1677 fdp->lang = lang;
1678 fdp->infabsname = absolute_filename (fn, cwd);
1679 fdp->infabsdir = absolute_dirname (fn, cwd);
1680 if (filename_is_absolute (fn))
1681 {
1682 /* An absolute file name. Canonicalize it. */
1683 fdp->taggedfname = absolute_filename (fn, NULL);
1684 }
1685 else
1686 {
1687 /* A file name relative to cwd. Make it relative
1688 to the directory of the tags file. */
1689 fdp->taggedfname = relative_filename (fn, tagfiledir);
1690 }
1691 fdp->usecharno = TRUE; /* use char position when making tags */
1692 fdp->prop = NULL;
1693 fdp->written = FALSE; /* not written on tags file yet */
1694
1695 fdhead = fdp;
1696 curfdp = fdhead; /* the current file description */
1697
1698 find_entries (fh);
1699
1700 /* If not Ctags, and if this is not metasource and if it contained no #line
1701 directives, we can write the tags and free all nodes pointing to
1702 curfdp. */
1703 if (!CTAGS
1704 && curfdp->usecharno /* no #line directives in this file */
1705 && !curfdp->lang->metasource)
1706 {
1707 node *np, *prev;
1708
1709 /* Look for the head of the sublist relative to this file. See add_node
1710 for the structure of the node tree. */
1711 prev = NULL;
1712 for (np = nodehead; np != NULL; prev = np, np = np->left)
1713 if (np->fdp == curfdp)
1714 break;
1715
1716 /* If we generated tags for this file, write and delete them. */
1717 if (np != NULL)
1718 {
1719 /* This is the head of the last sublist, if any. The following
1720 instructions depend on this being true. */
1721 assert (np->left == NULL);
1722
1723 assert (fdhead == curfdp);
1724 assert (last_node->fdp == curfdp);
1725 put_entries (np); /* write tags for file curfdp->taggedfname */
1726 free_tree (np); /* remove the written nodes */
1727 if (prev == NULL)
1728 nodehead = NULL; /* no nodes left */
1729 else
1730 prev->left = NULL; /* delete the pointer to the sublist */
1731 }
1732 }
1733 }
1734
1735 /*
1736 * This routine sets up the boolean pseudo-functions which work
1737 * by setting boolean flags dependent upon the corresponding character.
1738 * Every char which is NOT in that string is not a white char. Therefore,
1739 * all of the array "_wht" is set to FALSE, and then the elements
1740 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1741 * of a char is TRUE if it is the string "white", else FALSE.
1742 */
1743 static void
1744 init ()
1745 {
1746 register char *sp;
1747 register int i;
1748
1749 for (i = 0; i < CHARS; i++)
1750 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1751 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1752 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1753 notinname('\0') = notinname('\n');
1754 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1755 begtoken('\0') = begtoken('\n');
1756 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1757 intoken('\0') = intoken('\n');
1758 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1759 endtoken('\0') = endtoken('\n');
1760 }
1761
1762 /*
1763 * This routine opens the specified file and calls the function
1764 * which finds the function and type definitions.
1765 */
1766 static void
1767 find_entries (inf)
1768 FILE *inf;
1769 {
1770 char *cp;
1771 language *lang = curfdp->lang;
1772 Lang_function *parser = NULL;
1773
1774 /* If user specified a language, use it. */
1775 if (lang != NULL && lang->function != NULL)
1776 {
1777 parser = lang->function;
1778 }
1779
1780 /* Else try to guess the language given the file name. */
1781 if (parser == NULL)
1782 {
1783 lang = get_language_from_filename (curfdp->infname, TRUE);
1784 if (lang != NULL && lang->function != NULL)
1785 {
1786 curfdp->lang = lang;
1787 parser = lang->function;
1788 }
1789 }
1790
1791 /* Else look for sharp-bang as the first two characters. */
1792 if (parser == NULL
1793 && readline_internal (&lb, inf) > 0
1794 && lb.len >= 2
1795 && lb.buffer[0] == '#'
1796 && lb.buffer[1] == '!')
1797 {
1798 char *lp;
1799
1800 /* Set lp to point at the first char after the last slash in the
1801 line or, if no slashes, at the first nonblank. Then set cp to
1802 the first successive blank and terminate the string. */
1803 lp = etags_strrchr (lb.buffer+2, '/');
1804 if (lp != NULL)
1805 lp += 1;
1806 else
1807 lp = skip_spaces (lb.buffer + 2);
1808 cp = skip_non_spaces (lp);
1809 *cp = '\0';
1810
1811 if (strlen (lp) > 0)
1812 {
1813 lang = get_language_from_interpreter (lp);
1814 if (lang != NULL && lang->function != NULL)
1815 {
1816 curfdp->lang = lang;
1817 parser = lang->function;
1818 }
1819 }
1820 }
1821
1822 /* We rewind here, even if inf may be a pipe. We fail if the
1823 length of the first line is longer than the pipe block size,
1824 which is unlikely. */
1825 rewind (inf);
1826
1827 /* Else try to guess the language given the case insensitive file name. */
1828 if (parser == NULL)
1829 {
1830 lang = get_language_from_filename (curfdp->infname, FALSE);
1831 if (lang != NULL && lang->function != NULL)
1832 {
1833 curfdp->lang = lang;
1834 parser = lang->function;
1835 }
1836 }
1837
1838 /* Else try Fortran or C. */
1839 if (parser == NULL)
1840 {
1841 node *old_last_node = last_node;
1842
1843 curfdp->lang = get_language_from_langname ("fortran");
1844 find_entries (inf);
1845
1846 if (old_last_node == last_node)
1847 /* No Fortran entries found. Try C. */
1848 {
1849 /* We do not tag if rewind fails.
1850 Only the file name will be recorded in the tags file. */
1851 rewind (inf);
1852 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1853 find_entries (inf);
1854 }
1855 return;
1856 }
1857
1858 if (!no_line_directive
1859 && curfdp->lang != NULL && curfdp->lang->metasource)
1860 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1861 file, or anyway we parsed a file that is automatically generated from
1862 this one. If this is the case, the bingo.c file contained #line
1863 directives that generated tags pointing to this file. Let's delete
1864 them all before parsing this file, which is the real source. */
1865 {
1866 fdesc **fdpp = &fdhead;
1867 while (*fdpp != NULL)
1868 if (*fdpp != curfdp
1869 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1870 /* We found one of those! We must delete both the file description
1871 and all tags referring to it. */
1872 {
1873 fdesc *badfdp = *fdpp;
1874
1875 /* Delete the tags referring to badfdp->taggedfname
1876 that were obtained from badfdp->infname. */
1877 invalidate_nodes (badfdp, &nodehead);
1878
1879 *fdpp = badfdp->next; /* remove the bad description from the list */
1880 free_fdesc (badfdp);
1881 }
1882 else
1883 fdpp = &(*fdpp)->next; /* advance the list pointer */
1884 }
1885
1886 assert (parser != NULL);
1887
1888 /* Generic initialisations before reading from file. */
1889 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1890
1891 /* Generic initialisations before parsing file with readline. */
1892 lineno = 0; /* reset global line number */
1893 charno = 0; /* reset global char number */
1894 linecharno = 0; /* reset global char number of line start */
1895
1896 parser (inf);
1897
1898 regex_tag_multiline ();
1899 }
1900
1901 \f
1902 /*
1903 * Check whether an implicitly named tag should be created,
1904 * then call `pfnote'.
1905 * NAME is a string that is internally copied by this function.
1906 *
1907 * TAGS format specification
1908 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1909 * The following is explained in some more detail in etc/ETAGS.EBNF.
1910 *
1911 * make_tag creates tags with "implicit tag names" (unnamed tags)
1912 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1913 * 1. NAME does not contain any of the characters in NONAM;
1914 * 2. LINESTART contains name as either a rightmost, or rightmost but
1915 * one character, substring;
1916 * 3. the character, if any, immediately before NAME in LINESTART must
1917 * be a character in NONAM;
1918 * 4. the character, if any, immediately after NAME in LINESTART must
1919 * also be a character in NONAM.
1920 *
1921 * The implementation uses the notinname() macro, which recognises the
1922 * characters stored in the string `nonam'.
1923 * etags.el needs to use the same characters that are in NONAM.
1924 */
1925 static void
1926 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1927 char *name; /* tag name, or NULL if unnamed */
1928 int namelen; /* tag length */
1929 bool is_func; /* tag is a function */
1930 char *linestart; /* start of the line where tag is */
1931 int linelen; /* length of the line where tag is */
1932 int lno; /* line number */
1933 long cno; /* character number */
1934 {
1935 bool named = (name != NULL && namelen > 0);
1936
1937 if (!CTAGS && named) /* maybe set named to false */
1938 /* Let's try to make an implicit tag name, that is, create an unnamed tag
1939 such that etags.el can guess a name from it. */
1940 {
1941 int i;
1942 register char *cp = name;
1943
1944 for (i = 0; i < namelen; i++)
1945 if (notinname (*cp++))
1946 break;
1947 if (i == namelen) /* rule #1 */
1948 {
1949 cp = linestart + linelen - namelen;
1950 if (notinname (linestart[linelen-1]))
1951 cp -= 1; /* rule #4 */
1952 if (cp >= linestart /* rule #2 */
1953 && (cp == linestart
1954 || notinname (cp[-1])) /* rule #3 */
1955 && strneq (name, cp, namelen)) /* rule #2 */
1956 named = FALSE; /* use implicit tag name */
1957 }
1958 }
1959
1960 if (named)
1961 name = savenstr (name, namelen);
1962 else
1963 name = NULL;
1964 pfnote (name, is_func, linestart, linelen, lno, cno);
1965 }
1966
1967 /* Record a tag. */
1968 static void
1969 pfnote (name, is_func, linestart, linelen, lno, cno)
1970 char *name; /* tag name, or NULL if unnamed */
1971 bool is_func; /* tag is a function */
1972 char *linestart; /* start of the line where tag is */
1973 int linelen; /* length of the line where tag is */
1974 int lno; /* line number */
1975 long cno; /* character number */
1976 {
1977 register node *np;
1978
1979 assert (name == NULL || name[0] != '\0');
1980 if (CTAGS && name == NULL)
1981 return;
1982
1983 np = xnew (1, node);
1984
1985 /* If ctags mode, change name "main" to M<thisfilename>. */
1986 if (CTAGS && !cxref_style && streq (name, "main"))
1987 {
1988 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1989 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1990 fp = etags_strrchr (np->name, '.');
1991 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1992 fp[0] = '\0';
1993 }
1994 else
1995 np->name = name;
1996 np->valid = TRUE;
1997 np->been_warned = FALSE;
1998 np->fdp = curfdp;
1999 np->is_func = is_func;
2000 np->lno = lno;
2001 if (np->fdp->usecharno)
2002 /* Our char numbers are 0-base, because of C language tradition?
2003 ctags compatibility? old versions compatibility? I don't know.
2004 Anyway, since emacs's are 1-base we expect etags.el to take care
2005 of the difference. If we wanted to have 1-based numbers, we would
2006 uncomment the +1 below. */
2007 np->cno = cno /* + 1 */ ;
2008 else
2009 np->cno = invalidcharno;
2010 np->left = np->right = NULL;
2011 if (CTAGS && !cxref_style)
2012 {
2013 if (strlen (linestart) < 50)
2014 np->regex = concat (linestart, "$", "");
2015 else
2016 np->regex = savenstr (linestart, 50);
2017 }
2018 else
2019 np->regex = savenstr (linestart, linelen);
2020
2021 add_node (np, &nodehead);
2022 }
2023
2024 /*
2025 * free_tree ()
2026 * recurse on left children, iterate on right children.
2027 */
2028 static void
2029 free_tree (np)
2030 register node *np;
2031 {
2032 while (np)
2033 {
2034 register node *node_right = np->right;
2035 free_tree (np->left);
2036 free (np->name);
2037 free (np->regex);
2038 free (np);
2039 np = node_right;
2040 }
2041 }
2042
2043 /*
2044 * free_fdesc ()
2045 * delete a file description
2046 */
2047 static void
2048 free_fdesc (fdp)
2049 register fdesc *fdp;
2050 {
2051 free (fdp->infname);
2052 free (fdp->infabsname);
2053 free (fdp->infabsdir);
2054 free (fdp->taggedfname);
2055 free (fdp->prop);
2056 free (fdp);
2057 }
2058
2059 /*
2060 * add_node ()
2061 * Adds a node to the tree of nodes. In etags mode, sort by file
2062 * name. In ctags mode, sort by tag name. Make no attempt at
2063 * balancing.
2064 *
2065 * add_node is the only function allowed to add nodes, so it can
2066 * maintain state.
2067 */
2068 static void
2069 add_node (np, cur_node_p)
2070 node *np, **cur_node_p;
2071 {
2072 register int dif;
2073 register node *cur_node = *cur_node_p;
2074
2075 if (cur_node == NULL)
2076 {
2077 *cur_node_p = np;
2078 last_node = np;
2079 return;
2080 }
2081
2082 if (!CTAGS)
2083 /* Etags Mode */
2084 {
2085 /* For each file name, tags are in a linked sublist on the right
2086 pointer. The first tags of different files are a linked list
2087 on the left pointer. last_node points to the end of the last
2088 used sublist. */
2089 if (last_node != NULL && last_node->fdp == np->fdp)
2090 {
2091 /* Let's use the same sublist as the last added node. */
2092 assert (last_node->right == NULL);
2093 last_node->right = np;
2094 last_node = np;
2095 }
2096 else if (cur_node->fdp == np->fdp)
2097 {
2098 /* Scanning the list we found the head of a sublist which is
2099 good for us. Let's scan this sublist. */
2100 add_node (np, &cur_node->right);
2101 }
2102 else
2103 /* The head of this sublist is not good for us. Let's try the
2104 next one. */
2105 add_node (np, &cur_node->left);
2106 } /* if ETAGS mode */
2107
2108 else
2109 {
2110 /* Ctags Mode */
2111 dif = strcmp (np->name, cur_node->name);
2112
2113 /*
2114 * If this tag name matches an existing one, then
2115 * do not add the node, but maybe print a warning.
2116 */
2117 if (no_duplicates && !dif)
2118 {
2119 if (np->fdp == cur_node->fdp)
2120 {
2121 if (!no_warnings)
2122 {
2123 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2124 np->fdp->infname, lineno, np->name);
2125 fprintf (stderr, "Second entry ignored\n");
2126 }
2127 }
2128 else if (!cur_node->been_warned && !no_warnings)
2129 {
2130 fprintf
2131 (stderr,
2132 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2133 np->fdp->infname, cur_node->fdp->infname, np->name);
2134 cur_node->been_warned = TRUE;
2135 }
2136 return;
2137 }
2138
2139 /* Actually add the node */
2140 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2141 } /* if CTAGS mode */
2142 }
2143
2144 /*
2145 * invalidate_nodes ()
2146 * Scan the node tree and invalidate all nodes pointing to the
2147 * given file description (CTAGS case) or free them (ETAGS case).
2148 */
2149 static void
2150 invalidate_nodes (badfdp, npp)
2151 fdesc *badfdp;
2152 node **npp;
2153 {
2154 node *np = *npp;
2155
2156 if (np == NULL)
2157 return;
2158
2159 if (CTAGS)
2160 {
2161 if (np->left != NULL)
2162 invalidate_nodes (badfdp, &np->left);
2163 if (np->fdp == badfdp)
2164 np->valid = FALSE;
2165 if (np->right != NULL)
2166 invalidate_nodes (badfdp, &np->right);
2167 }
2168 else
2169 {
2170 assert (np->fdp != NULL);
2171 if (np->fdp == badfdp)
2172 {
2173 *npp = np->left; /* detach the sublist from the list */
2174 np->left = NULL; /* isolate it */
2175 free_tree (np); /* free it */
2176 invalidate_nodes (badfdp, npp);
2177 }
2178 else
2179 invalidate_nodes (badfdp, &np->left);
2180 }
2181 }
2182
2183 \f
2184 static int total_size_of_entries __P((node *));
2185 static int number_len __P((long));
2186
/* Count the decimal digits needed to print NUM, which is expected to
   be non-negative.  The result is always at least 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to begin with, one more for each remaining power of ten. */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2197
2198 /*
2199 * Return total number of characters that put_entries will output for
2200 * the nodes in the linked list at the right of the specified node.
2201 * This count is irrelevant with etags.el since emacs 19.34 at least,
2202 * but is still supplied for backward compatibility.
2203 */
2204 static int
2205 total_size_of_entries (np)
2206 register node *np;
2207 {
2208 register int total = 0;
2209
2210 for (; np != NULL; np = np->right)
2211 if (np->valid)
2212 {
2213 total += strlen (np->regex) + 1; /* pat\177 */
2214 if (np->name != NULL)
2215 total += strlen (np->name) + 1; /* name\001 */
2216 total += number_len ((long) np->lno) + 1; /* lno, */
2217 if (np->cno != invalidcharno) /* cno */
2218 total += number_len (np->cno);
2219 total += 1; /* newline */
2220 }
2221
2222 return total;
2223 }
2224
2225 static void
2226 put_entries (np)
2227 register node *np;
2228 {
2229 register char *sp;
2230 static fdesc *fdp = NULL;
2231
2232 if (np == NULL)
2233 return;
2234
2235 /* Output subentries that precede this one */
2236 if (CTAGS)
2237 put_entries (np->left);
2238
2239 /* Output this entry */
2240 if (np->valid)
2241 {
2242 if (!CTAGS)
2243 {
2244 /* Etags mode */
2245 if (fdp != np->fdp)
2246 {
2247 fdp = np->fdp;
2248 fprintf (tagf, "\f\n%s,%d\n",
2249 fdp->taggedfname, total_size_of_entries (np));
2250 fdp->written = TRUE;
2251 }
2252 fputs (np->regex, tagf);
2253 fputc ('\177', tagf);
2254 if (np->name != NULL)
2255 {
2256 fputs (np->name, tagf);
2257 fputc ('\001', tagf);
2258 }
2259 fprintf (tagf, "%d,", np->lno);
2260 if (np->cno != invalidcharno)
2261 fprintf (tagf, "%ld", np->cno);
2262 fputs ("\n", tagf);
2263 }
2264 else
2265 {
2266 /* Ctags mode */
2267 if (np->name == NULL)
2268 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2269
2270 if (cxref_style)
2271 {
2272 if (vgrind_style)
2273 fprintf (stdout, "%s %s %d\n",
2274 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2275 else
2276 fprintf (stdout, "%-16s %3d %-16s %s\n",
2277 np->name, np->lno, np->fdp->taggedfname, np->regex);
2278 }
2279 else
2280 {
2281 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2282
2283 if (np->is_func)
2284 { /* function or #define macro with args */
2285 putc (searchar, tagf);
2286 putc ('^', tagf);
2287
2288 for (sp = np->regex; *sp; sp++)
2289 {
2290 if (*sp == '\\' || *sp == searchar)
2291 putc ('\\', tagf);
2292 putc (*sp, tagf);
2293 }
2294 putc (searchar, tagf);
2295 }
2296 else
2297 { /* anything else; text pattern inadequate */
2298 fprintf (tagf, "%d", np->lno);
2299 }
2300 putc ('\n', tagf);
2301 }
2302 }
2303 } /* if this node contains a valid tag */
2304
2305 /* Output subentries that follow this one */
2306 put_entries (np->right);
2307 if (!CTAGS)
2308 put_entries (np->left);
2309 }
2310
2311 \f
/* Bit masks describing the C dialect being parsed. */
#define C_EXT   0x00fff         /* mask covering the dialect bits below */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * Kinds of keyword known to the C symbol table.
 * st_none stands for a token that is not in the table.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2335
2336 static unsigned int hash __P((const char *, unsigned int));
2337 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2338 static enum sym_type C_symtype __P((char *, int, int));
2339
2340 /* Feed stuff between (but not including) %[ and %] lines to:
2341 gperf -m 5
2342 %[
2343 %compare-strncmp
2344 %enum
2345 %struct-type
2346 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2347 %%
2348 if, 0, st_C_ignore
2349 for, 0, st_C_ignore
2350 while, 0, st_C_ignore
2351 switch, 0, st_C_ignore
2352 return, 0, st_C_ignore
2353 __attribute__, 0, st_C_attribute
2354 GTY, 0, st_C_attribute
2355 @interface, 0, st_C_objprot
2356 @protocol, 0, st_C_objprot
2357 @implementation,0, st_C_objimpl
2358 @end, 0, st_C_objend
2359 import, (C_JAVA & ~C_PLPL), st_C_ignore
2360 package, (C_JAVA & ~C_PLPL), st_C_ignore
2361 friend, C_PLPL, st_C_ignore
2362 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2363 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2364 interface, (C_JAVA & ~C_PLPL), st_C_struct
2365 class, 0, st_C_class
2366 namespace, C_PLPL, st_C_struct
2367 domain, C_STAR, st_C_struct
2368 union, 0, st_C_struct
2369 struct, 0, st_C_struct
2370 extern, 0, st_C_extern
2371 enum, 0, st_C_enum
2372 typedef, 0, st_C_typedef
2373 define, 0, st_C_define
2374 undef, 0, st_C_define
2375 operator, C_PLPL, st_C_operator
2376 template, 0, st_C_template
2377 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2378 DEFUN, 0, st_C_gnumacro
2379 SYSCALL, 0, st_C_gnumacro
2380 ENTRY, 0, st_C_gnumacro
2381 PSEUDO, 0, st_C_gnumacro
2382 # These are defined inside C functions, so currently they are not met.
2383 # EXFUN used in glibc, DEFVAR_* in emacs.
2384 #EXFUN, 0, st_C_gnumacro
2385 #DEFVAR_, 0, st_C_gnumacro
2386 %]
2387 and replace lines between %< and %> with its output, then:
2388 - remove the #if characterset check
2389 - make in_word_set static and not inline. */
2390 /*%<*/
2391 /* C code produced by gperf version 3.0.1 */
2392 /* Command-line: gperf -m 5 */
2393 /* Computed positions: -k'2-3' */
2394
2395 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2396 /* maximum key range = 33, duplicates = 0 */
2397
/* Hash function for the keyword table.  The value is LEN plus the
   association values of the second character and, unless LEN is 2,
   of the third character of STR.  The caller must make sure that STR
   holds at least that many characters. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Every length except 2 also takes the third character into account. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];
  return hval;
}
2452
2453 static struct C_stab_entry *
2454 in_word_set (str, len)
2455 register const char *str;
2456 register unsigned int len;
2457 {
2458 enum
2459 {
2460 TOTAL_KEYWORDS = 33,
2461 MIN_WORD_LENGTH = 2,
2462 MAX_WORD_LENGTH = 15,
2463 MIN_HASH_VALUE = 2,
2464 MAX_HASH_VALUE = 34
2465 };
2466
2467 static struct C_stab_entry wordlist[] =
2468 {
2469 {""}, {""},
2470 {"if", 0, st_C_ignore},
2471 {"GTY", 0, st_C_attribute},
2472 {"@end", 0, st_C_objend},
2473 {"union", 0, st_C_struct},
2474 {"define", 0, st_C_define},
2475 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2476 {"template", 0, st_C_template},
2477 {"operator", C_PLPL, st_C_operator},
2478 {"@interface", 0, st_C_objprot},
2479 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2480 {"friend", C_PLPL, st_C_ignore},
2481 {"typedef", 0, st_C_typedef},
2482 {"return", 0, st_C_ignore},
2483 {"@implementation",0, st_C_objimpl},
2484 {"@protocol", 0, st_C_objprot},
2485 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2486 {"extern", 0, st_C_extern},
2487 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2488 {"struct", 0, st_C_struct},
2489 {"domain", C_STAR, st_C_struct},
2490 {"switch", 0, st_C_ignore},
2491 {"enum", 0, st_C_enum},
2492 {"for", 0, st_C_ignore},
2493 {"namespace", C_PLPL, st_C_struct},
2494 {"class", 0, st_C_class},
2495 {"while", 0, st_C_ignore},
2496 {"undef", 0, st_C_define},
2497 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2498 {"__attribute__", 0, st_C_attribute},
2499 {"SYSCALL", 0, st_C_gnumacro},
2500 {"ENTRY", 0, st_C_gnumacro},
2501 {"PSEUDO", 0, st_C_gnumacro},
2502 {"DEFUN", 0, st_C_gnumacro}
2503 };
2504
2505 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2506 {
2507 register int key = hash (str, len);
2508
2509 if (key <= MAX_HASH_VALUE && key >= 0)
2510 {
2511 register const char *s = wordlist[key].name;
2512
2513 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2514 return &wordlist[key];
2515 }
2516 }
2517 return 0;
2518 }
2519 /*%>*/
2520
2521 static enum sym_type
2522 C_symtype (str, len, c_ext)
2523 char *str;
2524 int len;
2525 int c_ext;
2526 {
2527 register struct C_stab_entry *se = in_word_set (str, len);
2528
2529 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2530 return st_none;
2531 return se->type;
2532 }
2533
2534 \f
2535 /*
2536 * Ignoring __attribute__ ((list))
2537 */
2538 static bool inattribute; /* looking at an __attribute__ construct */
2539
2540 /*
2541 * C functions and variables are recognized using a simple
2542 * finite automaton. fvdef is its state variable.
2543 */
2544 static enum
2545 {
2546 fvnone, /* nothing seen */
2547 fdefunkey, /* Emacs DEFUN keyword seen */
2548 fdefunname, /* Emacs DEFUN name seen */
2549 foperator, /* func: operator keyword seen (cplpl) */
2550 fvnameseen, /* function or variable name seen */
2551 fstartlist, /* func: just after open parenthesis */
2552 finlist, /* func: in parameter list */
2553 flistseen, /* func: after parameter list */
2554 fignore, /* func: before open brace */
2555 vignore /* var-like: ignore until ';' */
2556 } fvdef;
2557
2558 static bool fvextern; /* func or var: extern keyword seen; */
2559
2560 /*
2561 * typedefs are recognized using a simple finite automaton.
2562 * typdef is its state variable.
2563 */
2564 static enum
2565 {
2566 tnone, /* nothing seen */
2567 tkeyseen, /* typedef keyword seen */
2568 ttypeseen, /* defined type seen */
2569 tinbody, /* inside typedef body */
2570 tend, /* just before typedef tag */
2571 tignore /* junk after typedef tag */
2572 } typdef;
2573
2574 /*
2575 * struct-like structures (enum, struct and union) are recognized
2576 * using another simple finite automaton. `structdef' is its state
2577 * variable.
2578 */
2579 static enum
2580 {
2581 snone, /* nothing seen yet,
2582 or in struct body if bracelev > 0 */
2583 skeyseen, /* struct-like keyword seen */
2584 stagseen, /* struct-like tag seen */
2585 scolonseen /* colon seen after struct-like tag */
2586 } structdef;
2587
2588 /*
2589 * When objdef is different from onone, objtag is the name of the class.
2590 */
2591 static char *objtag = "<uninited>";
2592
2593 /*
2594 * Yet another little state machine to deal with preprocessor lines.
2595 */
2596 static enum
2597 {
2598 dnone, /* nothing seen */
2599 dsharpseen, /* '#' seen as first char on line */
2600 ddefineseen, /* '#' and 'define' seen */
2601 dignorerest /* ignore rest of line */
2602 } definedef;
2603
2604 /*
2605 * State machine for Objective C protocols and implementations.
2606 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2607 */
2608 static enum
2609 {
2610 onone, /* nothing seen */
2611 oprotocol, /* @interface or @protocol seen */
2612 oimplementation, /* @implementations seen */
2613 otagseen, /* class name seen */
2614 oparenseen, /* parenthesis before category seen */
2615 ocatseen, /* category name seen */
2616 oinbody, /* in @implementation body */
2617 omethodsign, /* in @implementation body, after +/- */
2618 omethodtag, /* after method name */
2619 omethodcolon, /* after method colon */
2620 omethodparm, /* after method parameter */
2621 oignore /* wait for @end */
2622 } objdef;
2623
2624
2625 /*
2626 * Use this structure to keep info about the token read, and how it
2627 * should be tagged. Used by the make_C_tag function to build a tag.
2628 */
2629 static struct tok
2630 {
2631 char *line; /* string containing the token */
2632 int offset; /* where the token starts in LINE */
2633 int length; /* token length */
2634 /*
2635 The previous members can be used to pass strings around for generic
2636 purposes. The following ones specifically refer to creating tags. In this
2637 case the token contained here is the pattern that will be used to create a
2638 tag.
2639 */
2640 bool valid; /* do not create a tag; the token should be
2641 invalidated whenever a state machine is
2642 reset prematurely */
2643 bool named; /* create a named tag */
2644 int lineno; /* source line number of tag */
2645 long linepos; /* source char number of tag */
2646 } token; /* latest token read */
2647
2648 /*
2649 * Variables and functions for dealing with nested structures.
2650 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2651 */
2652 static void pushclass_above __P((int, char *, int));
2653 static void popclass_above __P((int));
2654 static void write_classname __P((linebuffer *, char *qualifier));
2655
2656 static struct {
2657 char **cname; /* nested class names */
2658 int *bracelev; /* nested class brace level */
2659 int nl; /* class nesting level (elements used) */
2660 int size; /* length of the array */
2661 } cstack; /* stack for nested declaration tags */
2662 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2663 #define nestlev (cstack.nl)
2664 /* After struct keyword or in struct body, not inside a nested function. */
2665 #define instruct (structdef == snone && nestlev > 0 \
2666 && bracelev == cstack.bracelev[nestlev-1] + 1)
2667
2668 static void
2669 pushclass_above (bracelev, str, len)
2670 int bracelev;
2671 char *str;
2672 int len;
2673 {
2674 int nl;
2675
2676 popclass_above (bracelev);
2677 nl = cstack.nl;
2678 if (nl >= cstack.size)
2679 {
2680 int size = cstack.size *= 2;
2681 xrnew (cstack.cname, size, char *);
2682 xrnew (cstack.bracelev, size, int);
2683 }
2684 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2685 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2686 cstack.bracelev[nl] = bracelev;
2687 cstack.nl = nl + 1;
2688 }
2689
2690 static void
2691 popclass_above (bracelev)
2692 int bracelev;
2693 {
2694 int nl;
2695
2696 for (nl = cstack.nl - 1;
2697 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2698 nl--)
2699 {
2700 free (cstack.cname[nl]);
2701 cstack.nl = nl;
2702 }
2703 }
2704
2705 static void
2706 write_classname (cn, qualifier)
2707 linebuffer *cn;
2708 char *qualifier;
2709 {
2710 int i, len;
2711 int qlen = strlen (qualifier);
2712
2713 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2714 {
2715 len = 0;
2716 cn->len = 0;
2717 cn->buffer[0] = '\0';
2718 }
2719 else
2720 {
2721 len = strlen (cstack.cname[0]);
2722 linebuffer_setlen (cn, len);
2723 strcpy (cn->buffer, cstack.cname[0]);
2724 }
2725 for (i = 1; i < cstack.nl; i++)
2726 {
2727 char *s;
2728 int slen;
2729
2730 s = cstack.cname[i];
2731 if (s == NULL)
2732 continue;
2733 slen = strlen (s);
2734 len += slen + qlen;
2735 linebuffer_setlen (cn, len);
2736 strncat (cn->buffer, qualifier, qlen);
2737 strncat (cn->buffer, s, slen);
2738 }
2739 }
2740
2741 \f
2742 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2743 static void make_C_tag __P((bool));
2744
2745 /*
2746 * consider_token ()
2747 * checks to see if the current token is at the start of a
2748 * function or variable, or corresponds to a typedef, or
2749 * is a struct/union/enum tag, or #define, or an enum constant.
2750 *
2751 * *IS_FUNC gets TRUE if the token is a function or #define macro
2752 * with args. C_EXTP points to which language we are looking at.
2753 *
2754 * Globals
2755 * fvdef IN OUT
2756 * structdef IN OUT
2757 * definedef IN OUT
2758 * typdef IN OUT
2759 * objdef IN OUT
2760 */
2761
2762 static bool
2763 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2764 register char *str; /* IN: token pointer */
2765 register int len; /* IN: token length */
2766 register int c; /* IN: first char after the token */
2767 int *c_extp; /* IN, OUT: C extensions mask */
2768 int bracelev; /* IN: brace level */
2769 int parlev; /* IN: parenthesis level */
2770 bool *is_func_or_var; /* OUT: function or variable found */
2771 {
2772 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2773 structtype is the type of the preceding struct-like keyword, and
2774 structbracelev is the brace level where it has been seen. */
2775 static enum sym_type structtype;
2776 static int structbracelev;
2777 static enum sym_type toktype;
2778
2779
2780 toktype = C_symtype (str, len, *c_extp);
2781
2782 /*
2783 * Skip __attribute__
2784 */
2785 if (toktype == st_C_attribute)
2786 {
2787 inattribute = TRUE;
2788 return FALSE;
2789 }
2790
2791 /*
2792 * Advance the definedef state machine.
2793 */
2794 switch (definedef)
2795 {
2796 case dnone:
2797 /* We're not on a preprocessor line. */
2798 if (toktype == st_C_gnumacro)
2799 {
2800 fvdef = fdefunkey;
2801 return FALSE;
2802 }
2803 break;
2804 case dsharpseen:
2805 if (toktype == st_C_define)
2806 {
2807 definedef = ddefineseen;
2808 }
2809 else
2810 {
2811 definedef = dignorerest;
2812 }
2813 return FALSE;
2814 case ddefineseen:
2815 /*
2816 * Make a tag for any macro, unless it is a constant
2817 * and constantypedefs is FALSE.
2818 */
2819 definedef = dignorerest;
2820 *is_func_or_var = (c == '(');
2821 if (!*is_func_or_var && !constantypedefs)
2822 return FALSE;
2823 else
2824 return TRUE;
2825 case dignorerest:
2826 return FALSE;
2827 default:
2828 error ("internal error: definedef value.", (char *)NULL);
2829 }
2830
2831 /*
2832 * Now typedefs
2833 */
2834 switch (typdef)
2835 {
2836 case tnone:
2837 if (toktype == st_C_typedef)
2838 {
2839 if (typedefs)
2840 typdef = tkeyseen;
2841 fvextern = FALSE;
2842 fvdef = fvnone;
2843 return FALSE;
2844 }
2845 break;
2846 case tkeyseen:
2847 switch (toktype)
2848 {
2849 case st_none:
2850 case st_C_class:
2851 case st_C_struct:
2852 case st_C_enum:
2853 typdef = ttypeseen;
2854 }
2855 break;
2856 case ttypeseen:
2857 if (structdef == snone && fvdef == fvnone)
2858 {
2859 fvdef = fvnameseen;
2860 return TRUE;
2861 }
2862 break;
2863 case tend:
2864 switch (toktype)
2865 {
2866 case st_C_class:
2867 case st_C_struct:
2868 case st_C_enum:
2869 return FALSE;
2870 }
2871 return TRUE;
2872 }
2873
2874 switch (toktype)
2875 {
2876 case st_C_javastruct:
2877 if (structdef == stagseen)
2878 structdef = scolonseen;
2879 return FALSE;
2880 case st_C_template:
2881 case st_C_class:
2882 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2883 && bracelev == 0
2884 && definedef == dnone && structdef == snone
2885 && typdef == tnone && fvdef == fvnone)
2886 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2887 if (toktype == st_C_template)
2888 break;
2889 /* FALLTHRU */
2890 case st_C_struct:
2891 case st_C_enum:
2892 if (parlev == 0
2893 && fvdef != vignore
2894 && (typdef == tkeyseen
2895 || (typedefs_or_cplusplus && structdef == snone)))
2896 {
2897 structdef = skeyseen;
2898 structtype = toktype;
2899 structbracelev = bracelev;
2900 if (fvdef == fvnameseen)
2901 fvdef = fvnone;
2902 }
2903 return FALSE;
2904 }
2905
2906 if (structdef == skeyseen)
2907 {
2908 structdef = stagseen;
2909 return TRUE;
2910 }
2911
2912 if (typdef != tnone)
2913 definedef = dnone;
2914
2915 /* Detect Objective C constructs. */
2916 switch (objdef)
2917 {
2918 case onone:
2919 switch (toktype)
2920 {
2921 case st_C_objprot:
2922 objdef = oprotocol;
2923 return FALSE;
2924 case st_C_objimpl:
2925 objdef = oimplementation;
2926 return FALSE;
2927 }
2928 break;
2929 case oimplementation:
2930 /* Save the class tag for functions or variables defined inside. */
2931 objtag = savenstr (str, len);
2932 objdef = oinbody;
2933 return FALSE;
2934 case oprotocol:
2935 /* Save the class tag for categories. */
2936 objtag = savenstr (str, len);
2937 objdef = otagseen;
2938 *is_func_or_var = TRUE;
2939 return TRUE;
2940 case oparenseen:
2941 objdef = ocatseen;
2942 *is_func_or_var = TRUE;
2943 return TRUE;
2944 case oinbody:
2945 break;
2946 case omethodsign:
2947 if (parlev == 0)
2948 {
2949 fvdef = fvnone;
2950 objdef = omethodtag;
2951 linebuffer_setlen (&token_name, len);
2952 strncpy (token_name.buffer, str, len);
2953 token_name.buffer[len] = '\0';
2954 return TRUE;
2955 }
2956 return FALSE;
2957 case omethodcolon:
2958 if (parlev == 0)
2959 objdef = omethodparm;
2960 return FALSE;
2961 case omethodparm:
2962 if (parlev == 0)
2963 {
2964 fvdef = fvnone;
2965 objdef = omethodtag;
2966 linebuffer_setlen (&token_name, token_name.len + len);
2967 strncat (token_name.buffer, str, len);
2968 return TRUE;
2969 }
2970 return FALSE;
2971 case oignore:
2972 if (toktype == st_C_objend)
2973 {
2974 /* Memory leakage here: the string pointed by objtag is
2975 never released, because many tests would be needed to
2976 avoid breaking on incorrect input code. The amount of
2977 memory leaked here is the sum of the lengths of the
2978 class tags.
2979 free (objtag); */
2980 objdef = onone;
2981 }
2982 return FALSE;
2983 }
2984
2985 /* A function, variable or enum constant? */
2986 switch (toktype)
2987 {
2988 case st_C_extern:
2989 fvextern = TRUE;
2990 switch (fvdef)
2991 {
2992 case finlist:
2993 case flistseen:
2994 case fignore:
2995 case vignore:
2996 break;
2997 default:
2998 fvdef = fvnone;
2999 }
3000 return FALSE;
3001 case st_C_ignore:
3002 fvextern = FALSE;
3003 fvdef = vignore;
3004 return FALSE;
3005 case st_C_operator:
3006 fvdef = foperator;
3007 *is_func_or_var = TRUE;
3008 return TRUE;
3009 case st_none:
3010 if (constantypedefs
3011 && structdef == snone
3012 && structtype == st_C_enum && bracelev > structbracelev)
3013 return TRUE; /* enum constant */
3014 switch (fvdef)
3015 {
3016 case fdefunkey:
3017 if (bracelev > 0)
3018 break;
3019 fvdef = fdefunname; /* GNU macro */
3020 *is_func_or_var = TRUE;
3021 return TRUE;
3022 case fvnone:
3023 switch (typdef)
3024 {
3025 case ttypeseen:
3026 return FALSE;
3027 case tnone:
3028 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3029 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3030 {
3031 fvdef = vignore;
3032 return FALSE;
3033 }
3034 break;
3035 }
3036 /* FALLTHRU */
3037 case fvnameseen:
3038 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3039 {
3040 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3041 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3042 fvdef = foperator;
3043 *is_func_or_var = TRUE;
3044 return TRUE;
3045 }
3046 if (bracelev > 0 && !instruct)
3047 break;
3048 fvdef = fvnameseen; /* function or variable */
3049 *is_func_or_var = TRUE;
3050 return TRUE;
3051 }
3052 break;
3053 }
3054
3055 return FALSE;
3056 }
3057
3058 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
static struct
{
  long linepos;                 /* value of charno recorded just before
                                   the line was read */
  linebuffer lb;                /* the text of the line */
} lbs[2];

/* The macros below are meant to be expanded inside C_entries: they
   refer to its local variables curndx, newndx, lp, quotednl, c_ext
   and savetoken.  */

/* True when the buffer holding the newest line is also the current one,
   i.e. no older line is being kept aside.  */
#define current_lb_is_new (newndx == curndx)
/* Make the other buffer the current one, so that the next line read
   does not overwrite the line held in the buffer used so far.  */
#define switch_line_buffers()   (curndx = 1 - curndx)

/* The current buffer (the one the next line is read into) and the
   buffer holding the most recently read line, with their positions.  */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the language flags passed to C_entries in c_ext.  */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Go to the next input line: remember charno as the position of the
   line, read the line into the current buffer, restart scanning at its
   beginning, forget any pending quoted newline and record that the
   current buffer now holds the newest line.  As the name says, the
   value of definedef is left alone.  */
#define CNL_SAVE_DEFINEDEF() \
do { \
  curlinepos = charno; \
  readline (&curlb, inf); \
  lp = curlb.buffer; \
  quotednl = FALSE; \
  newndx = curndx; \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also end any preprocessor directive:
   the token saved while the directive was being handled, if any, is
   put back and definedef is reset to dnone.  */
#define CNL() \
do { \
  CNL_SAVE_DEFINEDEF(); \
  if (savetoken.valid) \
    { \
      token = savetoken; \
      savetoken.valid = FALSE; \
    } \
  definedef = dnone; \
} while (0)
3101
3102
3103 static void
3104 make_C_tag (isfun)
3105 bool isfun;
3106 {
3107 /* This function is never called when token.valid is FALSE, but
3108 we must protect against invalid input or internal errors. */
3109 if (token.valid)
3110 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3111 token.offset+token.length+1, token.lineno, token.linepos);
3112 else if (DEBUG)
3113 { /* this branch is optimised away if !DEBUG */
3114 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3115 token_name.len + 17, isfun, token.line,
3116 token.offset+token.length+1, token.lineno, token.linepos);
3117 error ("INVALID TOKEN", NULL);
3118 }
3119
3120 token.valid = FALSE;
3121 }
3122
3123
/*
 * C_entries ()
 *      This routine finds functions, variables, typedefs,
 *      #define's, enum constants and struct/union/enum definitions in
 *      C syntax and adds them to the list.
 *
 *      C_EXT is a set of flags selecting the dialect (tested below
 *      through plainc, cplpl, cjava and the C_AUTO and YACC bits); INF
 *      is the stream the source is read from.
 *
 *      The input is scanned one character at a time.  Each iteration of
 *      the main loop does three things in order:
 *        1. handle backslashes, comments, string and character
 *           constants and square brackets, none of which can contain a
 *           tag;
 *        2. if the state allows it, collect identifier characters into
 *           a token and, when the token ends, ask consider_token
 *           whether it may be a tag;
 *        3. look at the character itself (punctuation or end of line)
 *           to decide whether the saved token must be tagged and to
 *           update the parsing state.
 *      The parsing state is kept partly in local variables and partly
 *      in the file-level variables fvdef, fvextern, typdef, structdef,
 *      definedef, objdef, token and token_name.
 */
static void
C_entries (c_ext, inf)
     int c_ext;                 /* extension of C */
     FILE *inf;                 /* input file */
{
  register char c;              /* latest char read; '\0' for end of line */
  register char *lp;            /* pointer one beyond the character `c' */
  int curndx, newndx;           /* indices for current and new lb */
  register int tokoff;          /* offset in line of start of current token */
  register int toklen;          /* length of current token */
  char *qualifier;              /* string used to qualify names */
  int qlen;                     /* length of qualifier */
  int bracelev;                 /* current brace level */
  int bracketlev;               /* current bracket level */
  int parlev;                   /* current parenthesis level */
  int attrparlev;               /* __attribute__ parenthesis level */
  int templatelev;              /* current template level */
  int typdefbracelev;           /* bracelev where a typedef struct body begun */
  bool incomm, inquote, inchar, quotednl, midtoken;
  bool yacc_rules;              /* in the rules part of a yacc file */
  struct tok savetoken = {0};   /* token saved during preprocessor handling */


  linebuffer_init (&lbs[0].lb);
  linebuffer_init (&lbs[1].lb);
  /* The class stack is allocated only once, the first time through.  */
  if (cstack.size == 0)
    {
      cstack.size = (DEBUG) ? 1 : 4;
      cstack.nl = 0;
      cstack.cname = xnew (cstack.size, char *);
      cstack.bracelev = xnew (cstack.size, int);
    }

  tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
  curndx = newndx = 0;
  /* Start on an empty line, so that the first character seen is the
     '\0' which makes the first real line be read.  */
  lp = curlb.buffer;
  *lp = 0;

  fvdef = fvnone; fvextern = FALSE; typdef = tnone;
  structdef = snone; definedef = dnone; objdef = onone;
  yacc_rules = FALSE;
  midtoken = inquote = inchar = incomm = quotednl = FALSE;
  token.valid = savetoken.valid = FALSE;
  bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
  /* Separator put between a class name and a member name.  */
  if (cjava)
    { qualifier = "."; qlen = 1; }
  else
    { qualifier = "::"; qlen = 2; }


  while (!feof (inf))
    {
      c = *lp++;
      if (c == '\\')
        {
          /* If we are at the end of the line, the next character is a
             '\0'; do not skip it, because it is what tells us
             to read the next line.  */
          if (*lp == '\0')
            {
              quotednl = TRUE;
              continue;
            }
          /* Otherwise skip the escaped character and go on as if a
             blank had been read.  */
          lp++;
          c = ' ';
        }
      else if (incomm)
        {
          switch (c)
            {
            case '*':
              if (*lp == '/')
                {
                  c = *lp++;
                  incomm = FALSE;
                }
              break;
            case '\0':
              /* Newlines inside comments do not end macro definitions in
                 traditional cpp. */
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else if (inquote)
        {
          switch (c)
            {
            case '"':
              inquote = FALSE;
              break;
            case '\0':
              /* Newlines inside strings do not end macro definitions
                 in traditional cpp, even though compilers don't
                 usually accept them. */
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else if (inchar)
        {
          switch (c)
            {
            case '\0':
              /* Hmmm, something went wrong. */
              CNL ();
              /* FALLTHRU */
            case '\'':
              inchar = FALSE;
              break;
            }
          continue;
        }
      else if (bracketlev > 0)
        {
          /* Everything between square brackets is skipped.  */
          switch (c)
            {
            case ']':
              if (--bracketlev > 0)
                continue;
              break;
            case '\0':
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else switch (c)
        {
        case '"':
          inquote = TRUE;
          if (inattribute)
            break;
          switch (fvdef)
            {
            case fdefunkey:
            case fstartlist:
            case finlist:
            case fignore:
            case vignore:
              break;
            default:
              fvextern = FALSE;
              fvdef = fvnone;
            }
          continue;
        case '\'':
          inchar = TRUE;
          if (inattribute)
            break;
          if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
            {
              fvextern = FALSE;
              fvdef = fvnone;
            }
          continue;
        case '/':
          if (*lp == '*')
            {
              /* Start of a comment: it counts as a blank.  */
              incomm = TRUE;
              lp++;
              c = ' ';
            }
          else if (/* cplpl && */ *lp == '/')
            {
              /* Two slashes: behave as if the line ended here.  */
              c = '\0';
            }
          break;
        case '%':
          if ((c_ext & YACC) && *lp == '%')
            {
              /* Entering or exiting rules section in yacc file. */
              lp++;
              definedef = dnone; fvdef = fvnone; fvextern = FALSE;
              typdef = tnone; structdef = snone;
              midtoken = inquote = inchar = incomm = quotednl = FALSE;
              bracelev = 0;
              yacc_rules = !yacc_rules;
              continue;
            }
          else
            break;
        case '#':
          if (definedef == dnone)
            {
              char *cp;
              bool cpptoken = TRUE;

              /* Look back on this line.  If all blanks, or nonblanks
                 followed by an end of comment, this is a preprocessor
                 token. */
              for (cp = newlb.buffer; cp < lp-1; cp++)
                if (!iswhite (*cp))
                  {
                    if (*cp == '*' && *(cp+1) == '/')
                      {
                        cp++;
                        cpptoken = TRUE;
                      }
                    else
                      cpptoken = FALSE;
                  }
              if (cpptoken)
                definedef = dsharpseen;
            } /* if (definedef == dnone) */
          continue;
        case '[':
          bracketlev++;
          continue;
        } /* switch (c) */


      /* Consider token only if some involved conditions are satisfied. */
      if (typdef != tignore
          && definedef != dignorerest
          && fvdef != finlist
          && templatelev == 0
          && (definedef != dnone
              || structdef != scolonseen)
          && !inattribute)
        {
          if (midtoken)
            {
              if (endtoken (c))
                {
                  if (c == ':' && *lp == ':' && begtoken (lp[1]))
                    /* This handles :: in the middle,
                       but not at the beginning of an identifier.
                       Also, space-separated :: is not recognised. */
                    {
                      if (c_ext & C_AUTO) /* automatic detection of C++ */
                        c_ext = (c_ext | C_PLPL) & ~C_AUTO;
                      lp += 2;
                      toklen += 2;
                      c = lp[-1];
                      goto still_in_token;
                    }
                  else
                    {
                      bool funorvar = FALSE;

                      if (yacc_rules
                          || consider_token (newlb.buffer + tokoff, toklen, c,
                                             &c_ext, bracelev, parlev,
                                             &funorvar))
                        {
                          if (fvdef == foperator)
                            {
                              /* Extend the token over the operator
                                 symbol: skip blanks, then everything up
                                 to a blank, a '(' or the end of line.  */
                              char *oldlp = lp;
                              lp = skip_spaces (lp-1);
                              if (*lp != '\0')
                                lp += 1;
                              while (*lp != '\0'
                                     && !iswhite (*lp) && *lp != '(')
                                lp += 1;
                              c = *lp++;
                              toklen += lp - oldlp;
                            }
                          /* Build the tag name in token_name; how this
                             is done depends on what the token is.  */
                          token.named = FALSE;
                          if (!plainc
                              && nestlev > 0 && definedef == dnone)
                            /* in struct body */
                            {
                              write_classname (&token_name, qualifier);
                              linebuffer_setlen (&token_name,
                                                 token_name.len+qlen+toklen);
                              strcat (token_name.buffer, qualifier);
                              strncat (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              token.named = TRUE;
                            }
                          else if (objdef == ocatseen)
                            /* Objective C category */
                            {
                              /* The name has the form tag(category).  */
                              int len = strlen (objtag) + 2 + toklen;
                              linebuffer_setlen (&token_name, len);
                              strcpy (token_name.buffer, objtag);
                              strcat (token_name.buffer, "(");
                              strncat (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              strcat (token_name.buffer, ")");
                              token.named = TRUE;
                            }
                          else if (objdef == omethodtag
                                   || objdef == omethodparm)
                            /* Objective C method */
                            {
                              token.named = TRUE;
                            }
                          else if (fvdef == fdefunname)
                            /* GNU DEFUN and similar macros */
                            {
                              bool defun = (newlb.buffer[tokoff] == 'F');
                              int off = tokoff;
                              int len = toklen;

                              /* Rewrite the tag so that emacs lisp DEFUNs
                                 can be found by their elisp name */
                              if (defun)
                                {
                                  /* Drop the leading 'F'.  */
                                  off += 1;
                                  len -= 1;
                                }
                              linebuffer_setlen (&token_name, len);
                              strncpy (token_name.buffer,
                                       newlb.buffer + off, len);
                              token_name.buffer[len] = '\0';
                              if (defun)
                                while (--len >= 0)
                                  if (token_name.buffer[len] == '_')
                                    token_name.buffer[len] = '-';
                              token.named = defun;
                            }
                          else
                            {
                              linebuffer_setlen (&token_name, toklen);
                              strncpy (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              token_name.buffer[toklen] = '\0';
                              /* Name macros and members. */
                              token.named = (structdef == stagseen
                                             || typdef == ttypeseen
                                             || typdef == tend
                                             || (funorvar
                                                 && definedef == dignorerest)
                                             || (funorvar
                                                 && definedef == dnone
                                                 && structdef == snone
                                                 && bracelev > 0));
                            }
                          /* Remember where the token was found.  */
                          token.lineno = lineno;
                          token.offset = tokoff;
                          token.length = toklen;
                          token.line = newlb.buffer;
                          token.linepos = newlinepos;
                          token.valid = TRUE;

                          if (definedef == dnone
                              && (fvdef == fvnameseen
                                  || fvdef == foperator
                                  || structdef == stagseen
                                  || typdef == tend
                                  || typdef == ttypeseen
                                  || objdef != onone))
                            {
                              /* The tag is not made yet: keep the line
                                 the token points into by having the
                                 next line read into the other buffer.  */
                              if (current_lb_is_new)
                                switch_line_buffers ();
                            }
                          else if (definedef != dnone
                                   || fvdef == fdefunname
                                   || instruct)
                            make_C_tag (funorvar);
                        }
                      else /* not yacc and consider_token failed */
                        {
                          if (inattribute && fvdef == fignore)
                            {
                              /* We have just met __attribute__ after a
                                 function parameter list: do not tag the
                                 function again. */
                              fvdef = fvnone;
                            }
                        }
                      midtoken = FALSE;
                    }
                } /* if (endtoken (c)) */
              else if (intoken (c))
                still_in_token:
                {
                  toklen++;
                  continue;
                }
            } /* if (midtoken) */
          else if (begtoken (c))
            {
              switch (definedef)
                {
                case dnone:
                  switch (fvdef)
                    {
                    case fstartlist:
                      /* This prevents tagging fb in
                         void (__attribute__((noreturn)) *fb) (void);
                         Fixing this is not easy and not very important. */
                      fvdef = finlist;
                      continue;
                    case flistseen:
                      if (plainc || declarations)
                        {
                          make_C_tag (TRUE); /* a function */
                          fvdef = fignore;
                        }
                      break;
                    }
                  if (structdef == stagseen && !cjava)
                    {
                      popclass_above (bracelev);
                      structdef = snone;
                    }
                  break;
                case dsharpseen:
                  /* Set the current token aside while the preprocessor
                     directive is handled; CNL puts it back.  */
                  savetoken = token;
                  break;
                }
              /* Start a new token here.  In the rules part of a yacc
                 file only a token at the very beginning of the line is
                 started.  */
              if (!yacc_rules || lp == newlb.buffer + 1)
                {
                  tokoff = lp - 1 - newlb.buffer;
                  toklen = 1;
                  midtoken = TRUE;
                }
              continue;
            } /* if (begtoken) */
        } /* if must look at token */


      /* Detect end of line, colon, comma, semicolon and various braces
         after having handled a token.*/
      switch (c)
        {
        case ':':
          if (inattribute)
            break;
          if (yacc_rules && token.offset == 0 && token.valid)
            {
              make_C_tag (FALSE); /* a yacc function */
              break;
            }
          if (definedef != dnone)
            break;
          switch (objdef)
            {
            case otagseen:
              objdef = oignore;
              make_C_tag (TRUE); /* an Objective C class */
              break;
            case omethodtag:
            case omethodparm:
              /* The colon is part of the method name.  */
              objdef = omethodcolon;
              linebuffer_setlen (&token_name, token_name.len + 1);
              strcat (token_name.buffer, ":");
              break;
            }
          if (structdef == stagseen)
            {
              structdef = scolonseen;
              break;
            }
          /* Should be useless, but may work as a safety net. */
          if (cplpl && fvdef == flistseen)
            {
              make_C_tag (TRUE); /* a function */
              fvdef = fignore;
              break;
            }
          break;
        case ';':
          if (definedef != dnone || inattribute)
            break;
          switch (typdef)
            {
            case tend:
            case ttypeseen:
              make_C_tag (FALSE); /* a typedef */
              typdef = tnone;
              fvdef = fvnone;
              break;
            case tnone:
            case tinbody:
            case tignore:
              switch (fvdef)
                {
                case fignore:
                  if (typdef == tignore || cplpl)
                    fvdef = fvnone;
                  break;
                case fvnameseen:
                  if ((globals && bracelev == 0 && (!fvextern || declarations))
                      || (members && instruct))
                    make_C_tag (FALSE); /* a variable */
                  fvextern = FALSE;
                  fvdef = fvnone;
                  token.valid = FALSE;
                  break;
                case flistseen:
                  if ((declarations
                       && (cplpl || !instruct)
                       && (typdef == tnone || (typdef != tignore && instruct)))
                      || (members
                          && plainc && instruct))
                    make_C_tag (TRUE); /* a function */
                  /* FALLTHRU */
                default:
                  fvextern = FALSE;
                  fvdef = fvnone;
                  if (declarations
                      && cplpl && structdef == stagseen)
                    make_C_tag (FALSE); /* forward declaration */
                  else
                    token.valid = FALSE;
                } /* switch (fvdef) */
              /* FALLTHRU */
            default:
              if (!instruct)
                typdef = tnone;
            }
          if (structdef == stagseen)
            structdef = snone;
          break;
        case ',':
          if (definedef != dnone || inattribute)
            break;
          switch (objdef)
            {
            case omethodtag:
            case omethodparm:
              make_C_tag (TRUE); /* an Objective C method */
              objdef = oinbody;
              break;
            }
          switch (fvdef)
            {
            case fdefunkey:
            case foperator:
            case fstartlist:
            case finlist:
            case fignore:
            case vignore:
              break;
            case fdefunname:
              fvdef = fignore;
              break;
            case fvnameseen:
              if (parlev == 0
                  && ((globals
                       && bracelev == 0
                       && templatelev == 0
                       && (!fvextern || declarations))
                      || (members && instruct)))
                  make_C_tag (FALSE); /* a variable */
              break;
            case flistseen:
              if ((declarations && typdef == tnone && !instruct)
                  || (members && typdef != tignore && instruct))
                {
                  make_C_tag (TRUE); /* a function */
                  fvdef = fvnameseen;
                }
              else if (!declarations)
                fvdef = fvnone;
              token.valid = FALSE;
              break;
            default:
              fvdef = fvnone;
            }
          if (structdef == stagseen)
            structdef = snone;
          break;
        case ']':
          if (definedef != dnone || inattribute)
            break;
          if (structdef == stagseen)
            structdef = snone;
          switch (typdef)
            {
            case ttypeseen:
            case tend:
              typdef = tignore;
              make_C_tag (FALSE);       /* a typedef */
              break;
            case tnone:
            case tinbody:
              switch (fvdef)
                {
                case foperator:
                case finlist:
                case fignore:
                case vignore:
                  break;
                case fvnameseen:
                  if ((members && bracelev == 1)
                      || (globals && bracelev == 0
                          && (!fvextern || declarations)))
                    make_C_tag (FALSE); /* a variable */
                  /* FALLTHRU */
                default:
                  fvdef = fvnone;
                }
              break;
            }
          break;
        case '(':
          if (inattribute)
            {
              attrparlev++;
              break;
            }
          if (definedef != dnone)
            break;
          if (objdef == otagseen && parlev == 0)
            objdef = oparenseen;
          switch (fvdef)
            {
            case fvnameseen:
              if (typdef == ttypeseen
                  && *lp != '*'
                  && !instruct)
                {
                  /* This handles constructs like:
                     typedef void OperatorFun (int fun); */
                  make_C_tag (FALSE);
                  typdef = tignore;
                  fvdef = fignore;
                  break;
                }
              /* FALLTHRU */
            case foperator:
              fvdef = fstartlist;
              break;
            case flistseen:
              fvdef = finlist;
              break;
            }
          parlev++;
          break;
        case ')':
          if (inattribute)
            {
              /* The attribute ends with its outermost parenthesis.  */
              if (--attrparlev == 0)
                inattribute = FALSE;
              break;
            }
          if (definedef != dnone)
            break;
          if (objdef == ocatseen && parlev == 1)
            {
              make_C_tag (TRUE); /* an Objective C category */
              objdef = oignore;
            }
          if (--parlev == 0)
            {
              switch (fvdef)
                {
                case fstartlist:
                case finlist:
                  fvdef = flistseen;
                  break;
                }
              if (!instruct
                  && (typdef == tend
                      || typdef == ttypeseen))
                {
                  typdef = tignore;
                  make_C_tag (FALSE); /* a typedef */
                }
            }
          else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
            parlev = 0;
          break;
        case '{':
          if (definedef != dnone)
            break;
          if (typdef == ttypeseen)
            {
              /* Whenever typdef is set to tinbody (currently only
                 here), typdefbracelev should be set to bracelev. */
              typdef = tinbody;
              typdefbracelev = bracelev;
            }
          switch (fvdef)
            {
            case flistseen:
              make_C_tag (TRUE);    /* a function */
              /* FALLTHRU */
            case fignore:
              fvdef = fvnone;
              break;
            case fvnone:
              switch (objdef)
                {
                case otagseen:
                  make_C_tag (TRUE); /* an Objective C class */
                  objdef = oignore;
                  break;
                case omethodtag:
                case omethodparm:
                  make_C_tag (TRUE); /* an Objective C method */
                  objdef = oinbody;
                  break;
                default:
                  /* Neutralize `extern "C" {' grot. */
                  if (bracelev == 0 && structdef == snone && nestlev == 0
                      && typdef == tnone)
                    bracelev = -1;
                }
              break;
            }
          switch (structdef)
            {
            case skeyseen:         /* unnamed struct */
              pushclass_above (bracelev, NULL, 0);
              structdef = snone;
              break;
            case stagseen:         /* named struct or enum */
            case scolonseen:       /* a class */
              pushclass_above (bracelev,token.line+token.offset, token.length);
              structdef = snone;
              make_C_tag (FALSE);  /* a struct or enum */
              break;
            }
          bracelev += 1;
          break;
        case '*':
          if (definedef != dnone)
            break;
          if (fvdef == fstartlist)
            {
              fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
              token.valid = FALSE;
            }
          break;
        case '}':
          if (definedef != dnone)
            break;
          bracelev -= 1;
          if (!ignoreindent && lp == newlb.buffer + 1)
            {
              if (bracelev != 0)
                token.valid = FALSE; /* unexpected value, token unreliable */
              bracelev = 0;     /* reset brace level if first column */
              parlev = 0;       /* also reset paren level, just in case... */
            }
          else if (bracelev < 0)
            {
              token.valid = FALSE; /* something gone amiss, token unreliable */
              bracelev = 0;
            }
          if (bracelev == 0 && fvdef == vignore)
            fvdef = fvnone;             /* end of function */
          popclass_above (bracelev);
          structdef = snone;
          /* Only if typdef == tinbody is typdefbracelev significant. */
          if (typdef == tinbody && bracelev <= typdefbracelev)
            {
              assert (bracelev == typdefbracelev);
              typdef = tend;
            }
          break;
        case '=':
          if (definedef != dnone)
            break;
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            case fvnameseen:
              if ((members && bracelev == 1)
                  || (globals && bracelev == 0 && (!fvextern || declarations)))
                make_C_tag (FALSE); /* a variable */
              /* FALLTHRU */
            default:
              fvdef = vignore;
            }
          break;
        case '<':
          if (cplpl
              && (structdef == stagseen || fvdef == fvnameseen))
            {
              templatelev++;
              break;
            }
          goto resetfvdef;
        case '>':
          if (templatelev > 0)
            {
              templatelev--;
              break;
            }
          goto resetfvdef;
        case '+':
        case '-':
          if (objdef == oinbody && bracelev == 0)
            {
              objdef = omethodsign;
              break;
            }
          /* FALLTHRU */
        resetfvdef:
        case '#': case '~': case '&': case '%': case '/':
        case '|': case '^': case '!': case '.': case '?':
          if (definedef != dnone)
            break;
          /* These surely cannot follow a function tag in C. */
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            default:
              fvdef = fvnone;
            }
          break;
        case '\0':
          if (objdef == otagseen)
            {
              make_C_tag (TRUE); /* an Objective C class */
              objdef = oignore;
            }
          /* If a macro spans multiple lines don't reset its state. */
          if (quotednl)
            CNL_SAVE_DEFINEDEF ();
          else
            CNL ();
          break;
        } /* switch (c) */

    } /* while not eof */

  /* The two line buffers are released; the class stack is kept.  */
  free (lbs[0].lb.buffer);
  free (lbs[1].lb.buffer);
}
3957
3958 /*
3959 * Process either a C++ file or a C file depending on the setting
3960 * of a global flag.
3961 */
3962 static void
3963 default_C_entries (inf)
3964 FILE *inf;
3965 {
3966 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3967 }
3968
/* Always do plain C: C_entries is called with no dialect flag set, so
   the automatic detection of C++ is not enabled either.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  C_entries (0, inf);
}
3976
/* Always do C++: tag the file read from INF by calling C_entries with
   the C_PLPL flag.  */
static void
Cplusplus_entries (inf)
     FILE *inf;
{
  C_entries (C_PLPL, inf);
}
3984
/* Always do Java: tag the file read from INF by calling C_entries with
   the C_JAVA flag.  */
static void
Cjava_entries (inf)
     FILE *inf;
{
  C_entries (C_JAVA, inf);
}
3992
/* Always do C*: tag the file read from INF by calling C_entries with
   the C_STAR flag.  */
static void
Cstar_entries (inf)
     FILE *inf;
{
  C_entries (C_STAR, inf);
}
4000
/* Always do Yacc: tag the file read from INF by calling C_entries with
   the YACC flag, which makes it recognise the `%%' section marks.  */
static void
Yacc_entries (inf)
     FILE *inf;
{
  C_entries (YACC, inf);
}
4008
4009 \f
/* Useful macros. */

/* Loop header: run the statement that follows once for every line of
   FILE_POINTER.  Before each iteration the line is read into
   LINE_BUFFER and CHAR_POINTER is set to its first character.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )

/* True if CP points at the keyword KW followed by a character which
   cannot be part of a name.  In that case CP is moved past the keyword
   and the spaces following it; otherwise CP is left alone.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
   && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
   && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */

/* Similar to LOOKING_AT but does not use notinname, does not skip */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
   && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))                   /* skip the keyword */
4031
4032 /*
4033 * Read a file, but do no processing. This is used to do regexp
4034 * matching on files that have no language defined.
4035 */
4036 static void
4037 just_read_file (inf)
4038 FILE *inf;
4039 {
4040 register char *dummy;
4041
4042 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4043 continue;
4044 }
4045
4046 \f
4047 /* Fortran parsing */
4048
4049 static void F_takeprec __P((void));
4050 static void F_getit __P((FILE *));
4051
4052 static void
4053 F_takeprec ()
4054 {
4055 dbp = skip_spaces (dbp);
4056 if (*dbp != '*')
4057 return;
4058 dbp++;
4059 dbp = skip_spaces (dbp);
4060 if (strneq (dbp, "(*)", 3))
4061 {
4062 dbp += 3;
4063 return;
4064 }
4065 if (!ISDIGIT (*dbp))
4066 {
4067 --dbp; /* force failure */
4068 return;
4069 }
4070 do
4071 dbp++;
4072 while (ISDIGIT (*dbp));
4073 }
4074
4075 static void
4076 F_getit (inf)
4077 FILE *inf;
4078 {
4079 register char *cp;
4080
4081 dbp = skip_spaces (dbp);
4082 if (*dbp == '\0')
4083 {
4084 readline (&lb, inf);
4085 dbp = lb.buffer;
4086 if (dbp[5] != '&')
4087 return;
4088 dbp += 6;
4089 dbp = skip_spaces (dbp);
4090 }
4091 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4092 return;
4093 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4094 continue;
4095 make_tag (dbp, cp-dbp, TRUE,
4096 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4097 }
4098
4099
4100 static void
4101 Fortran_functions (inf)
4102 FILE *inf;
4103 {
4104 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4105 {
4106 if (*dbp == '%')
4107 dbp++; /* Ratfor escape to fortran */
4108 dbp = skip_spaces (dbp);
4109 if (*dbp == '\0')
4110 continue;
4111 switch (lowcase (*dbp))
4112 {
4113 case 'i':
4114 if (nocase_tail ("integer"))
4115 F_takeprec ();
4116 break;
4117 case 'r':
4118 if (nocase_tail ("real"))
4119 F_takeprec ();
4120 break;
4121 case 'l':
4122 if (nocase_tail ("logical"))
4123 F_takeprec ();
4124 break;
4125 case 'c':
4126 if (nocase_tail ("complex") || nocase_tail ("character"))
4127 F_takeprec ();
4128 break;
4129 case 'd':
4130 if (nocase_tail ("double"))
4131 {
4132 dbp = skip_spaces (dbp);
4133 if (*dbp == '\0')
4134 continue;
4135 if (nocase_tail ("precision"))
4136 break;
4137 continue;
4138 }
4139 break;
4140 }
4141 dbp = skip_spaces (dbp);
4142 if (*dbp == '\0')
4143 continue;
4144 switch (lowcase (*dbp))
4145 {
4146 case 'f':
4147 if (nocase_tail ("function"))
4148 F_getit (inf);
4149 continue;
4150 case 's':
4151 if (nocase_tail ("subroutine"))
4152 F_getit (inf);
4153 continue;
4154 case 'e':
4155 if (nocase_tail ("entry"))
4156 F_getit (inf);
4157 continue;
4158 case 'b':
4159 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4160 {
4161 dbp = skip_spaces (dbp);
4162 if (*dbp == '\0') /* assume un-named */
4163 make_tag ("blockdata", 9, TRUE,
4164 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4165 else
4166 F_getit (inf); /* look for name */
4167 }
4168 continue;
4169 }
4170 }
4171 }
4172
4173 \f
4174 /*
4175 * Ada parsing
4176 * Original code by
4177 * Philippe Waroquiers (1998)
4178 */
4179
4180 static void Ada_getit __P((FILE *, char *));
4181
4182 /* Once we are positioned after an "interesting" keyword, let's get
4183 the real tag value necessary. */
4184 static void
4185 Ada_getit (inf, name_qualifier)
4186 FILE *inf;
4187 char *name_qualifier;
4188 {
4189 register char *cp;
4190 char *name;
4191 char c;
4192
4193 while (!feof (inf))
4194 {
4195 dbp = skip_spaces (dbp);
4196 if (*dbp == '\0'
4197 || (dbp[0] == '-' && dbp[1] == '-'))
4198 {
4199 readline (&lb, inf);
4200 dbp = lb.buffer;
4201 }
4202 switch (lowcase(*dbp))
4203 {
4204 case 'b':
4205 if (nocase_tail ("body"))
4206 {
4207 /* Skipping body of procedure body or package body or ....
4208 resetting qualifier to body instead of spec. */
4209 name_qualifier = "/b";
4210 continue;
4211 }
4212 break;
4213 case 't':
4214 /* Skipping type of task type or protected type ... */
4215 if (nocase_tail ("type"))
4216 continue;
4217 break;
4218 }
4219 if (*dbp == '"')
4220 {
4221 dbp += 1;
4222 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4223 continue;
4224 }
4225 else
4226 {
4227 dbp = skip_spaces (dbp);
4228 for (cp = dbp;
4229 (*cp != '\0'
4230 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4231 cp++)
4232 continue;
4233 if (cp == dbp)
4234 return;
4235 }
4236 c = *cp;
4237 *cp = '\0';
4238 name = concat (dbp, name_qualifier, "");
4239 *cp = c;
4240 make_tag (name, strlen (name), TRUE,
4241 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4242 free (name);
4243 if (c == '"')
4244 dbp = cp + 1;
4245 return;
4246 }
4247 }
4248
/* Make tags for the Ada file read from INF.  Each line is scanned for
   the keywords function, procedure, package, protected, task and type;
   when one is found, Ada_getit is called to tag the name which follows
   it.  Strings, comments and character literals are skipped.
   The global flags packages_only and typedefs restrict or extend the
   set of keywords looked for, as tested below.  */
static void
Ada_funcs (inf)
     FILE *inf;
{
  bool inquote = FALSE;         /* inside a string left open on a
                                   previous line */
  bool skip_till_semicolumn = FALSE; /* skipping up to the next `;' */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* Look for the closing quote, starting after the opening
                 one unless the string began on a previous line.  */
              dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  /* No closing quote on this line.  */
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              dbp++ ;
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token. */
          switch (lowcase(*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }
              /* NOTE(review): there is no break here, so a token
                 starting with `u' which is not a skipped `use' goes on
                 with the tests made for `t'.  This looks unintended;
                 confirm before relying on it.  */

            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore the rest of the line.  */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4356
4357 \f
4358 /*
4359 * Unix and microcontroller assembly tag handling
4360 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4361 * Idea by Bob Weiner, Motorola Inc. (1994)
4362 */
4363 static void
4364 Asm_labels (inf)
4365 FILE *inf;
4366 {
4367 register char *cp;
4368
4369 LOOP_ON_INPUT_LINES (inf, lb, cp)
4370 {
4371 /* If first char is alphabetic or one of [_.$], test for colon
4372 following identifier. */
4373 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4374 {
4375 /* Read past label. */
4376 cp++;
4377 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4378 cp++;
4379 if (*cp == ':' || iswhite (*cp))
4380 /* Found end of label, so copy it and add it to the table. */
4381 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4382 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4383 }
4384 }
4385 }
4386
4387 \f
4388 /*
4389 * Perl support
4390 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4391 * Perl variable names: /^(my|local).../
4392 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4393 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4394 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4395 */
4396 static void
4397 Perl_functions (inf)
4398 FILE *inf;
4399 {
4400 char *package = savestr ("main"); /* current package name */
4401 register char *cp;
4402
4403 LOOP_ON_INPUT_LINES (inf, lb, cp)
4404 {
4405 cp = skip_spaces (cp);
4406
4407 if (LOOKING_AT (cp, "package"))
4408 {
4409 free (package);
4410 get_tag (cp, &package);
4411 }
4412 else if (LOOKING_AT (cp, "sub"))
4413 {
4414 char *pos;
4415 char *sp = cp;
4416
4417 while (!notinname (*cp))
4418 cp++;
4419 if (cp == sp)
4420 continue; /* nothing found */
4421 if ((pos = etags_strchr (sp, ':')) != NULL
4422 && pos < cp && pos[1] == ':')
4423 /* The name is already qualified. */
4424 make_tag (sp, cp - sp, TRUE,
4425 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4426 else
4427 /* Qualify it. */
4428 {
4429 char savechar, *name;
4430
4431 savechar = *cp;
4432 *cp = '\0';
4433 name = concat (package, "::", sp);
4434 *cp = savechar;
4435 make_tag (name, strlen(name), TRUE,
4436 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4437 free (name);
4438 }
4439 }
4440 else if (globals) /* only if we are tagging global vars */
4441 {
4442 /* Skip a qualifier, if any. */
4443 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4444 /* After "my" or "local", but before any following paren or space. */
4445 char *varstart = cp;
4446
4447 if (qual /* should this be removed? If yes, how? */
4448 && (*cp == '$' || *cp == '@' || *cp == '%'))
4449 {
4450 varstart += 1;
4451 do
4452 cp++;
4453 while (ISALNUM (*cp) || *cp == '_');
4454 }
4455 else if (qual)
4456 {
4457 /* Should be examining a variable list at this point;
4458 could insist on seeing an open parenthesis. */
4459 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4460 cp++;
4461 }
4462 else
4463 continue;
4464
4465 make_tag (varstart, cp - varstart, FALSE,
4466 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4467 }
4468 }
4469 free (package);
4470 }
4471
4472
4473 /*
4474 * Python support
4475 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4476 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4477 * More ideas by seb bacon <seb@jamkit.com> (2002)
4478 */
4479 static void
4480 Python_functions (inf)
4481 FILE *inf;
4482 {
4483 register char *cp;
4484
4485 LOOP_ON_INPUT_LINES (inf, lb, cp)
4486 {
4487 cp = skip_spaces (cp);
4488 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4489 {
4490 char *name = cp;
4491 while (!notinname (*cp) && *cp != ':')
4492 cp++;
4493 make_tag (name, cp - name, TRUE,
4494 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4495 }
4496 }
4497 }
4498
4499 \f
4500 /*
4501 * PHP support
4502 * Look for:
4503 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4504 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4505 * - /^[ \t]*define\(\"[^\"]+/
4506 * Only with --members:
4507 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4508 * Idea by Diez B. Roggisch (2001)
4509 */
4510 static void
4511 PHP_functions (inf)
4512 FILE *inf;
4513 {
4514 register char *cp, *name;
4515 bool search_identifier = FALSE;
4516
4517 LOOP_ON_INPUT_LINES (inf, lb, cp)
4518 {
4519 cp = skip_spaces (cp);
4520 name = cp;
4521 if (search_identifier
4522 && *cp != '\0')
4523 {
4524 while (!notinname (*cp))
4525 cp++;
4526 make_tag (name, cp - name, TRUE,
4527 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4528 search_identifier = FALSE;
4529 }
4530 else if (LOOKING_AT (cp, "function"))
4531 {
4532 if(*cp == '&')
4533 cp = skip_spaces (cp+1);
4534 if(*cp != '\0')
4535 {
4536 name = cp;
4537 while (!notinname (*cp))
4538 cp++;
4539 make_tag (name, cp - name, TRUE,
4540 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4541 }
4542 else
4543 search_identifier = TRUE;
4544 }
4545 else if (LOOKING_AT (cp, "class"))
4546 {
4547 if (*cp != '\0')
4548 {
4549 name = cp;
4550 while (*cp != '\0' && !iswhite (*cp))
4551 cp++;
4552 make_tag (name, cp - name, FALSE,
4553 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4554 }
4555 else
4556 search_identifier = TRUE;
4557 }
4558 else if (strneq (cp, "define", 6)
4559 && (cp = skip_spaces (cp+6))
4560 && *cp++ == '('
4561 && (*cp == '"' || *cp == '\''))
4562 {
4563 char quote = *cp++;
4564 name = cp;
4565 while (*cp != quote && *cp != '\0')
4566 cp++;
4567 make_tag (name, cp - name, FALSE,
4568 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4569 }
4570 else if (members
4571 && LOOKING_AT (cp, "var")
4572 && *cp == '$')
4573 {
4574 name = cp;
4575 while (!notinname(*cp))
4576 cp++;
4577 make_tag (name, cp - name, FALSE,
4578 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4579 }
4580 }
4581 }
4582
4583 \f
4584 /*
4585 * Cobol tag functions
4586 * We could look for anything that could be a paragraph name.
4587 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4588 * Idea by Corny de Souza (1993)
4589 */
4590 static void
4591 Cobol_paragraphs (inf)
4592 FILE *inf;
4593 {
4594 register char *bp, *ep;
4595
4596 LOOP_ON_INPUT_LINES (inf, lb, bp)
4597 {
4598 if (lb.len < 9)
4599 continue;
4600 bp += 8;
4601
4602 /* If eoln, compiler option or comment ignore whole line. */
4603 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4604 continue;
4605
4606 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4607 continue;
4608 if (*ep++ == '.')
4609 make_tag (bp, ep - bp, TRUE,
4610 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4611 }
4612 }
4613
4614 \f
4615 /*
4616 * Makefile support
4617 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4618 */
4619 static void
4620 Makefile_targets (inf)
4621 FILE *inf;
4622 {
4623 register char *bp;
4624
4625 LOOP_ON_INPUT_LINES (inf, lb, bp)
4626 {
4627 if (*bp == '\t' || *bp == '#')
4628 continue;
4629 while (*bp != '\0' && *bp != '=' && *bp != ':')
4630 bp++;
4631 if (*bp == ':' || (globals && *bp == '='))
4632 {
4633 /* We should detect if there is more than one tag, but we do not.
4634 We just skip initial and final spaces. */
4635 char * namestart = skip_spaces (lb.buffer);
4636 while (--bp > namestart)
4637 if (!notinname (*bp))
4638 break;
4639 make_tag (namestart, bp - namestart + 1, TRUE,
4640 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4641 }
4642 }
4643 }
4644
4645 \f
4646 /*
4647 * Pascal parsing
4648 * Original code by Mosur K. Mohan (1989)
4649 *
4650 * Locates tags for procedures & functions. Doesn't do any type- or
4651 * var-definitions. It does look for the keyword "extern" or
4652 * "forward" immediately following the procedure statement; if found,
4653 * the tag is skipped.
4654 */
4655 static void
4656 Pascal_functions (inf)
4657 FILE *inf;
4658 {
4659 linebuffer tline; /* mostly copied from C_entries */
4660 long save_lcno;
4661 int save_lineno, namelen, taglen;
4662 char c, *name;
4663
4664 bool /* each of these flags is TRUE if: */
4665 incomment, /* point is inside a comment */
4666 inquote, /* point is inside '..' string */
4667 get_tagname, /* point is after PROCEDURE/FUNCTION
4668 keyword, so next item = potential tag */
4669 found_tag, /* point is after a potential tag */
4670 inparms, /* point is within parameter-list */
4671 verify_tag; /* point has passed the parm-list, so the
4672 next token will determine whether this
4673 is a FORWARD/EXTERN to be ignored, or
4674 whether it is a real tag */
4675
4676 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4677 name = NULL; /* keep compiler quiet */
4678 dbp = lb.buffer;
4679 *dbp = '\0';
4680 linebuffer_init (&tline);
4681
4682 incomment = inquote = FALSE;
4683 found_tag = FALSE; /* have a proc name; check if extern */
4684 get_tagname = FALSE; /* found "procedure" keyword */
4685 inparms = FALSE; /* found '(' after "proc" */
4686 verify_tag = FALSE; /* check if "extern" is ahead */
4687
4688
4689 while (!feof (inf)) /* long main loop to get next char */
4690 {
4691 c = *dbp++;
4692 if (c == '\0') /* if end of line */
4693 {
4694 readline (&lb, inf);
4695 dbp = lb.buffer;
4696 if (*dbp == '\0')
4697 continue;
4698 if (!((found_tag && verify_tag)
4699 || get_tagname))
4700 c = *dbp++; /* only if don't need *dbp pointing
4701 to the beginning of the name of
4702 the procedure or function */
4703 }
4704 if (incomment)
4705 {
4706 if (c == '}') /* within { } comments */
4707 incomment = FALSE;
4708 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4709 {
4710 dbp++;
4711 incomment = FALSE;
4712 }
4713 continue;
4714 }
4715 else if (inquote)
4716 {
4717 if (c == '\'')
4718 inquote = FALSE;
4719 continue;
4720 }
4721 else
4722 switch (c)
4723 {
4724 case '\'':
4725 inquote = TRUE; /* found first quote */
4726 continue;
4727 case '{': /* found open { comment */
4728 incomment = TRUE;
4729 continue;
4730 case '(':
4731 if (*dbp == '*') /* found open (* comment */
4732 {
4733 incomment = TRUE;
4734 dbp++;
4735 }
4736 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4737 inparms = TRUE;
4738 continue;
4739 case ')': /* end of parms list */
4740 if (inparms)
4741 inparms = FALSE;
4742 continue;
4743 case ';':
4744 if (found_tag && !inparms) /* end of proc or fn stmt */
4745 {
4746 verify_tag = TRUE;
4747 break;
4748 }
4749 continue;
4750 }
4751 if (found_tag && verify_tag && (*dbp != ' '))
4752 {
4753 /* Check if this is an "extern" declaration. */
4754 if (*dbp == '\0')
4755 continue;
4756 if (lowcase (*dbp == 'e'))
4757 {
4758 if (nocase_tail ("extern")) /* superfluous, really! */
4759 {
4760 found_tag = FALSE;
4761 verify_tag = FALSE;
4762 }
4763 }
4764 else if (lowcase (*dbp) == 'f')
4765 {
4766 if (nocase_tail ("forward")) /* check for forward reference */
4767 {
4768 found_tag = FALSE;
4769 verify_tag = FALSE;
4770 }
4771 }
4772 if (found_tag && verify_tag) /* not external proc, so make tag */
4773 {
4774 found_tag = FALSE;
4775 verify_tag = FALSE;
4776 make_tag (name, namelen, TRUE,
4777 tline.buffer, taglen, save_lineno, save_lcno);
4778 continue;
4779 }
4780 }
4781 if (get_tagname) /* grab name of proc or fn */
4782 {
4783 char *cp;
4784
4785 if (*dbp == '\0')
4786 continue;
4787
4788 /* Find block name. */
4789 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4790 continue;
4791
4792 /* Save all values for later tagging. */
4793 linebuffer_setlen (&tline, lb.len);
4794 strcpy (tline.buffer, lb.buffer);
4795 save_lineno = lineno;
4796 save_lcno = linecharno;
4797 name = tline.buffer + (dbp - lb.buffer);
4798 namelen = cp - dbp;
4799 taglen = cp - lb.buffer + 1;
4800
4801 dbp = cp; /* set dbp to e-o-token */
4802 get_tagname = FALSE;
4803 found_tag = TRUE;
4804 continue;
4805
4806 /* And proceed to check for "extern". */
4807 }
4808 else if (!incomment && !inquote && !found_tag)
4809 {
4810 /* Check for proc/fn keywords. */
4811 switch (lowcase (c))
4812 {
4813 case 'p':
4814 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4815 get_tagname = TRUE;
4816 continue;
4817 case 'f':
4818 if (nocase_tail ("unction"))
4819 get_tagname = TRUE;
4820 continue;
4821 }
4822 }
4823 } /* while not eof */
4824
4825 free (tline.buffer);
4826 }
4827
4828 \f
4829 /*
4830 * Lisp tag functions
4831 * look for (def or (DEF, quote or QUOTE
4832 */
4833
4834 static void L_getit __P((void));
4835
4836 static void
4837 L_getit ()
4838 {
4839 if (*dbp == '\'') /* Skip prefix quote */
4840 dbp++;
4841 else if (*dbp == '(')
4842 {
4843 dbp++;
4844 /* Try to skip "(quote " */
4845 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4846 /* Ok, then skip "(" before name in (defstruct (foo)) */
4847 dbp = skip_spaces (dbp);
4848 }
4849 get_tag (dbp, NULL);
4850 }
4851
4852 static void
4853 Lisp_functions (inf)
4854 FILE *inf;
4855 {
4856 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4857 {
4858 if (dbp[0] != '(')
4859 continue;
4860
4861 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4862 {
4863 dbp = skip_non_spaces (dbp);
4864 dbp = skip_spaces (dbp);
4865 L_getit ();
4866 }
4867 else
4868 {
4869 /* Check for (foo::defmumble name-defined ... */
4870 do
4871 dbp++;
4872 while (!notinname (*dbp) && *dbp != ':');
4873 if (*dbp == ':')
4874 {
4875 do
4876 dbp++;
4877 while (*dbp == ':');
4878
4879 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4880 {
4881 dbp = skip_non_spaces (dbp);
4882 dbp = skip_spaces (dbp);
4883 L_getit ();
4884 }
4885 }
4886 }
4887 }
4888 }
4889
4890 \f
4891 /*
4892 * Lua script language parsing
4893 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4894 *
4895 * "function" and "local function" are tags if they start at column 1.
4896 */
4897 static void
4898 Lua_functions (inf)
4899 FILE *inf;
4900 {
4901 register char *bp;
4902
4903 LOOP_ON_INPUT_LINES (inf, lb, bp)
4904 {
4905 if (bp[0] != 'f' && bp[0] != 'l')
4906 continue;
4907
4908 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4909
4910 if (LOOKING_AT (bp, "function"))
4911 get_tag (bp, NULL);
4912 }
4913 }
4914
4915 \f
4916 /*
4917 * Postscript tags
4918 * Just look for lines where the first character is '/'
4919 * Also look at "defineps" for PSWrap
4920 * Ideas by:
4921 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4922 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4923 */
4924 static void
4925 PS_functions (inf)
4926 FILE *inf;
4927 {
4928 register char *bp, *ep;
4929
4930 LOOP_ON_INPUT_LINES (inf, lb, bp)
4931 {
4932 if (bp[0] == '/')
4933 {
4934 for (ep = bp+1;
4935 *ep != '\0' && *ep != ' ' && *ep != '{';
4936 ep++)
4937 continue;
4938 make_tag (bp, ep - bp, TRUE,
4939 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4940 }
4941 else if (LOOKING_AT (bp, "defineps"))
4942 get_tag (bp, NULL);
4943 }
4944 }
4945
4946 \f
4947 /*
4948 * Forth tags
4949 * Ignore anything after \ followed by space or in ( )
4950 * Look for words defined by :
4951 * Look for constant, code, create, defer, value, and variable
4952 * OBP extensions: Look for buffer:, field,
4953 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4954 */
4955 static void
4956 Forth_words (inf)
4957 FILE *inf;
4958 {
4959 register char *bp;
4960
4961 LOOP_ON_INPUT_LINES (inf, lb, bp)
4962 while ((bp = skip_spaces (bp))[0] != '\0')
4963 if (bp[0] == '\\' && iswhite(bp[1]))
4964 break; /* read next line */
4965 else if (bp[0] == '(' && iswhite(bp[1]))
4966 do /* skip to ) or eol */
4967 bp++;
4968 while (*bp != ')' && *bp != '\0');
4969 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4970 || LOOKING_AT_NOCASE (bp, "constant")
4971 || LOOKING_AT_NOCASE (bp, "code")
4972 || LOOKING_AT_NOCASE (bp, "create")
4973 || LOOKING_AT_NOCASE (bp, "defer")
4974 || LOOKING_AT_NOCASE (bp, "value")
4975 || LOOKING_AT_NOCASE (bp, "variable")
4976 || LOOKING_AT_NOCASE (bp, "buffer:")
4977 || LOOKING_AT_NOCASE (bp, "field"))
4978 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
4979 else
4980 bp = skip_non_spaces (bp);
4981 }
4982
4983 \f
4984 /*
4985 * Scheme tag functions
4986 * look for (def... xyzzy
4987 * (def... (xyzzy
4988 * (def ... ((...(xyzzy ....
4989 * (set! xyzzy
4990 * Original code by Ken Haase (1985?)
4991 */
4992 static void
4993 Scheme_functions (inf)
4994 FILE *inf;
4995 {
4996 register char *bp;
4997
4998 LOOP_ON_INPUT_LINES (inf, lb, bp)
4999 {
5000 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5001 {
5002 bp = skip_non_spaces (bp+4);
5003 /* Skip over open parens and white space */
5004 while (notinname (*bp))
5005 bp++;
5006 get_tag (bp, NULL);
5007 }
5008 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5009 get_tag (bp, NULL);
5010 }
5011 }
5012
5013 \f
5014 /* Find tags in TeX and LaTeX input files. */
5015
5016 /* TEX_toktab is a table of TeX control sequences that define tags.
5017 * Each entry records one such control sequence.
5018 *
5019 * Original code from who knows whom.
5020 * Ideas by:
5021 * Stefan Monnier (2002)
5022 */
5023
5024 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5025
5026 /* Default set of control sequences to put into TEX_toktab.
5027 The value of environment var TEXTAGS is prepended to this. */
5028 static char *TEX_defenv = "\
5029 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5030 :part:appendix:entry:index:def\
5031 :newcommand:renewcommand:newenvironment:renewenvironment";
5032
5033 static void TEX_mode __P((FILE *));
5034 static void TEX_decode_env __P((char *, char *));
5035
5036 static char TEX_esc = '\\';
5037 static char TEX_opgrp = '{';
5038 static char TEX_clgrp = '}';
5039
5040 /*
5041 * TeX/LaTeX scanning loop.
5042 */
5043 static void
5044 TeX_commands (inf)
5045 FILE *inf;
5046 {
5047 char *cp;
5048 linebuffer *key;
5049
5050 /* Select either \ or ! as escape character. */
5051 TEX_mode (inf);
5052
5053 /* Initialize token table once from environment. */
5054 if (TEX_toktab == NULL)
5055 TEX_decode_env ("TEXTAGS", TEX_defenv);
5056
5057 LOOP_ON_INPUT_LINES (inf, lb, cp)
5058 {
5059 /* Look at each TEX keyword in line. */
5060 for (;;)
5061 {
5062 /* Look for a TEX escape. */
5063 while (*cp++ != TEX_esc)
5064 if (cp[-1] == '\0' || cp[-1] == '%')
5065 goto tex_next_line;
5066
5067 for (key = TEX_toktab; key->buffer != NULL; key++)
5068 if (strneq (cp, key->buffer, key->len))
5069 {
5070 register char *p;
5071 int namelen, linelen;
5072 bool opgrp = FALSE;
5073
5074 cp = skip_spaces (cp + key->len);
5075 if (*cp == TEX_opgrp)
5076 {
5077 opgrp = TRUE;
5078 cp++;
5079 }
5080 for (p = cp;
5081 (!iswhite (*p) && *p != '#' &&
5082 *p != TEX_opgrp && *p != TEX_clgrp);
5083 p++)
5084 continue;
5085 namelen = p - cp;
5086 linelen = lb.len;
5087 if (!opgrp || *p == TEX_clgrp)
5088 {
5089 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5090 p++;
5091 linelen = p - lb.buffer + 1;
5092 }
5093 make_tag (cp, namelen, TRUE,
5094 lb.buffer, linelen, lineno, linecharno);
5095 goto tex_next_line; /* We only tag a line once */
5096 }
5097 }
5098 tex_next_line:
5099 ;
5100 }
5101 }
5102
5103 #define TEX_LESC '\\'
5104 #define TEX_SESC '!'
5105
5106 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5107 chars accordingly. */
5108 static void
5109 TEX_mode (inf)
5110 FILE *inf;
5111 {
5112 int c;
5113
5114 while ((c = getc (inf)) != EOF)
5115 {
5116 /* Skip to next line if we hit the TeX comment char. */
5117 if (c == '%')
5118 while (c != '\n' && c != EOF)
5119 c = getc (inf);
5120 else if (c == TEX_LESC || c == TEX_SESC )
5121 break;
5122 }
5123
5124 if (c == TEX_LESC)
5125 {
5126 TEX_esc = TEX_LESC;
5127 TEX_opgrp = '{';
5128 TEX_clgrp = '}';
5129 }
5130 else
5131 {
5132 TEX_esc = TEX_SESC;
5133 TEX_opgrp = '<';
5134 TEX_clgrp = '>';
5135 }
5136 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5137 No attempt is made to correct the situation. */
5138 rewind (inf);
5139 }
5140
5141 /* Read environment and prepend it to the default string.
5142 Build token table. */
5143 static void
5144 TEX_decode_env (evarname, defenv)
5145 char *evarname;
5146 char *defenv;
5147 {
5148 register char *env, *p;
5149 int i, len;
5150
5151 /* Append default string to environment. */
5152 env = getenv (evarname);
5153 if (!env)
5154 env = defenv;
5155 else
5156 {
5157 char *oldenv = env;
5158 env = concat (oldenv, defenv, "");
5159 }
5160
5161 /* Allocate a token table */
5162 for (len = 1, p = env; p;)
5163 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5164 len++;
5165 TEX_toktab = xnew (len, linebuffer);
5166
5167 /* Unpack environment string into token table. Be careful about */
5168 /* zero-length strings (leading ':', "::" and trailing ':') */
5169 for (i = 0; *env != '\0';)
5170 {
5171 p = etags_strchr (env, ':');
5172 if (!p) /* End of environment string. */
5173 p = env + strlen (env);
5174 if (p - env > 0)
5175 { /* Only non-zero strings. */
5176 TEX_toktab[i].buffer = savenstr (env, p - env);
5177 TEX_toktab[i].len = p - env;
5178 i++;
5179 }
5180 if (*p)
5181 env = p + 1;
5182 else
5183 {
5184 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5185 TEX_toktab[i].len = 0;
5186 break;
5187 }
5188 }
5189 }
5190
5191 \f
5192 /* Texinfo support. Dave Love, Mar. 2000. */
5193 static void
5194 Texinfo_nodes (inf)
5195 FILE * inf;
5196 {
5197 char *cp, *start;
5198 LOOP_ON_INPUT_LINES (inf, lb, cp)
5199 if (LOOKING_AT (cp, "@node"))
5200 {
5201 start = cp;
5202 while (*cp != '\0' && *cp != ',')
5203 cp++;
5204 make_tag (start, cp - start, TRUE,
5205 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5206 }
5207 }
5208
5209 \f
5210 /*
5211 * HTML support.
5212 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5213 * Contents of <a name=xxx> are tags with name xxx.
5214 *
5215 * Francesco Potortì, 2002.
5216 */
5217 static void
5218 HTML_labels (inf)
5219 FILE * inf;
5220 {
5221 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5222 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5223 bool intag = FALSE; /* inside an html tag, looking for ID= */
5224 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5225 char *end;
5226
5227
5228 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5229
5230 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5231 for (;;) /* loop on the same line */
5232 {
5233 if (skiptag) /* skip HTML tag */
5234 {
5235 while (*dbp != '\0' && *dbp != '>')
5236 dbp++;
5237 if (*dbp == '>')
5238 {
5239 dbp += 1;
5240 skiptag = FALSE;
5241 continue; /* look on the same line */
5242 }
5243 break; /* go to next line */
5244 }
5245
5246 else if (intag) /* look for "name=" or "id=" */
5247 {
5248 while (*dbp != '\0' && *dbp != '>'
5249 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5250 dbp++;
5251 if (*dbp == '\0')
5252 break; /* go to next line */
5253 if (*dbp == '>')
5254 {
5255 dbp += 1;
5256 intag = FALSE;
5257 continue; /* look on the same line */
5258 }
5259 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5260 || LOOKING_AT_NOCASE (dbp, "id="))
5261 {
5262 bool quoted = (dbp[0] == '"');
5263
5264 if (quoted)
5265 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5266 continue;
5267 else
5268 for (end = dbp; *end != '\0' && intoken (*end); end++)
5269 continue;
5270 linebuffer_setlen (&token_name, end - dbp);
5271 strncpy (token_name.buffer, dbp, end - dbp);
5272 token_name.buffer[end - dbp] = '\0';
5273
5274 dbp = end;
5275 intag = FALSE; /* we found what we looked for */
5276 skiptag = TRUE; /* skip to the end of the tag */
5277 getnext = TRUE; /* then grab the text */
5278 continue; /* look on the same line */
5279 }
5280 dbp += 1;
5281 }
5282
5283 else if (getnext) /* grab next tokens and tag them */
5284 {
5285 dbp = skip_spaces (dbp);
5286 if (*dbp == '\0')
5287 break; /* go to next line */
5288 if (*dbp == '<')
5289 {
5290 intag = TRUE;
5291 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5292 continue; /* look on the same line */
5293 }
5294
5295 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5296 continue;
5297 make_tag (token_name.buffer, token_name.len, TRUE,
5298 dbp, end - dbp, lineno, linecharno);
5299 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5300 getnext = FALSE;
5301 break; /* go to next line */
5302 }
5303
5304 else /* look for an interesting HTML tag */
5305 {
5306 while (*dbp != '\0' && *dbp != '<')
5307 dbp++;
5308 if (*dbp == '\0')
5309 break; /* go to next line */
5310 intag = TRUE;
5311 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5312 {
5313 inanchor = TRUE;
5314 continue; /* look on the same line */
5315 }
5316 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5317 || LOOKING_AT_NOCASE (dbp, "<h1>")
5318 || LOOKING_AT_NOCASE (dbp, "<h2>")
5319 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5320 {
5321 intag = FALSE;
5322 getnext = TRUE;
5323 continue; /* look on the same line */
5324 }
5325 dbp += 1;
5326 }
5327 }
5328 }
5329
5330 \f
5331 /*
5332 * Prolog support
5333 *
5334 * Assumes that the predicate or rule starts at column 0.
5335 * Only the first clause of a predicate or rule is added.
5336 * Original code by Sunichirou Sugou (1989)
5337 * Rewritten by Anders Lindgren (1996)
5338 */
5339 static int prolog_pr __P((char *, char *));
5340 static void prolog_skip_comment __P((linebuffer *, FILE *));
5341 static int prolog_atom __P((char *, int));
5342
5343 static void
5344 Prolog_functions (inf)
5345 FILE *inf;
5346 {
5347 char *cp, *last;
5348 int len;
5349 int allocated;
5350
5351 allocated = 0;
5352 len = 0;
5353 last = NULL;
5354
5355 LOOP_ON_INPUT_LINES (inf, lb, cp)
5356 {
5357 if (cp[0] == '\0') /* Empty line */
5358 continue;
5359 else if (iswhite (cp[0])) /* Not a predicate */
5360 continue;
5361 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5362 prolog_skip_comment (&lb, inf);
5363 else if ((len = prolog_pr (cp, last)) > 0)
5364 {
5365 /* Predicate or rule. Store the function name so that we
5366 only generate a tag for the first clause. */
5367 if (last == NULL)
5368 last = xnew(len + 1, char);
5369 else if (len + 1 > allocated)
5370 xrnew (last, len + 1, char);
5371 allocated = len + 1;
5372 strncpy (last, cp, len);
5373 last[len] = '\0';
5374 }
5375 }
5376 free (last);
5377 }
5378
5379
5380 static void
5381 prolog_skip_comment (plb, inf)
5382 linebuffer *plb;
5383 FILE *inf;
5384 {
5385 char *cp;
5386
5387 do
5388 {
5389 for (cp = plb->buffer; *cp != '\0'; cp++)
5390 if (cp[0] == '*' && cp[1] == '/')
5391 return;
5392 readline (plb, inf);
5393 }
5394 while (!feof(inf));
5395 }
5396
5397 /*
5398 * A predicate or rule definition is added if it matches:
5399 * <beginning of line><Prolog Atom><whitespace>(
5400 * or <beginning of line><Prolog Atom><whitespace>:-
5401 *
5402 * It is added to the tags database if it doesn't match the
5403 * name of the previous clause header.
5404 *
5405 * Return the size of the name of the predicate or rule, or 0 if no
5406 * header was found.
5407 */
5408 static int
5409 prolog_pr (s, last)
5410 char *s;
5411 char *last; /* Name of last clause. */
5412 {
5413 int pos;
5414 int len;
5415
5416 pos = prolog_atom (s, 0);
5417 if (pos < 1)
5418 return 0;
5419
5420 len = pos;
5421 pos = skip_spaces (s + pos) - s;
5422
5423 if ((s[pos] == '.'
5424 || (s[pos] == '(' && (pos += 1))
5425 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5426 && (last == NULL /* save only the first clause */
5427 || len != (int)strlen (last)
5428 || !strneq (s, last, len)))
5429 {
5430 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5431 return len;
5432 }
5433 else
5434 return 0;
5435 }
5436
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the text and POS the offset in S where the atom starts.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: scan up to the closing quote.  */
  p++;
  for (;;)
    {
      switch (*p)
        {
        case '\'':
          p++;
          if (*p != '\'')
            return (int) (p - start); /* that was the closing quote */
          p++;                  /* A double quote */
          break;

        case '\0':
          /* Multiline quoted atoms are ignored.  */
          return -1;

        case '\\':
          if (p[1] == '\0')
            return -1;
          p += 2;
          break;

        default:
          p++;
          break;
        }
    }
}
5495
5496 \f
5497 /*
5498 * Support for Erlang
5499 *
5500 * Generates tags for functions, defines, and records.
5501 * Assumes that Erlang functions start at column 0.
5502 * Original code by Anders Lindgren (1996)
5503 */
5504 static int erlang_func __P((char *, char *));
5505 static void erlang_attribute __P((char *));
5506 static int erlang_atom __P((char *));
5507
5508 static void
5509 Erlang_functions (inf)
5510 FILE *inf;
5511 {
5512 char *cp, *last;
5513 int len;
5514 int allocated;
5515
5516 allocated = 0;
5517 len = 0;
5518 last = NULL;
5519
5520 LOOP_ON_INPUT_LINES (inf, lb, cp)
5521 {
5522 if (cp[0] == '\0') /* Empty line */
5523 continue;
5524 else if (iswhite (cp[0])) /* Not function nor attribute */
5525 continue;
5526 else if (cp[0] == '%') /* comment */
5527 continue;
5528 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5529 continue;
5530 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5531 {
5532 erlang_attribute (cp);
5533 if (last != NULL)
5534 {
5535 free (last);
5536 last = NULL;
5537 }
5538 }
5539 else if ((len = erlang_func (cp, last)) > 0)
5540 {
5541 /*
5542 * Function. Store the function name so that we only
5543 * generates a tag for the first clause.
5544 */
5545 if (last == NULL)
5546 last = xnew (len + 1, char);
5547 else if (len + 1 > allocated)
5548 xrnew (last, len + 1, char);
5549 allocated = len + 1;
5550 strncpy (last, cp, len);
5551 last[len] = '\0';
5552 }
5553 }
5554 free (last);
5555 }
5556
5557
5558 /*
5559 * A function definition is added if it matches:
5560 * <beginning of line><Erlang Atom><whitespace>(
5561 *
5562 * It is added to the tags database if it doesn't match the
5563 * name of the previous clause header.
5564 *
5565 * Return the size of the name of the function, or 0 if no function
5566 * was found.
5567 */
5568 static int
5569 erlang_func (s, last)
5570 char *s;
5571 char *last; /* Name of last clause. */
5572 {
5573 int pos;
5574 int len;
5575
5576 pos = erlang_atom (s);
5577 if (pos < 1)
5578 return 0;
5579
5580 len = pos;
5581 pos = skip_spaces (s + pos) - s;
5582
5583 /* Save only the first clause. */
5584 if (s[pos++] == '('
5585 && (last == NULL
5586 || len != (int)strlen (last)
5587 || !strneq (s, last, len)))
5588 {
5589 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5590 return len;
5591 }
5592
5593 return 0;
5594 }
5595
5596
5597 /*
5598 * Handle attributes. Currently, tags are generated for defines
5599 * and records.
5600 *
5601 * They are on the form:
5602 * -define(foo, bar).
5603 * -define(Foo(M, N), M+N).
5604 * -record(graph, {vtab = notable, cyclic = true}).
5605 */
5606 static void
5607 erlang_attribute (s)
5608 char *s;
5609 {
5610 char *cp = s;
5611
5612 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5613 && *cp++ == '(')
5614 {
5615 int len = erlang_atom (skip_spaces (cp));
5616 if (len > 0)
5617 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5618 }
5619 return;
5620 }
5621
5622
/*
 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, and also when a quoted atom is not closed
 * on the same line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: a backslash protects the character after it.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5651
5652 \f
5653 static char *scan_separators __P((char *));
5654 static void add_regex __P((char *, language *));
5655 static char *substitute __P((char *, char *, struct re_registers *));
5656
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * A backslash in front of any character other than the separator or
 * one of the recognised escape letters is kept together with that
 * character.  A backslash that is the last character of the string is
 * dropped and the regexp counts as unterminated.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *out = name;             /* where the next output char goes */
  char *in;                     /* read cursor, always ahead of out */

  for (in = name + 1; *in != '\0'; in++)
    {
      char ch = *in;

      if (ch == '\\')
        {
          /* Look at the quoted character.  */
          ch = *++in;
          if (ch == '\0')
            break;              /* nothing follows the backslash */
          switch (ch)
            {
            case 'a': *out++ = '\007'; break; /* BEL (bell) */
            case 'b': *out++ = '\b'; break; /* BS (back space) */
            case 'd': *out++ = 0177; break; /* DEL (delete) */
            case 'e': *out++ = 033; break; /* ESC (escape) */
            case 'f': *out++ = '\f'; break; /* FF (form feed) */
            case 'n': *out++ = '\n'; break; /* NL (new line) */
            case 'r': *out++ = '\r'; break; /* CR (carriage return) */
            case 't': *out++ = '\t'; break; /* TAB (horizontal tab) */
            case 'v': *out++ = '\v'; break; /* VT (vertical tab) */
            default:
              /* A quoted separator loses its backslash; for anything
                 else the quote is preserved.  */
              if (ch != sep)
                *out++ = '\\';
              *out++ = ch;
              break;
            }
          continue;
        }

      if (ch == sep)
        break;                  /* unquoted separator: end of regexp */

      *out++ = ch;
    }

  /* Terminate copied string.  */
  *out = '\0';

  /* NULL signals an unterminated regexp.  */
  return (*in == sep) ? in : NULL;
}
5716
5717 /* Look at the argument of --regex or --no-regex and do the right
5718 thing. Same for each line of a regexp file. */
5719 static void
5720 analyse_regex (regex_arg)
5721 char *regex_arg;
5722 {
5723 if (regex_arg == NULL)
5724 {
5725 free_regexps (); /* --no-regex: remove existing regexps */
5726 return;
5727 }
5728
5729 /* A real --regexp option or a line in a regexp file. */
5730 switch (regex_arg[0])
5731 {
5732 /* Comments in regexp file or null arg to --regex. */
5733 case '\0':
5734 case ' ':
5735 case '\t':
5736 break;
5737
5738 /* Read a regex file. This is recursive and may result in a
5739 loop, which will stop when the file descriptors are exhausted. */
5740 case '@':
5741 {
5742 FILE *regexfp;
5743 linebuffer regexbuf;
5744 char *regexfile = regex_arg + 1;
5745
5746 /* regexfile is a file containing regexps, one per line. */
5747 regexfp = fopen (regexfile, "r");
5748 if (regexfp == NULL)
5749 {
5750 pfatal (regexfile);
5751 return;
5752 }
5753 linebuffer_init (&regexbuf);
5754 while (readline_internal (&regexbuf, regexfp) > 0)
5755 analyse_regex (regexbuf.buffer);
5756 free (regexbuf.buffer);
5757 fclose (regexfp);
5758 }
5759 break;
5760
5761 /* Regexp to be used for a specific language only. */
5762 case '{':
5763 {
5764 language *lang;
5765 char *lang_name = regex_arg + 1;
5766 char *cp;
5767
5768 for (cp = lang_name; *cp != '}'; cp++)
5769 if (*cp == '\0')
5770 {
5771 error ("unterminated language name in regex: %s", regex_arg);
5772 return;
5773 }
5774 *cp++ = '\0';
5775 lang = get_language_from_langname (lang_name);
5776 if (lang == NULL)
5777 return;
5778 add_regex (cp, lang);
5779 }
5780 break;
5781
5782 /* Regexp to be used for any language. */
5783 default:
5784 add_regex (regex_arg, NULL);
5785 break;
5786 }
5787 }
5788
5789 /* Separate the regexp pattern, compile it,
5790 and care for optional name and modifiers. */
5791 static void
5792 add_regex (regexp_pattern, lang)
5793 char *regexp_pattern;
5794 language *lang;
5795 {
5796 static struct re_pattern_buffer zeropattern;
5797 char sep, *pat, *name, *modifiers;
5798 const char *err;
5799 struct re_pattern_buffer *patbuf;
5800 regexp *rp;
5801 bool
5802 force_explicit_name = TRUE, /* do not use implicit tag names */
5803 ignore_case = FALSE, /* case is significant */
5804 multi_line = FALSE, /* matches are done one line at a time */
5805 single_line = FALSE; /* dot does not match newline */
5806
5807
5808 if (strlen(regexp_pattern) < 3)
5809 {
5810 error ("null regexp", (char *)NULL);
5811 return;
5812 }
5813 sep = regexp_pattern[0];
5814 name = scan_separators (regexp_pattern);
5815 if (name == NULL)
5816 {
5817 error ("%s: unterminated regexp", regexp_pattern);
5818 return;
5819 }
5820 if (name[1] == sep)
5821 {
5822 error ("null name for regexp \"%s\"", regexp_pattern);
5823 return;
5824 }
5825 modifiers = scan_separators (name);
5826 if (modifiers == NULL) /* no terminating separator --> no name */
5827 {
5828 modifiers = name;
5829 name = "";
5830 }
5831 else
5832 modifiers += 1; /* skip separator */
5833
5834 /* Parse regex modifiers. */
5835 for (; modifiers[0] != '\0'; modifiers++)
5836 switch (modifiers[0])
5837 {
5838 case 'N':
5839 if (modifiers == name)
5840 error ("forcing explicit tag name but no name, ignoring", NULL);
5841 force_explicit_name = TRUE;
5842 break;
5843 case 'i':
5844 ignore_case = TRUE;
5845 break;
5846 case 's':
5847 single_line = TRUE;
5848 /* FALLTHRU */
5849 case 'm':
5850 multi_line = TRUE;
5851 need_filebuf = TRUE;
5852 break;
5853 default:
5854 {
5855 char wrongmod [2];
5856 wrongmod[0] = modifiers[0];
5857 wrongmod[1] = '\0';
5858 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5859 }
5860 break;
5861 }
5862
5863 patbuf = xnew (1, struct re_pattern_buffer);
5864 *patbuf = zeropattern;
5865 if (ignore_case)
5866 {
5867 static char lc_trans[CHARS];
5868 int i;
5869 for (i = 0; i < CHARS; i++)
5870 lc_trans[i] = lowcase (i);
5871 patbuf->translate = lc_trans; /* translation table to fold case */
5872 }
5873
5874 if (multi_line)
5875 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5876 else
5877 pat = regexp_pattern;
5878
5879 if (single_line)
5880 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5881 else
5882 re_set_syntax (RE_SYNTAX_EMACS);
5883
5884 err = re_compile_pattern (pat, strlen (pat), patbuf);
5885 if (multi_line)
5886 free (pat);
5887 if (err != NULL)
5888 {
5889 error ("%s while compiling pattern", err);
5890 return;
5891 }
5892
5893 rp = p_head;
5894 p_head = xnew (1, regexp);
5895 p_head->pattern = savestr (regexp_pattern);
5896 p_head->p_next = rp;
5897 p_head->lang = lang;
5898 p_head->pat = patbuf;
5899 p_head->name = savestr (name);
5900 p_head->error_signaled = FALSE;
5901 p_head->force_explicit_name = force_explicit_name;
5902 p_head->ignore_case = ignore_case;
5903 p_head->multi_line = multi_line;
5904 }
5905
5906 /*
5907 * Do the substitutions indicated by the regular expression and
5908 * arguments.
5909 */
5910 static char *
5911 substitute (in, out, regs)
5912 char *in, *out;
5913 struct re_registers *regs;
5914 {
5915 char *result, *t;
5916 int size, dig, diglen;
5917
5918 result = NULL;
5919 size = strlen (out);
5920
5921 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5922 if (out[size - 1] == '\\')
5923 fatal ("pattern error in \"%s\"", out);
5924 for (t = etags_strchr (out, '\\');
5925 t != NULL;
5926 t = etags_strchr (t + 2, '\\'))
5927 if (ISDIGIT (t[1]))
5928 {
5929 dig = t[1] - '0';
5930 diglen = regs->end[dig] - regs->start[dig];
5931 size += diglen - 2;
5932 }
5933 else
5934 size -= 1;
5935
5936 /* Allocate space and do the substitutions. */
5937 assert (size >= 0);
5938 result = xnew (size + 1, char);
5939
5940 for (t = result; *out != '\0'; out++)
5941 if (*out == '\\' && ISDIGIT (*++out))
5942 {
5943 dig = *out - '0';
5944 diglen = regs->end[dig] - regs->start[dig];
5945 strncpy (t, in + regs->start[dig], diglen);
5946 t += diglen;
5947 }
5948 else
5949 *t++ = *out;
5950 *t = '\0';
5951
5952 assert (t <= result + size);
5953 assert (t - result == (int)strlen (result));
5954
5955 return result;
5956 }
5957
5958 /* Deallocate all regexps. */
5959 static void
5960 free_regexps ()
5961 {
5962 regexp *rp;
5963 while (p_head != NULL)
5964 {
5965 rp = p_head->p_next;
5966 free (p_head->pattern);
5967 free (p_head->name);
5968 free (p_head);
5969 p_head = rp;
5970 }
5971 return;
5972 }
5973
5974 /*
5975 * Reads the whole file as a single string from `filebuf' and looks for
5976 * multi-line regular expressions, creating tags on matches.
5977 * readline already dealt with normal regexps.
5978 *
5979 * Idea by Ben Wing <ben@666.com> (2002).
5980 */
5981 static void
5982 regex_tag_multiline ()
5983 {
5984 char *buffer = filebuf.buffer;
5985 regexp *rp;
5986 char *name;
5987
5988 for (rp = p_head; rp != NULL; rp = rp->p_next)
5989 {
5990 int match = 0;
5991
5992 if (!rp->multi_line)
5993 continue; /* skip normal regexps */
5994
5995 /* Generic initialisations before parsing file from memory. */
5996 lineno = 1; /* reset global line number */
5997 charno = 0; /* reset global char number */
5998 linecharno = 0; /* reset global char number of line start */
5999
6000 /* Only use generic regexps or those for the current language. */
6001 if (rp->lang != NULL && rp->lang != curfdp->lang)
6002 continue;
6003
6004 while (match >= 0 && match < filebuf.len)
6005 {
6006 match = re_search (rp->pat, buffer, filebuf.len, charno,
6007 filebuf.len - match, &rp->regs);
6008 switch (match)
6009 {
6010 case -2:
6011 /* Some error. */
6012 if (!rp->error_signaled)
6013 {
6014 error ("regexp stack overflow while matching \"%s\"",
6015 rp->pattern);
6016 rp->error_signaled = TRUE;
6017 }
6018 break;
6019 case -1:
6020 /* No match. */
6021 break;
6022 default:
6023 if (match == rp->regs.end[0])
6024 {
6025 if (!rp->error_signaled)
6026 {
6027 error ("regexp matches the empty string: \"%s\"",
6028 rp->pattern);
6029 rp->error_signaled = TRUE;
6030 }
6031 match = -3; /* exit from while loop */
6032 break;
6033 }
6034
6035 /* Match occurred. Construct a tag. */
6036 while (charno < rp->regs.end[0])
6037 if (buffer[charno++] == '\n')
6038 lineno++, linecharno = charno;
6039 name = rp->name;
6040 if (name[0] == '\0')
6041 name = NULL;
6042 else /* make a named tag */
6043 name = substitute (buffer, rp->name, &rp->regs);
6044 if (rp->force_explicit_name)
6045 /* Force explicit tag name, if a name is there. */
6046 pfnote (name, TRUE, buffer + linecharno,
6047 charno - linecharno + 1, lineno, linecharno);
6048 else
6049 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6050 charno - linecharno + 1, lineno, linecharno);
6051 break;
6052 }
6053 }
6054 }
6055 }
6056
6057 \f
6058 static bool
6059 nocase_tail (cp)
6060 char *cp;
6061 {
6062 register int len = 0;
6063
6064 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6065 cp++, len++;
6066 if (*cp == '\0' && !intoken (dbp[len]))
6067 {
6068 dbp += len;
6069 return TRUE;
6070 }
6071 return FALSE;
6072 }
6073
6074 static void
6075 get_tag (bp, namepp)
6076 register char *bp;
6077 char **namepp;
6078 {
6079 register char *cp = bp;
6080
6081 if (*bp != '\0')
6082 {
6083 /* Go till you get to white space or a syntactic break */
6084 for (cp = bp + 1; !notinname (*cp); cp++)
6085 continue;
6086 make_tag (bp, cp - bp, TRUE,
6087 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6088 }
6089
6090 if (namepp != NULL)
6091 *namepp = savenstr (bp, cp - bp);
6092 }
6093
6094 /*
6095 * Read a line of text from `stream' into `lbp', excluding the
6096 * newline or CR-NL, if any. Return the number of characters read from
6097 * `stream', which is the length of the line including the newline.
6098 *
6099 * On DOS or Windows we do not count the CR character, if any before the
6100 * NL, in the returned length; this mirrors the behavior of Emacs on those
6101 * platforms (for text files, it translates CR-NL to NL as it reads in the
6102 * file).
6103 *
6104 * If multi-line regular expressions are requested, each line read is
6105 * appended to `filebuf'.
6106 */
6107 static long
6108 readline_internal (lbp, stream)
6109 linebuffer *lbp;
6110 register FILE *stream;
6111 {
6112 char *buffer = lbp->buffer;
6113 register char *p = lbp->buffer;
6114 register char *pend;
6115 int chars_deleted;
6116
6117 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6118
6119 for (;;)
6120 {
6121 register int c = getc (stream);
6122 if (p == pend)
6123 {
6124 /* We're at the end of linebuffer: expand it. */
6125 lbp->size *= 2;
6126 xrnew (buffer, lbp->size, char);
6127 p += buffer - lbp->buffer;
6128 pend = buffer + lbp->size;
6129 lbp->buffer = buffer;
6130 }
6131 if (c == EOF)
6132 {
6133 *p = '\0';
6134 chars_deleted = 0;
6135 break;
6136 }
6137 if (c == '\n')
6138 {
6139 if (p > buffer && p[-1] == '\r')
6140 {
6141 p -= 1;
6142 #ifdef DOS_NT
6143 /* Assume CRLF->LF translation will be performed by Emacs
6144 when loading this file, so CRs won't appear in the buffer.
6145 It would be cleaner to compensate within Emacs;
6146 however, Emacs does not know how many CRs were deleted
6147 before any given point in the file. */
6148 chars_deleted = 1;
6149 #else
6150 chars_deleted = 2;
6151 #endif
6152 }
6153 else
6154 {
6155 chars_deleted = 1;
6156 }
6157 *p = '\0';
6158 break;
6159 }
6160 *p++ = c;
6161 }
6162 lbp->len = p - buffer;
6163
6164 if (need_filebuf /* we need filebuf for multi-line regexps */
6165 && chars_deleted > 0) /* not at EOF */
6166 {
6167 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6168 {
6169 /* Expand filebuf. */
6170 filebuf.size *= 2;
6171 xrnew (filebuf.buffer, filebuf.size, char);
6172 }
6173 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6174 filebuf.len += lbp->len;
6175 filebuf.buffer[filebuf.len++] = '\n';
6176 filebuf.buffer[filebuf.len] = '\0';
6177 }
6178
6179 return lbp->len + chars_deleted;
6180 }
6181
6182 /*
6183 * Like readline_internal, above, but in addition try to match the
6184 * input line against relevant regular expressions and manage #line
6185 * directives.
6186 */
6187 static void
6188 readline (lbp, stream)
6189 linebuffer *lbp;
6190 FILE *stream;
6191 {
6192 long result;
6193
6194 linecharno = charno; /* update global char number of line start */
6195 result = readline_internal (lbp, stream); /* read line */
6196 lineno += 1; /* increment global line number */
6197 charno += result; /* increment global char number */
6198
6199 /* Honour #line directives. */
6200 if (!no_line_directive)
6201 {
6202 static bool discard_until_line_directive;
6203
6204 /* Check whether this is a #line directive. */
6205 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6206 {
6207 unsigned int lno;
6208 int start = 0;
6209
6210 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6211 && start > 0) /* double quote character found */
6212 {
6213 char *endp = lbp->buffer + start;
6214
6215 while ((endp = etags_strchr (endp, '"')) != NULL
6216 && endp[-1] == '\\')
6217 endp++;
6218 if (endp != NULL)
6219 /* Ok, this is a real #line directive. Let's deal with it. */
6220 {
6221 char *taggedabsname; /* absolute name of original file */
6222 char *taggedfname; /* name of original file as given */
6223 char *name; /* temp var */
6224
6225 discard_until_line_directive = FALSE; /* found it */
6226 name = lbp->buffer + start;
6227 *endp = '\0';
6228 canonicalize_filename (name);
6229 taggedabsname = absolute_filename (name, tagfiledir);
6230 if (filename_is_absolute (name)
6231 || filename_is_absolute (curfdp->infname))
6232 taggedfname = savestr (taggedabsname);
6233 else
6234 taggedfname = relative_filename (taggedabsname,tagfiledir);
6235
6236 if (streq (curfdp->taggedfname, taggedfname))
6237 /* The #line directive is only a line number change. We
6238 deal with this afterwards. */
6239 free (taggedfname);
6240 else
6241 /* The tags following this #line directive should be
6242 attributed to taggedfname. In order to do this, set
6243 curfdp accordingly. */
6244 {
6245 fdesc *fdp; /* file description pointer */
6246
6247 /* Go look for a file description already set up for the
6248 file indicated in the #line directive. If there is
6249 one, use it from now until the next #line
6250 directive. */
6251 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6252 if (streq (fdp->infname, curfdp->infname)
6253 && streq (fdp->taggedfname, taggedfname))
6254 /* If we remove the second test above (after the &&)
6255 then all entries pertaining to the same file are
6256 coalesced in the tags file. If we use it, then
6257 entries pertaining to the same file but generated
6258 from different files (via #line directives) will
6259 go into separate sections in the tags file. These
6260 alternatives look equivalent. The first one
6261 destroys some apparently useless information. */
6262 {
6263 curfdp = fdp;
6264 free (taggedfname);
6265 break;
6266 }
6267 /* Else, if we already tagged the real file, skip all
6268 input lines until the next #line directive. */
6269 if (fdp == NULL) /* not found */
6270 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6271 if (streq (fdp->infabsname, taggedabsname))
6272 {
6273 discard_until_line_directive = TRUE;
6274 free (taggedfname);
6275 break;
6276 }
6277 /* Else create a new file description and use that from
6278 now on, until the next #line directive. */
6279 if (fdp == NULL) /* not found */
6280 {
6281 fdp = fdhead;
6282 fdhead = xnew (1, fdesc);
6283 *fdhead = *curfdp; /* copy curr. file description */
6284 fdhead->next = fdp;
6285 fdhead->infname = savestr (curfdp->infname);
6286 fdhead->infabsname = savestr (curfdp->infabsname);
6287 fdhead->infabsdir = savestr (curfdp->infabsdir);
6288 fdhead->taggedfname = taggedfname;
6289 fdhead->usecharno = FALSE;
6290 fdhead->prop = NULL;
6291 fdhead->written = FALSE;
6292 curfdp = fdhead;
6293 }
6294 }
6295 free (taggedabsname);
6296 lineno = lno - 1;
6297 readline (lbp, stream);
6298 return;
6299 } /* if a real #line directive */
6300 } /* if #line is followed by a a number */
6301 } /* if line begins with "#line " */
6302
6303 /* If we are here, no #line directive was found. */
6304 if (discard_until_line_directive)
6305 {
6306 if (result > 0)
6307 {
6308 /* Do a tail recursion on ourselves, thus discarding the contents
6309 of the line buffer. */
6310 readline (lbp, stream);
6311 return;
6312 }
6313 /* End of file. */
6314 discard_until_line_directive = FALSE;
6315 return;
6316 }
6317 } /* if #line directives should be considered */
6318
6319 {
6320 int match;
6321 regexp *rp;
6322 char *name;
6323
6324 /* Match against relevant regexps. */
6325 if (lbp->len > 0)
6326 for (rp = p_head; rp != NULL; rp = rp->p_next)
6327 {
6328 /* Only use generic regexps or those for the current language.
6329 Also do not use multiline regexps, which is the job of
6330 regex_tag_multiline. */
6331 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6332 || rp->multi_line)
6333 continue;
6334
6335 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6336 switch (match)
6337 {
6338 case -2:
6339 /* Some error. */
6340 if (!rp->error_signaled)
6341 {
6342 error ("regexp stack overflow while matching \"%s\"",
6343 rp->pattern);
6344 rp->error_signaled = TRUE;
6345 }
6346 break;
6347 case -1:
6348 /* No match. */
6349 break;
6350 case 0:
6351 /* Empty string matched. */
6352 if (!rp->error_signaled)
6353 {
6354 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6355 rp->error_signaled = TRUE;
6356 }
6357 break;
6358 default:
6359 /* Match occurred. Construct a tag. */
6360 name = rp->name;
6361 if (name[0] == '\0')
6362 name = NULL;
6363 else /* make a named tag */
6364 name = substitute (lbp->buffer, rp->name, &rp->regs);
6365 if (rp->force_explicit_name)
6366 /* Force explicit tag name, if a name is there. */
6367 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6368 else
6369 make_tag (name, strlen (name), TRUE,
6370 lbp->buffer, match, lineno, linecharno);
6371 break;
6372 }
6373 }
6374 }
6375 }
6376
6377 \f
/*
 * Duplicate the string CP.  The copy, strlen(cp)+1 bytes long, comes
 * from savenstr and thus from xnew.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6388
6389 /*
6390 * Return a pointer to a space of size LEN+1 allocated with xnew where
6391 * the string CP has been copied for at most the first LEN characters.
6392 */
6393 static char *
6394 savenstr (cp, len)
6395 char *cp;
6396 int len;
6397 {
6398 register char *dp;
6399
6400 dp = xnew (len + 1, char);
6401 strncpy (dp, cp, len);
6402 dp[len] = '\0';
6403 return dp;
6404 }
6405
/*
 * Return a pointer to the last place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating null
 * counts as part of the string, so C may be '\0'.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6427
/*
 * Return a pointer to the first place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating null
 * counts as part of the string, so C may be '\0'.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6446
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero when they are equal, otherwise the difference between
 * the first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk along both strings for as long as they agree. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6467
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters.
 *
 * At most the first N characters of S1 and S2 are looked at.  Return
 * zero when they are equal, or when N is not positive; otherwise return
 * the difference between the first pair of characters that differ.
 * Characters beyond the first N never influence the result.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  for (; n > 0; n--, s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference.  Also stop when both strings end
         here: DIFF is zero then, which is the answer.  */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  /* N characters compared equal (or N was not positive). */
  return 0;
}
6493
/* Return a pointer to the first character at or after CP that is not
   white according to iswhite.  The end of the string is not white, so
   the scan stops there at the latest.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6503
/* Return a pointer to the first character at or after CP that is
   either white according to iswhite or the end of the string.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        break;
      cp++;
    }
  return cp;
}
6513
/* Report an error through `error' and terminate the program with
   EXIT_FAILURE.  S1 is the printf control string and S2 its only
   argument, exactly as for `error'.  Does not return.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);               /* "progname: message" on stderr */
  exit (EXIT_FAILURE);
}
6522
/* Report the failure described by `errno' with perror, using S1 as the
   prefix, and terminate the program with EXIT_FAILURE.
   Does not return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);                  /* "S1: <system error text>" on stderr */
  exit (EXIT_FAILURE);
}
6530
6531 static void
6532 suggest_asking_for_help ()
6533 {
6534 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6535 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6536 exit (EXIT_FAILURE);
6537 }
6538
6539 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6540 static void
6541 error (s1, s2)
6542 const char *s1, *s2;
6543 {
6544 fprintf (stderr, "%s: ", progname);
6545 fprintf (stderr, s1, s2);
6546 fprintf (stderr, "\n");
6547 }
6548
6549 /* Return a newly-allocated string whose contents
6550 concatenate those of s1, s2, s3. */
6551 static char *
6552 concat (s1, s2, s3)
6553 char *s1, *s2, *s3;
6554 {
6555 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6556 char *result = xnew (len1 + len2 + len3 + 1, char);
6557
6558 strcpy (result, s1);
6559 strcpy (result + len1, s2);
6560 strcpy (result + len1 + len2, s3);
6561 result[len1 + len2 + len3] = '\0';
6562
6563 return result;
6564 }
6565
6566 \f
6567 /* Does the same work as the system V getcwd, but does not need to
6568 guess the buffer size in advance. */
6569 static char *
6570 etags_getcwd ()
6571 {
6572 #ifdef HAVE_GETCWD
6573 int bufsize = 200;
6574 char *path = xnew (bufsize, char);
6575
6576 while (getcwd (path, bufsize) == NULL)
6577 {
6578 if (errno != ERANGE)
6579 pfatal ("getcwd");
6580 bufsize *= 2;
6581 free (path);
6582 path = xnew (bufsize, char);
6583 }
6584
6585 canonicalize_filename (path);
6586 return path;
6587
6588 #else /* not HAVE_GETCWD */
6589 #if MSDOS
6590
6591 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6592
6593 getwd (path);
6594
6595 for (p = path; *p != '\0'; p++)
6596 if (*p == '\\')
6597 *p = '/';
6598 else
6599 *p = lowcase (*p);
6600
6601 return strdup (path);
6602 #else /* not MSDOS */
6603 linebuffer path;
6604 FILE *pipe;
6605
6606 linebuffer_init (&path);
6607 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6608 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6609 pfatal ("pwd");
6610 pclose (pipe);
6611
6612 return path.buffer;
6613 #endif /* not MSDOS */
6614 #endif /* not HAVE_GETCWD */
6615 }
6616
6617 /* Return a newly allocated string containing the file name of FILE
6618 relative to the absolute directory DIR (which should end with a slash). */
6619 static char *
6620 relative_filename (file, dir)
6621 char *file, *dir;
6622 {
6623 char *fp, *dp, *afn, *res;
6624 int i;
6625
6626 /* Find the common root of file and dir (with a trailing slash). */
6627 afn = absolute_filename (file, cwd);
6628 fp = afn;
6629 dp = dir;
6630 while (*fp++ == *dp++)
6631 continue;
6632 fp--, dp--; /* back to the first differing char */
6633 #ifdef DOS_NT
6634 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6635 return afn;
6636 #endif
6637 do /* look at the equal chars until '/' */
6638 fp--, dp--;
6639 while (*fp != '/');
6640
6641 /* Build a sequence of "../" strings for the resulting relative file name. */
6642 i = 0;
6643 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6644 i += 1;
6645 res = xnew (3*i + strlen (fp + 1) + 1, char);
6646 res[0] = '\0';
6647 while (i-- > 0)
6648 strcat (res, "../");
6649
6650 /* Add the file name relative to the common root of file and dir. */
6651 strcat (res, fp + 1);
6652 free (afn);
6653
6654 return res;
6655 }
6656
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied; otherwise it is appended to DIR.  The
   "/dirname/.." and "/." components are then removed from the result.
   A result that ends up empty is replaced by "/".  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/..": back up to the start of the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Move the rest of the name, terminator included, down
                 over the removed component.  */
#ifdef HAVE_MEMMOVE
              memmove (cp, slashp + 3, strlen (slashp + 2));
#else
              /* strcpy must not be used on overlapping strings, so
                 copy forward by hand (the destination is below the
                 source).  */
              {
                char *to = cp;
                char *from = slashp + 3;

                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/.": simply drop it. */
#ifdef HAVE_MEMMOVE
              memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
              /* Same overlapping, downward copy as above. */
              {
                char *to = slashp;
                char *from = slashp + 2;

                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6729
6730 /* Return a newly allocated string containing the absolute
6731 file name of dir where FILE resides given DIR (which should
6732 end with a slash). */
6733 static char *
6734 absolute_dirname (file, dir)
6735 char *file, *dir;
6736 {
6737 char *slashp, *res;
6738 char save;
6739
6740 slashp = etags_strrchr (file, '/');
6741 if (slashp == NULL)
6742 return savestr (dir);
6743 save = slashp[1];
6744 slashp[1] = '\0';
6745 res = absolute_filename (file, dir);
6746 slashp[1] = save;
6747
6748 return res;
6749 }
6750
/* Whether the argument string is an absolute file name, that is, one
   that starts with a slash or, on DOS_NT, with a drive letter, a colon
   and a slash.  The argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  /* `d:/NAME' */
  if (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return TRUE;
#endif
  return fn[0] == '/';
}
6763
/* Upcase DOS drive letter and collapse separators into single slashes.
   Works in place.

   The separator is the backslash on DOS_NT and the slash elsewhere;
   every run of separators becomes one slash.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next character to write, never past SRC */
#ifdef DOS_NT
  const char sep = '\\';

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
#else
  const char sep = '/';
#endif

  /* Collapse multiple separators into a single slash. */
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6793
6794 \f
6795 /* Initialize a linebuffer for use. */
6796 static void
6797 linebuffer_init (lbp)
6798 linebuffer *lbp;
6799 {
6800 lbp->size = (DEBUG) ? 3 : 200;
6801 lbp->buffer = xnew (lbp->size, char);
6802 lbp->buffer[0] = '\0';
6803 lbp->len = 0;
6804 }
6805
6806 /* Set the minimum size of a string contained in a linebuffer. */
6807 static void
6808 linebuffer_setlen (lbp, toksize)
6809 linebuffer *lbp;
6810 int toksize;
6811 {
6812 while (lbp->size <= toksize)
6813 {
6814 lbp->size *= 2;
6815 xrnew (lbp->buffer, lbp->size, char);
6816 }
6817 lbp->len = toksize;
6818 }
6819
6820 /* Like malloc but get fatal error if memory is exhausted. */
6821 static PTR
6822 xmalloc (size)
6823 unsigned int size;
6824 {
6825 PTR result = (PTR) malloc (size);
6826 if (result == NULL)
6827 fatal ("virtual memory exhausted", (char *)NULL);
6828 return result;
6829 }
6830
6831 static PTR
6832 xrealloc (ptr, size)
6833 char *ptr;
6834 unsigned int size;
6835 {
6836 PTR result = (PTR) realloc (ptr, size);
6837 if (result == NULL)
6838 fatal ("virtual memory exhausted", (char *)NULL);
6839 return result;
6840 }
6841
6842 /*
6843 * Local Variables:
6844 * indent-tabs-mode: t
6845 * tab-width: 8
6846 * fill-column: 79
6847 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6848 * c-file-style: "gnu"
6849 * End:
6850 */
6851
6852 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6853 (do not change this comment) */
6854
6855 /* etags.c ends here */