*** empty log message ***
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 2 of the License, or
40 (at your option) any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program; if not, write to the Free Software Foundation,
49 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
50
51
52 /* NB To comply with the above BSD license, copyright information is
53 reproduced in etc/ETAGS.README. That file should be updated when the
54 above notices are.
55
56 To the best of our knowledge, this code was originally based on the
57 ctags.c distributed with BSD4.2, which was copyrighted by the
58 University of California, as described above. */
59
60
61 /*
62 * Authors:
63 * Ctags originally by Ken Arnold.
64 * Fortran added by Jim Kleckner.
65 * Ed Pelegri-Llopart added C typedefs.
66 * Gnu Emacs TAGS format and modifications by RMS?
67 * 1989 Sam Kendall added C++.
68 * 1992 Joseph B. Wells improved C and C++ parsing.
69 * 1993 Francesco Potortì reorganised C and C++.
70 * 1994 Line-by-line regexp tags by Tom Tromey.
71 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
72 * 2002 #line directives by Francesco Potortì.
73 *
74 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
75 */
76
77 /*
78 * If you want to add support for a new language, start by looking at the LUA
79 * language, which is the simplest. Alternatively, consider shipping a
80 * configuration file containing regexp definitions for etags.
81 */
82
83 char pot_etags_version[] = "@(#) pot revision number is 17.26";
84
85 #define TRUE 1
86 #define FALSE 0
87
88 #ifdef DEBUG
89 # undef DEBUG
90 # define DEBUG TRUE
91 #else
92 # define DEBUG FALSE
93 # define NDEBUG /* disable assert */
94 #endif
95
96 #ifdef HAVE_CONFIG_H
97 # include <config.h>
98 /* On some systems, Emacs defines static as nothing for the sake
99 of unexec. We don't want that here since we don't use unexec. */
100 # undef static
101 # ifndef PTR /* for XEmacs */
102 # define PTR void *
103 # endif
104 # ifndef __P /* for XEmacs */
105 # define __P(args) args
106 # endif
107 #else /* no config.h */
108 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
109 # define __P(args) args /* use prototypes */
110 # define PTR void * /* for generic pointers */
111 # else /* not standard C */
112 # define __P(args) () /* no prototypes */
113 # define const /* remove const for old compilers' sake */
114 # define PTR long * /* don't use void* */
115 # endif
116 #endif /* !HAVE_CONFIG_H */
117
118 #ifndef _GNU_SOURCE
119 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
120 #endif
121
122 /* WIN32_NATIVE is for XEmacs.
123 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
124 #ifdef WIN32_NATIVE
125 # undef MSDOS
126 # undef WINDOWSNT
127 # define WINDOWSNT
128 #endif /* WIN32_NATIVE */
129
130 #ifdef MSDOS
131 # undef MSDOS
132 # define MSDOS TRUE
133 # include <fcntl.h>
134 # include <sys/param.h>
135 # include <io.h>
136 # ifndef HAVE_CONFIG_H
137 # define DOS_NT
138 # include <sys/config.h>
139 # endif
140 #else
141 # define MSDOS FALSE
142 #endif /* MSDOS */
143
144 #ifdef WINDOWSNT
145 # include <stdlib.h>
146 # include <fcntl.h>
147 # include <string.h>
148 # include <direct.h>
149 # include <io.h>
150 # define MAXPATHLEN _MAX_PATH
151 # undef HAVE_NTGUI
152 # undef DOS_NT
153 # define DOS_NT
154 # ifndef HAVE_GETCWD
155 # define HAVE_GETCWD
156 # endif /* undef HAVE_GETCWD */
157 #else /* not WINDOWSNT */
158 # ifdef STDC_HEADERS
159 # include <stdlib.h>
160 # include <string.h>
161 # else /* no standard C headers */
162 extern char *getenv ();
163 # ifdef VMS
164 # define EXIT_SUCCESS 1
165 # define EXIT_FAILURE 0
166 # else /* no VMS */
167 # define EXIT_SUCCESS 0
168 # define EXIT_FAILURE 1
169 # endif
170 # endif
171 #endif /* !WINDOWSNT */
172
173 #ifdef HAVE_UNISTD_H
174 # include <unistd.h>
175 #else
176 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
177 extern char *getcwd (char *buf, size_t size);
178 # endif
179 #endif /* HAVE_UNISTD_H */
180
181 #include <stdio.h>
182 #include <ctype.h>
183 #include <errno.h>
184 #ifndef errno
185 extern int errno;
186 #endif
187 #include <sys/types.h>
188 #include <sys/stat.h>
189
190 #include <assert.h>
191 #ifdef NDEBUG
192 # undef assert /* some systems have a buggy assert.h */
193 # define assert(x) ((void) 0)
194 #endif
195
196 #if !defined (S_ISREG) && defined (S_IFREG)
197 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
198 #endif
199
200 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
201 # define NO_LONG_OPTIONS TRUE
202 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
203 extern char *optarg;
204 extern int optind, opterr;
205 #else
206 # define NO_LONG_OPTIONS FALSE
207 # include <getopt.h>
208 #endif /* NO_LONG_OPTIONS */
209
210 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
211 # ifdef __CYGWIN__ /* compiling on Cygwin */
212 !!! NOTICE !!!
213 the regex.h distributed with Cygwin is not compatible with etags, alas!
214 If you want regular expression support, you should delete this notice and
215 arrange to use the GNU regex.h and regex.c.
216 # endif
217 #endif
218 #include <regex.h>
219
220 /* Define CTAGS to make the program "ctags" compatible with the usual one.
221 Leave it undefined to make the program "etags", which makes emacs-style
222 tag tables and tags typedefs, #defines and struct/union/enum by default. */
223 #ifdef CTAGS
224 # undef CTAGS
225 # define CTAGS TRUE
226 #else
227 # define CTAGS FALSE
228 #endif
229
/*
 * String equality helpers.  Each macro yields nonzero when the two
 * strings compare equal (the "case" variants ignore letter case, the
 * "n" variants look at no more than N characters).
 *
 * When assertions are enabled, BOTH arguments are checked to be
 * non-NULL before the comparison: passing a null pointer to the
 * underlying comparison function is undefined, so it is not enough
 * for just one of the two to be valid.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
234
235 #define CHARS 256 /* number of distinct 8-bit char values (2^8) */
236 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) /* reduce x to an index in 0..CHARS-1 */
237 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
238 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
239 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
240 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
241 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
242
243 #define ISALNUM(c) isalnum (CHAR(c))
244 #define ISALPHA(c) isalpha (CHAR(c))
245 #define ISDIGIT(c) isdigit (CHAR(c))
246 #define ISLOWER(c) islower (CHAR(c))
247
248 #define lowcase(c) tolower (CHAR(c))
249 #define upcase(c) toupper (CHAR(c))
250
251
252 /*
253 * xnew, xrnew -- allocate, reallocate storage
254 *
255 * SYNOPSIS: Type *xnew (int n, Type);
256 * void xrnew (OldPointer, int n, Type);
257 */
258 #if DEBUG
259 # include "chkmalloc.h"
260 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
261 (n) * sizeof (Type)))
262 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
263 (char *) (op), (n) * sizeof (Type)))
264 #else
265 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
266 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
267 (char *) (op), (n) * sizeof (Type)))
268 #endif
269
270 #define bool int
271
272 typedef void Lang_function __P((FILE *));
273
274 typedef struct
275 {
276 char *suffix; /* file name suffix for this compressor */
277 char *command; /* takes one arg and decompresses to stdout */
278 } compressor;
279
280 typedef struct
281 {
282 char *name; /* language name */
283 char *help; /* detailed help for the language */
284 Lang_function *function; /* parse function */
285 char **suffixes; /* name suffixes of this language's files */
286 char **filenames; /* names of this language's files */
287 char **interpreters; /* interpreters for this language */
288 bool metasource; /* source used to generate other sources */
289 } language;
290
291 typedef struct fdesc
292 {
293 struct fdesc *next; /* for the linked list */
294 char *infname; /* uncompressed input file name */
295 char *infabsname; /* absolute uncompressed input file name */
296 char *infabsdir; /* absolute dir of input file */
297 char *taggedfname; /* file name to write in tagfile */
298 language *lang; /* language of file */
299 char *prop; /* file properties to write in tagfile */
300 bool usecharno; /* etags tags shall contain char number */
301 bool written; /* entry written in the tags file */
302 } fdesc;
303
304 typedef struct node_st
305 { /* one tag; a node of the binary tree of tags */
306 struct node_st *left, *right; /* left and right sons */
307 fdesc *fdp; /* description of the file to which the tag belongs */
308 char *name; /* tag name */
309 char *regex; /* search regexp */
310 bool valid; /* write this tag on the tag file */
311 bool is_func; /* function tag: use regexp in CTAGS mode */
312 bool been_warned; /* warning already given for duplicated tag */
313 int lno; /* line number tag is on */
314 long cno; /* character number line starts on */
315 } node;
316
317 /*
318 * A `linebuffer' is a structure which holds a line of text.
319 * `readline_internal' reads a line from a stream into a linebuffer
320 * and works regardless of the length of the line.
321 * SIZE is the size of BUFFER, LEN is the length of the string in
322 * BUFFER after readline reads it.
323 */
324 typedef struct
325 {
326 long size;
327 int len;
328 char *buffer;
329 } linebuffer;
330
331 /* Used to support mixing of --lang and file names. */
332 typedef struct
333 {
334 enum {
335 at_language, /* a language specification */
336 at_regexp, /* a regular expression */
337 at_filename, /* a file name */
338 at_stdin, /* read from stdin here */
339 at_end /* stop parsing the list */
340 } arg_type; /* argument type */
341 language *lang; /* language associated with the argument */
342 char *what; /* the argument itself */
343 } argument;
344
345 /* Structure defining a regular expression; instances are chained through p_next. */
346 typedef struct regexp
347 {
348 struct regexp *p_next; /* pointer to next in list */
349 language *lang; /* if set, use only for this language */
350 char *pattern; /* the regexp pattern */
351 char *name; /* tag name */
352 struct re_pattern_buffer *pat; /* the compiled pattern */
353 struct re_registers regs; /* re registers */
354 bool error_signaled; /* already signaled for this regexp */
355 bool force_explicit_name; /* do not allow implicit tag name */
356 bool ignore_case; /* ignore case when matching */
357 bool multi_line; /* do a multi-line match on the whole file */
358 } regexp;
359
360
361 /* Many compilers barf on this:
362 Lang_function Ada_funcs;
363 so let's write it this way */
364 static void Ada_funcs __P((FILE *));
365 static void Asm_labels __P((FILE *));
366 static void C_entries __P((int c_ext, FILE *));
367 static void default_C_entries __P((FILE *));
368 static void plain_C_entries __P((FILE *));
369 static void Cjava_entries __P((FILE *));
370 static void Cobol_paragraphs __P((FILE *));
371 static void Cplusplus_entries __P((FILE *));
372 static void Cstar_entries __P((FILE *));
373 static void Erlang_functions __P((FILE *));
374 static void Forth_words __P((FILE *));
375 static void Fortran_functions __P((FILE *));
376 static void HTML_labels __P((FILE *));
377 static void Lisp_functions __P((FILE *));
378 static void Lua_functions __P((FILE *));
379 static void Makefile_targets __P((FILE *));
380 static void Pascal_functions __P((FILE *));
381 static void Perl_functions __P((FILE *));
382 static void PHP_functions __P((FILE *));
383 static void PS_functions __P((FILE *));
384 static void Prolog_functions __P((FILE *));
385 static void Python_functions __P((FILE *));
386 static void Scheme_functions __P((FILE *));
387 static void TeX_commands __P((FILE *));
388 static void Texinfo_nodes __P((FILE *));
389 static void Yacc_entries __P((FILE *));
390 static void just_read_file __P((FILE *));
391
392 static void print_language_names __P((void));
393 static void print_version __P((void));
394 static void print_help __P((argument *));
395 int main __P((int, char **));
396
397 static compressor *get_compressor_from_suffix __P((char *, char **));
398 static language *get_language_from_langname __P((const char *));
399 static language *get_language_from_interpreter __P((char *));
400 static language *get_language_from_filename __P((char *, bool));
401 static void readline __P((linebuffer *, FILE *));
402 static long readline_internal __P((linebuffer *, FILE *));
403 static bool nocase_tail __P((char *));
404 static void get_tag __P((char *, char **));
405
406 static void analyse_regex __P((char *));
407 static void free_regexps __P((void));
408 static void regex_tag_multiline __P((void));
409 static void error __P((const char *, const char *));
410 static void suggest_asking_for_help __P((void));
411 void fatal __P((char *, char *));
412 static void pfatal __P((char *));
413 static void add_node __P((node *, node **));
414
415 static void init __P((void));
416 static void process_file_name __P((char *, language *));
417 static void process_file __P((FILE *, char *, language *));
418 static void find_entries __P((FILE *));
419 static void free_tree __P((node *));
420 static void free_fdesc __P((fdesc *));
421 static void pfnote __P((char *, bool, char *, int, int, long));
422 static void make_tag __P((char *, int, bool, char *, int, int, long));
423 static void invalidate_nodes __P((fdesc *, node **));
424 static void put_entries __P((node *));
425
426 static char *concat __P((char *, char *, char *));
427 static char *skip_spaces __P((char *));
428 static char *skip_non_spaces __P((char *));
429 static char *savenstr __P((char *, int));
430 static char *savestr __P((char *));
431 static char *etags_strchr __P((const char *, int));
432 static char *etags_strrchr __P((const char *, int));
433 static int etags_strcasecmp __P((const char *, const char *));
434 static int etags_strncasecmp __P((const char *, const char *, int));
435 static char *etags_getcwd __P((void));
436 static char *relative_filename __P((char *, char *));
437 static char *absolute_filename __P((char *, char *));
438 static char *absolute_dirname __P((char *, char *));
439 static bool filename_is_absolute __P((char *f));
440 static void canonicalize_filename __P((char *));
441 static void linebuffer_init __P((linebuffer *));
442 static void linebuffer_setlen __P((linebuffer *, int));
443 static PTR xmalloc __P((unsigned int));
444 static PTR xrealloc __P((char *, unsigned int));
445
446 \f
447 static char searchar = '/'; /* use /.../ searches */
448
449 static char *tagfile; /* output file */
450 static char *progname; /* name this program was invoked with */
451 static char *cwd; /* current working directory */
452 static char *tagfiledir; /* directory of tagfile */
453 static FILE *tagf; /* ioptr for tags file */
454
455 static fdesc *fdhead; /* head of file description list */
456 static fdesc *curfdp; /* current file description */
457 static int lineno; /* line number of current line */
458 static long charno; /* current character number */
459 static long linecharno; /* charno of start of current line */
460 static char *dbp; /* pointer to start of current tag */
461
462 static const int invalidcharno = -1;
463
464 static node *nodehead; /* the head of the binary tree of tags */
465 static node *last_node; /* the last node created */
466
467 static linebuffer lb; /* the current line */
468 static linebuffer filebuf; /* a buffer containing the whole file */
469 static linebuffer token_name; /* a buffer containing a tag name */
470
471 /* boolean "functions" (see init) */
472 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
473 static char
474 /* white chars */
475 *white = " \f\t\n\r\v",
476 /* not in a name */
477 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
478 /* token ending chars */
479 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
480 /* token starting chars */
481 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
482 /* valid in-token chars */
483 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
484
485 static bool append_to_tagfile; /* -a: append to tags */
486 /* The next four default to TRUE for etags, but to FALSE for ctags. */
487 static bool typedefs; /* -t: create tags for C and Ada typedefs */
488 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
489 /* 0 struct/enum/union decls, and C++ */
490 /* member functions. */
491 static bool constantypedefs; /* -d: create tags for C #define, enum */
492 /* constants and variables. */
493 /* -D: opposite of -d. Default under ctags. */
494 static bool globals; /* create tags for global variables */
495 static bool members; /* create tags for C member variables */
496 static bool declarations; /* --declarations: tag them and extern in C&Co*/
497 static bool no_line_directive; /* ignore #line directives (undocumented) */
498 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
499 static bool update; /* -u: update tags */
500 static bool vgrind_style; /* -v: create vgrind style index output */
501 static bool no_warnings; /* -w: suppress warnings (undocumented) */
502 static bool cxref_style; /* -x: create cxref style output */
503 static bool cplusplus; /* .[hc] means C++, not C */
504 static bool ignoreindent; /* -I: ignore indentation in C */
505 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
506
507 /* STDIN is defined in LynxOS system headers */
508 #ifdef STDIN
509 # undef STDIN
510 #endif
511
512 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
513 static bool parsing_stdin; /* --parse-stdin used */
514
515 static regexp *p_head; /* list of all regexps */
516 static bool need_filebuf; /* some regexes are multi-line */
517
518 static struct option longopts[] =
519 {
520 { "append", no_argument, NULL, 'a' },
521 { "packages-only", no_argument, &packages_only, TRUE },
522 { "c++", no_argument, NULL, 'C' },
523 { "declarations", no_argument, &declarations, TRUE },
524 { "no-line-directive", no_argument, &no_line_directive, TRUE },
525 { "no-duplicates", no_argument, &no_duplicates, TRUE },
526 { "help", no_argument, NULL, 'h' },
527 { "help", no_argument, NULL, 'H' },
528 { "ignore-indentation", no_argument, NULL, 'I' },
529 { "language", required_argument, NULL, 'l' },
530 { "members", no_argument, &members, TRUE },
531 { "no-members", no_argument, &members, FALSE },
532 { "output", required_argument, NULL, 'o' },
533 { "regex", required_argument, NULL, 'r' },
534 { "no-regex", no_argument, NULL, 'R' },
535 { "ignore-case-regex", required_argument, NULL, 'c' },
536 { "parse-stdin", required_argument, NULL, STDIN },
537 { "version", no_argument, NULL, 'V' },
538
539 #if CTAGS /* Ctags options */
540 { "backward-search", no_argument, NULL, 'B' },
541 { "cxref", no_argument, NULL, 'x' },
542 { "defines", no_argument, NULL, 'd' },
543 { "globals", no_argument, &globals, TRUE },
544 { "typedefs", no_argument, NULL, 't' },
545 { "typedefs-and-c++", no_argument, NULL, 'T' },
546 { "update", no_argument, NULL, 'u' },
547 { "vgrind", no_argument, NULL, 'v' },
548 { "no-warn", no_argument, NULL, 'w' },
549
550 #else /* Etags options */
551 { "no-defines", no_argument, NULL, 'D' },
552 { "no-globals", no_argument, &globals, FALSE },
553 { "include", required_argument, NULL, 'i' },
554 #endif
555 { NULL }
556 };
557
558 static compressor compressors[] =
559 {
560 { "z", "gzip -d -c"},
561 { "Z", "gzip -d -c"},
562 { "gz", "gzip -d -c"},
563 { "GZ", "gzip -d -c"},
564 { "bz2", "bzip2 -d -c" },
565 { NULL }
566 };
567
568 /*
569 * Language stuff.
570 */
571
572 /* Ada code */
573 static char *Ada_suffixes [] =
574 { "ads", "adb", "ada", NULL };
575 static char Ada_help [] =
576 "In Ada code, functions, procedures, packages, tasks and types are\n\
577 tags. Use the `--packages-only' option to create tags for\n\
578 packages only.\n\
579 Ada tag names have suffixes indicating the type of entity:\n\
580 Entity type: Qualifier:\n\
581 ------------ ----------\n\
582 function /f\n\
583 procedure /p\n\
584 package spec /s\n\
585 package body /b\n\
586 type /t\n\
587 task /k\n\
588 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
589 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
590 will just search for any tag `bidule'.";
591
592 /* Assembly code */
593 static char *Asm_suffixes [] =
594 { "a", /* Unix assembler */
595 "asm", /* Microcontroller assembly */
596 "def", /* BSO/Tasking definition includes */
597 "inc", /* Microcontroller include files */
598 "ins", /* Microcontroller include files */
599 "s", "sa", /* Unix assembler */
600 "S", /* cpp-processed Unix assembler */
601 "src", /* BSO/Tasking C compiler output */
602 NULL
603 };
604 static char Asm_help [] =
605 "In assembler code, labels appearing at the beginning of a line,\n\
606 followed by a colon, are tags.";
607
608
609 /* Note that .c and .h can be considered C++, if the --c++ flag was
610 given, or if the `class' or `template' keywords are met inside the file.
611 That is why default_C_entries is called for these. */
612 static char *default_C_suffixes [] =
613 { "c", "h", NULL };
614 static char default_C_help [] =
615 "In C code, any C function or typedef is a tag, and so are\n\
616 definitions of `struct', `union' and `enum'. `#define' macro\n\
617 definitions and `enum' constants are tags unless you specify\n\
618 `--no-defines'. Global variables are tags unless you specify\n\
619 `--no-globals' and so are struct members unless you specify\n\
620 `--no-members'. Use of `--no-globals', `--no-defines' and\n\
621 `--no-members' can make the tags table file much smaller.\n\
622 You can tag function declarations and external variables by\n\
623 using `--declarations'.";
624
625 static char *Cplusplus_suffixes [] =
626 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
627 "M", /* Objective C++ */
628 "pdb", /* Postscript with C syntax */
629 NULL };
630 static char Cplusplus_help [] =
631 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
632 --help --lang=c --lang=c++ for full help.)\n\
633 In addition to C tags, member functions are also recognized. Member\n\
634 variables are recognized unless you use the `--no-members' option.\n\
635 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
636 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
637 `operator+'.";
638
639 static char *Cjava_suffixes [] =
640 { "java", NULL };
641 static char Cjava_help [] =
642 "In Java code, all the tags constructs of C and C++ code are\n\
643 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
644
645
646 static char *Cobol_suffixes [] =
647 { "COB", "cob", NULL };
648 static char Cobol_help [] =
649 "In Cobol code, tags are paragraph names; that is, any word\n\
650 starting in column 8 and followed by a period.";
651
652 static char *Cstar_suffixes [] =
653 { "cs", "hs", NULL };
654
655 static char *Erlang_suffixes [] =
656 { "erl", "hrl", NULL };
657 static char Erlang_help [] =
658 "In Erlang code, the tags are the functions, records and macros\n\
659 defined in the file.";
660
/* Forth code: file name suffixes, NULL-terminated.  File-local like the
   suffix tables of the other languages. */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
663 static char Forth_help [] =
664 "In Forth code, tags are words defined by `:',\n\
665 constant, code, create, defer, value, variable, buffer:, field.";
666
667 static char *Fortran_suffixes [] =
668 { "F", "f", "f90", "for", NULL };
669 static char Fortran_help [] =
670 "In Fortran code, functions, subroutines and block data are tags.";
671
672 static char *HTML_suffixes [] =
673 { "htm", "html", "shtml", NULL };
674 static char HTML_help [] =
675 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
676 `h3' headers. Also, tags are `name=' in anchors and all\n\
677 occurrences of `id='.";
678
679 static char *Lisp_suffixes [] =
680 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
681 static char Lisp_help [] =
682 "In Lisp code, any function defined with `defun', any variable\n\
683 defined with `defvar' or `defconst', and in general the first\n\
684 argument of any expression that starts with `(def' in column zero\n\
685 is a tag.";
686
687 static char *Lua_suffixes [] =
688 { "lua", "LUA", NULL };
689 static char Lua_help [] =
690 "In Lua scripts, all functions are tags.";
691
692 static char *Makefile_filenames [] =
693 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
694 static char Makefile_help [] =
695 "In makefiles, targets are tags; additionally, variables are tags\n\
696 unless you specify `--no-globals'.";
697
698 static char *Objc_suffixes [] =
699 { "lm", /* Objective lex file */
700 "m", /* Objective C file */
701 NULL };
702 static char Objc_help [] =
703 "In Objective C code, tags include Objective C definitions for classes,\n\
704 class categories, methods and protocols. Tags for variables and\n\
705 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
706 (Use --help --lang=c --lang=objc --lang=java for full help.)";
707
708 static char *Pascal_suffixes [] =
709 { "p", "pas", NULL };
710 static char Pascal_help [] =
711 "In Pascal code, the tags are the functions and procedures defined\n\
712 in the file.";
713 /* " // this is for working around an Emacs highlighting bug... */
714
715 static char *Perl_suffixes [] =
716 { "pl", "pm", NULL };
717 static char *Perl_interpreters [] =
718 { "perl", "@PERL@", NULL };
719 static char Perl_help [] =
720 "In Perl code, the tags are the packages, subroutines and variables\n\
721 defined by the `package', `sub', `my' and `local' keywords. Use\n\
722 `--globals' if you want to tag global variables. Tags for\n\
723 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
724 defined in the default package is `main::SUB'.";
725
726 static char *PHP_suffixes [] =
727 { "php", "php3", "php4", NULL };
728 static char PHP_help [] =
729 "In PHP code, tags are functions, classes and defines. Unless you use\n\
730 the `--no-members' option, vars are tags too.";
731
732 static char *plain_C_suffixes [] =
733 { "pc", /* Pro*C file */
734 NULL };
735
736 static char *PS_suffixes [] =
737 { "ps", "psw", NULL }; /* .psw is for PSWrap */
738 static char PS_help [] =
739 "In PostScript code, the tags are the functions.";
740
741 static char *Prolog_suffixes [] =
742 { "prolog", NULL };
743 static char Prolog_help [] =
744 "In Prolog code, tags are predicates and rules at the beginning of\n\
745 line.";
746
747 static char *Python_suffixes [] =
748 { "py", NULL };
749 static char Python_help [] =
750 "In Python code, `def' or `class' at the beginning of a line\n\
751 generate a tag.";
752
753 /* Can't do the `SCM' or `scm' prefix with a version number. */
754 static char *Scheme_suffixes [] =
755 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
756 static char Scheme_help [] =
757 "In Scheme code, tags include anything defined with `def' or with a\n\
758 construct whose name starts with `def'. They also include\n\
759 variables set with `set!' at top level in the file.";
760
761 static char *TeX_suffixes [] =
762 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
763 static char TeX_help [] =
764 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
765 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
766 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
767 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
768 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
769 \n\
770 Other commands can be specified by setting the environment variable\n\
771 `TEXTAGS' to a colon-separated list like, for example,\n\
772 TEXTAGS=\"mycommand:myothercommand\".";
773
774
775 static char *Texinfo_suffixes [] =
776 { "texi", "texinfo", "txi", NULL };
777 static char Texinfo_help [] =
778 "for texinfo files, lines starting with @node are tagged.";
779
780 static char *Yacc_suffixes [] =
781 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
782 static char Yacc_help [] =
783 "In Bison or Yacc input files, each rule defines as a tag the\n\
784 nonterminal it constructs. The portions of the file that contain\n\
785 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
786 for full help).";
787
788 static char auto_help [] =
789 "`auto' is not a real language, it indicates to use\n\
790 a default language for files base on file name suffix and file contents.";
791
792 static char none_help [] =
793 "`none' is not a real language, it indicates to only do\n\
794 regexp processing on files.";
795
796 static char no_lang_help [] =
797 "No detailed help available for this language.";
798
799
800 /*
801 * Table of languages.
802 *
803 * It is ok for a given function to be listed under more than one
804 * name, as plain_C_entries is for `objc' and `proc'.
805 */
806
807 static language lang_names [] =
808 {
809 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
810 { "asm", Asm_help, Asm_labels, Asm_suffixes },
811 { "c", default_C_help, default_C_entries, default_C_suffixes },
812 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
813 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
814 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
815 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
816 { "forth", Forth_help, Forth_words, Forth_suffixes },
817 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
818 { "html", HTML_help, HTML_labels, HTML_suffixes },
819 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
820 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
821 { "lua", Lua_help, Lua_functions, Lua_suffixes },
822 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
823 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
824 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
825 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
826 { "php", PHP_help, PHP_functions, PHP_suffixes },
827 { "postscript",PS_help, PS_functions, PS_suffixes },
828 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
829 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
830 { "python", Python_help, Python_functions, Python_suffixes },
831 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
832 { "tex", TeX_help, TeX_commands, TeX_suffixes },
833 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
834 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
835 { "auto", auto_help }, /* default guessing scheme */
836 { "none", none_help, just_read_file }, /* regexp matching only */
837 { NULL } /* end of list */
838 };
839
840 \f
841 static void
842 print_language_names ()
843 {
844 language *lang;
845 char **name, **ext;
846
847 puts ("\nThese are the currently supported languages, along with the\n\
848 default file names and dot suffixes:");
849 for (lang = lang_names; lang->name != NULL; lang++)
850 {
851 printf (" %-*s", 10, lang->name);
852 if (lang->filenames != NULL)
853 for (name = lang->filenames; *name != NULL; name++)
854 printf (" %s", *name);
855 if (lang->suffixes != NULL)
856 for (ext = lang->suffixes; *ext != NULL; ext++)
857 printf (" .%s", *ext);
858 puts ("");
859 }
860 puts ("where `auto' means use default language for files based on file\n\
861 name suffix, and `none' means only do regexp processing on files.\n\
862 If no language is specified and no matching suffix is found,\n\
863 the first line of the file is read for a sharp-bang (#!) sequence\n\
864 followed by the name of an interpreter. If no such sequence is found,\n\
865 Fortran is tried first; if no tags are found, C is tried next.\n\
866 When parsing any C file, a \"class\" or \"template\" keyword\n\
867 switches to C++.");
868 puts ("Compressed files are supported using gzip and bzip2.\n\
869 \n\
870 For detailed help on a given language use, for example,\n\
871 etags --help --lang=ada.");
872 }
873
874 #ifndef EMACS_NAME
875 # define EMACS_NAME "standalone"
876 #endif
877 #ifndef VERSION
878 # define VERSION "version"
879 #endif
880 static void
881 print_version ()
882 {
883 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
884 puts ("Copyright (C) 2007 Free Software Foundation, Inc.");
885 puts ("This program is distributed under the terms in ETAGS.README");
886
887 exit (EXIT_SUCCESS);
888 }
889
890 static void
891 print_help (argbuffer)
892 argument *argbuffer;
893 {
894 bool help_for_lang = FALSE;
895
896 for (; argbuffer->arg_type != at_end; argbuffer++)
897 if (argbuffer->arg_type == at_language)
898 {
899 if (help_for_lang)
900 puts ("");
901 puts (argbuffer->lang->help);
902 help_for_lang = TRUE;
903 }
904
905 if (help_for_lang)
906 exit (EXIT_SUCCESS);
907
908 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
909 \n\
910 These are the options accepted by %s.\n", progname, progname);
911 if (NO_LONG_OPTIONS)
912 puts ("WARNING: long option names do not work with this executable,\n\
913 as it is not linked with GNU getopt.");
914 else
915 puts ("You may use unambiguous abbreviations for the long option names.");
916 puts (" A - as file name means read names from stdin (one per line).\n\
917 Absolute names are stored in the output file as they are.\n\
918 Relative ones are stored relative to the output file's directory.\n");
919
920 puts ("-a, --append\n\
921 Append tag entries to existing tags file.");
922
923 puts ("--packages-only\n\
924 For Ada files, only generate tags for packages.");
925
926 if (CTAGS)
927 puts ("-B, --backward-search\n\
928 Write the search commands for the tag entries using '?', the\n\
929 backward-search command instead of '/', the forward-search command.");
930
931 /* This option is mostly obsolete, because etags can now automatically
932 detect C++. Retained for backward compatibility and for debugging and
933 experimentation. In principle, we could want to tag as C++ even
934 before any "class" or "template" keyword.
935 puts ("-C, --c++\n\
936 Treat files whose name suffix defaults to C language as C++ files.");
937 */
938
939 puts ("--declarations\n\
940 In C and derived languages, create tags for function declarations,");
941 if (CTAGS)
942 puts ("\tand create tags for extern variables if --globals is used.");
943 else
944 puts
945 ("\tand create tags for extern variables unless --no-globals is used.");
946
947 if (CTAGS)
948 puts ("-d, --defines\n\
949 Create tag entries for C #define constants and enum constants, too.");
950 else
951 puts ("-D, --no-defines\n\
952 Don't create tag entries for C #define constants and enum constants.\n\
953 This makes the tags file smaller.");
954
955 if (!CTAGS)
956 puts ("-i FILE, --include=FILE\n\
957 Include a note in tag file indicating that, when searching for\n\
958 a tag, one should also consult the tags file FILE after\n\
959 checking the current file.");
960
961 puts ("-l LANG, --language=LANG\n\
962 Force the following files to be considered as written in the\n\
963 named language up to the next --language=LANG option.");
964
965 if (CTAGS)
966 puts ("--globals\n\
967 Create tag entries for global variables in some languages.");
968 else
969 puts ("--no-globals\n\
970 Do not create tag entries for global variables in some\n\
971 languages. This makes the tags file smaller.");
972 puts ("--no-members\n\
973 Do not create tag entries for members of structures\n\
974 in some languages.");
975
976 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
977 Make a tag for each line matching a regular expression pattern\n\
978 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
979 files only. REGEXFILE is a file containing one REGEXP per line.\n\
980 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
981 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
982 puts (" If TAGNAME/ is present, the tags created are named.\n\
983 For example Tcl named tags can be created with:\n\
984 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
985 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
986 `m' means to allow multi-line matches, `s' implies `m' and\n\
987 causes dot to match any character, including newline.");
988 puts ("-R, --no-regex\n\
989 Don't create tags from regexps for the following files.");
990 puts ("-I, --ignore-indentation\n\
991 In C and C++ do not assume that a closing brace in the first\n\
992 column is the final brace of a function or structure definition.");
993 puts ("-o FILE, --output=FILE\n\
994 Write the tags to FILE.");
995 puts ("--parse-stdin=NAME\n\
996 Read from standard input and record tags as belonging to file NAME.");
997
998 if (CTAGS)
999 {
1000 puts ("-t, --typedefs\n\
1001 Generate tag entries for C and Ada typedefs.");
1002 puts ("-T, --typedefs-and-c++\n\
1003 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1004 and C++ member functions.");
1005 }
1006
1007 if (CTAGS)
1008 puts ("-u, --update\n\
1009 Update the tag entries for the given files, leaving tag\n\
1010 entries for other files in place. Currently, this is\n\
1011 implemented by deleting the existing entries for the given\n\
1012 files and then rewriting the new entries at the end of the\n\
1013 tags file. It is often faster to simply rebuild the entire\n\
1014 tag file than to use this.");
1015
1016 if (CTAGS)
1017 {
1018 puts ("-v, --vgrind\n\
1019 Print on the standard output an index of items intended for\n\
1020 human consumption, similar to the output of vgrind. The index\n\
1021 is sorted, and gives the page number of each item.");
1022 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
1023 puts ("-w, --no-duplicates\n\
1024 Do not create duplicate tag entries, for compatibility with\n\
1025 traditional ctags.");
1026 puts ("-w, --no-warn\n\
1027 Suppress warning messages about duplicate tag entries.");
1028 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
1029 puts ("-x, --cxref\n\
1030 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1031 The output uses line numbers instead of page numbers, but\n\
1032 beyond that the differences are cosmetic; try both to see\n\
1033 which you like.");
1034 }
1035
1036 puts ("-V, --version\n\
1037 Print the version of the program.\n\
1038 -h, --help\n\
1039 Print this help message.\n\
1040 Followed by one or more `--language' options prints detailed\n\
1041 help about tag generation for the specified languages.");
1042
1043 print_language_names ();
1044
1045 puts ("");
1046 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1047
1048 exit (EXIT_SUCCESS);
1049 }
1050
1051 \f
1052 #ifdef VMS /* VMS specific functions */
1053
1054 #define EOS '\0'
1055
1056 /* This is a BUG! ANY arbitrary limit is a BUG!
1057 Won't someone please fix this? */
1058 #define MAX_FILE_SPEC_LEN 255
1059 typedef struct {
1060 short curlen;
1061 char body[MAX_FILE_SPEC_LEN + 1];
1062 } vspec;
1063
1064 /*
1065 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1066 returning in each successive call the next file name matching the input
1067 spec. The function expects that each in_spec passed
1068 to it will be processed to completion; in particular, up to and
1069 including the call following that in which the last matching name
1070 is returned, the function ignores the value of in_spec, and will
1071 only start processing a new spec with the following call.
1072 If an error occurs, on return out_spec contains the value
1073 of in_spec when the error occurred.
1074
1075 With each successive file name returned in out_spec, the
1076 function's return value is one. When there are no more matching
1077 names the function returns zero. If on the first call no file
1078 matches in_spec, or there is any other error, -1 is returned.
1079 */
1080
1081 #include <rmsdef.h>
1082 #include <descrip.h>
1083 #define OUTSIZE MAX_FILE_SPEC_LEN
1084 static short
1085 fn_exp (out, in)
1086 vspec *out;
1087 char *in;
1088 {
1089 static long context = 0;
1090 static struct dsc$descriptor_s o;
1091 static struct dsc$descriptor_s i;
1092 static bool pass1 = TRUE;
1093 long status;
1094 short retval;
1095
1096 if (pass1)
1097 {
1098 pass1 = FALSE;
1099 o.dsc$a_pointer = (char *) out;
1100 o.dsc$w_length = (short)OUTSIZE;
1101 i.dsc$a_pointer = in;
1102 i.dsc$w_length = (short)strlen(in);
1103 i.dsc$b_dtype = DSC$K_DTYPE_T;
1104 i.dsc$b_class = DSC$K_CLASS_S;
1105 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1106 o.dsc$b_class = DSC$K_CLASS_VS;
1107 }
1108 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1109 {
1110 out->body[out->curlen] = EOS;
1111 return 1;
1112 }
1113 else if (status == RMS$_NMF)
1114 retval = 0;
1115 else
1116 {
1117 strcpy(out->body, in);
1118 retval = -1;
1119 }
1120 lib$find_file_end(&context);
1121 pass1 = TRUE;
1122 return retval;
1123 }
1124
1125 /*
1126 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1127 name of each file specified by the provided arg expanding wildcards.
1128 */
1129 static char *
1130 gfnames (arg, p_error)
1131 char *arg;
1132 bool *p_error;
1133 {
1134 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1135
1136 switch (fn_exp (&filename, arg))
1137 {
1138 case 1:
1139 *p_error = FALSE;
1140 return filename.body;
1141 case 0:
1142 *p_error = FALSE;
1143 return NULL;
1144 default:
1145 *p_error = TRUE;
1146 return filename.body;
1147 }
1148 }
1149
#ifndef OLD /* Newer versions of VMS do provide `system'. */
/*
 * Stand-in for system() where the C library lacks it.  It reports
 * that the function is not implemented and returns -1, so that
 * callers comparing the result with EXIT_SUCCESS see a failure
 * instead of an indeterminate value.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1157
1158 #define VERSION_DELIM ';'
1159 char *massage_name (s)
1160 char *s;
1161 {
1162 char *start = s;
1163
1164 for ( ; *s; s++)
1165 if (*s == VERSION_DELIM)
1166 {
1167 *s = EOS;
1168 break;
1169 }
1170 else
1171 *s = lowcase (*s);
1172 return start;
1173 }
1174 #endif /* VMS */
1175
1176 \f
1177 int
1178 main (argc, argv)
1179 int argc;
1180 char *argv[];
1181 {
1182 int i;
1183 unsigned int nincluded_files;
1184 char **included_files;
1185 argument *argbuffer;
1186 int current_arg, file_count;
1187 linebuffer filename_lb;
1188 bool help_asked = FALSE;
1189 #ifdef VMS
1190 bool got_err;
1191 #endif
1192 char *optstring;
1193 int opt;
1194
1195
1196 #ifdef DOS_NT
1197 _fmode = O_BINARY; /* all of files are treated as binary files */
1198 #endif /* DOS_NT */
1199
1200 progname = argv[0];
1201 nincluded_files = 0;
1202 included_files = xnew (argc, char *);
1203 current_arg = 0;
1204 file_count = 0;
1205
1206 /* Allocate enough no matter what happens. Overkill, but each one
1207 is small. */
1208 argbuffer = xnew (argc, argument);
1209
1210 /*
1211 * If etags, always find typedefs and structure tags. Why not?
1212 * Also default to find macro constants, enum constants, struct
1213 * members and global variables.
1214 */
1215 if (!CTAGS)
1216 {
1217 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1218 globals = TRUE;
1219 }
1220
1221 /* When the optstring begins with a '-' getopt_long does not rearrange the
1222 non-options arguments to be at the end, but leaves them alone. */
1223 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1224 "ac:Cf:Il:o:r:RSVhH",
1225 (CTAGS) ? "BxdtTuvw" : "Di:");
1226
1227 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1228 switch (opt)
1229 {
1230 case 0:
1231 /* If getopt returns 0, then it has already processed a
1232 long-named option. We should do nothing. */
1233 break;
1234
1235 case 1:
1236 /* This means that a file name has been seen. Record it. */
1237 argbuffer[current_arg].arg_type = at_filename;
1238 argbuffer[current_arg].what = optarg;
1239 ++current_arg;
1240 ++file_count;
1241 break;
1242
1243 case STDIN:
1244 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1245 argbuffer[current_arg].arg_type = at_stdin;
1246 argbuffer[current_arg].what = optarg;
1247 ++current_arg;
1248 ++file_count;
1249 if (parsing_stdin)
1250 fatal ("cannot parse standard input more than once", (char *)NULL);
1251 parsing_stdin = TRUE;
1252 break;
1253
1254 /* Common options. */
1255 case 'a': append_to_tagfile = TRUE; break;
1256 case 'C': cplusplus = TRUE; break;
1257 case 'f': /* for compatibility with old makefiles */
1258 case 'o':
1259 if (tagfile)
1260 {
1261 error ("-o option may only be given once.", (char *)NULL);
1262 suggest_asking_for_help ();
1263 /* NOTREACHED */
1264 }
1265 tagfile = optarg;
1266 break;
1267 case 'I':
1268 case 'S': /* for backward compatibility */
1269 ignoreindent = TRUE;
1270 break;
1271 case 'l':
1272 {
1273 language *lang = get_language_from_langname (optarg);
1274 if (lang != NULL)
1275 {
1276 argbuffer[current_arg].lang = lang;
1277 argbuffer[current_arg].arg_type = at_language;
1278 ++current_arg;
1279 }
1280 }
1281 break;
1282 case 'c':
1283 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1284 optarg = concat (optarg, "i", ""); /* memory leak here */
1285 /* FALLTHRU */
1286 case 'r':
1287 argbuffer[current_arg].arg_type = at_regexp;
1288 argbuffer[current_arg].what = optarg;
1289 ++current_arg;
1290 break;
1291 case 'R':
1292 argbuffer[current_arg].arg_type = at_regexp;
1293 argbuffer[current_arg].what = NULL;
1294 ++current_arg;
1295 break;
1296 case 'V':
1297 print_version ();
1298 break;
1299 case 'h':
1300 case 'H':
1301 help_asked = TRUE;
1302 break;
1303
1304 /* Etags options */
1305 case 'D': constantypedefs = FALSE; break;
1306 case 'i': included_files[nincluded_files++] = optarg; break;
1307
1308 /* Ctags options. */
1309 case 'B': searchar = '?'; break;
1310 case 'd': constantypedefs = TRUE; break;
1311 case 't': typedefs = TRUE; break;
1312 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1313 case 'u': update = TRUE; break;
1314 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1315 case 'x': cxref_style = TRUE; break;
1316 case 'w': no_warnings = TRUE; break;
1317 default:
1318 suggest_asking_for_help ();
1319 /* NOTREACHED */
1320 }
1321
1322 /* No more options. Store the rest of arguments. */
1323 for (; optind < argc; optind++)
1324 {
1325 argbuffer[current_arg].arg_type = at_filename;
1326 argbuffer[current_arg].what = argv[optind];
1327 ++current_arg;
1328 ++file_count;
1329 }
1330
1331 argbuffer[current_arg].arg_type = at_end;
1332
1333 if (help_asked)
1334 print_help (argbuffer);
1335 /* NOTREACHED */
1336
1337 if (nincluded_files == 0 && file_count == 0)
1338 {
1339 error ("no input files specified.", (char *)NULL);
1340 suggest_asking_for_help ();
1341 /* NOTREACHED */
1342 }
1343
1344 if (tagfile == NULL)
1345 tagfile = CTAGS ? "tags" : "TAGS";
1346 cwd = etags_getcwd (); /* the current working directory */
1347 if (cwd[strlen (cwd) - 1] != '/')
1348 {
1349 char *oldcwd = cwd;
1350 cwd = concat (oldcwd, "/", "");
1351 free (oldcwd);
1352 }
1353 /* Relative file names are made relative to the current directory. */
1354 if (streq (tagfile, "-")
1355 || strneq (tagfile, "/dev/", 5))
1356 tagfiledir = cwd;
1357 else
1358 tagfiledir = absolute_dirname (tagfile, cwd);
1359
1360 init (); /* set up boolean "functions" */
1361
1362 linebuffer_init (&lb);
1363 linebuffer_init (&filename_lb);
1364 linebuffer_init (&filebuf);
1365 linebuffer_init (&token_name);
1366
1367 if (!CTAGS)
1368 {
1369 if (streq (tagfile, "-"))
1370 {
1371 tagf = stdout;
1372 #ifdef DOS_NT
1373 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1374 doesn't take effect until after `stdout' is already open). */
1375 if (!isatty (fileno (stdout)))
1376 setmode (fileno (stdout), O_BINARY);
1377 #endif /* DOS_NT */
1378 }
1379 else
1380 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1381 if (tagf == NULL)
1382 pfatal (tagfile);
1383 }
1384
1385 /*
1386 * Loop through files finding functions.
1387 */
1388 for (i = 0; i < current_arg; i++)
1389 {
1390 static language *lang; /* non-NULL if language is forced */
1391 char *this_file;
1392
1393 switch (argbuffer[i].arg_type)
1394 {
1395 case at_language:
1396 lang = argbuffer[i].lang;
1397 break;
1398 case at_regexp:
1399 analyse_regex (argbuffer[i].what);
1400 break;
1401 case at_filename:
1402 #ifdef VMS
1403 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1404 {
1405 if (got_err)
1406 {
1407 error ("can't find file %s\n", this_file);
1408 argc--, argv++;
1409 }
1410 else
1411 {
1412 this_file = massage_name (this_file);
1413 }
1414 #else
1415 this_file = argbuffer[i].what;
1416 #endif
1417 /* Input file named "-" means read file names from stdin
1418 (one per line) and use them. */
1419 if (streq (this_file, "-"))
1420 {
1421 if (parsing_stdin)
1422 fatal ("cannot parse standard input AND read file names from it",
1423 (char *)NULL);
1424 while (readline_internal (&filename_lb, stdin) > 0)
1425 process_file_name (filename_lb.buffer, lang);
1426 }
1427 else
1428 process_file_name (this_file, lang);
1429 #ifdef VMS
1430 }
1431 #endif
1432 break;
1433 case at_stdin:
1434 this_file = argbuffer[i].what;
1435 process_file (stdin, this_file, lang);
1436 break;
1437 }
1438 }
1439
1440 free_regexps ();
1441 free (lb.buffer);
1442 free (filebuf.buffer);
1443 free (token_name.buffer);
1444
1445 if (!CTAGS || cxref_style)
1446 {
1447 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1448 put_entries (nodehead);
1449 free_tree (nodehead);
1450 nodehead = NULL;
1451 if (!CTAGS)
1452 {
1453 fdesc *fdp;
1454
1455 /* Output file entries that have no tags. */
1456 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1457 if (!fdp->written)
1458 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1459
1460 while (nincluded_files-- > 0)
1461 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1462
1463 if (fclose (tagf) == EOF)
1464 pfatal (tagfile);
1465 }
1466
1467 exit (EXIT_SUCCESS);
1468 }
1469
1470 if (update)
1471 {
1472 char cmd[BUFSIZ];
1473 for (i = 0; i < current_arg; ++i)
1474 {
1475 switch (argbuffer[i].arg_type)
1476 {
1477 case at_filename:
1478 case at_stdin:
1479 break;
1480 default:
1481 continue; /* the for loop */
1482 }
1483 sprintf (cmd,
1484 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1485 tagfile, argbuffer[i].what, tagfile);
1486 if (system (cmd) != EXIT_SUCCESS)
1487 fatal ("failed to execute shell command", (char *)NULL);
1488 }
1489 append_to_tagfile = TRUE;
1490 }
1491
1492 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1493 if (tagf == NULL)
1494 pfatal (tagfile);
1495 put_entries (nodehead); /* write all the tags (CTAGS) */
1496 free_tree (nodehead);
1497 nodehead = NULL;
1498 if (fclose (tagf) == EOF)
1499 pfatal (tagfile);
1500
1501 if (CTAGS)
1502 if (append_to_tagfile || update)
1503 {
1504 char cmd[2*BUFSIZ+20];
1505 /* Maybe these should be used:
1506 setenv ("LC_COLLATE", "C", 1);
1507 setenv ("LC_ALL", "C", 1); */
1508 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1509 exit (system (cmd));
1510 }
1511 return EXIT_SUCCESS;
1512 }
1513
1514
1515 /*
1516 * Return a compressor given the file name. If EXTPTR is non-zero,
1517 * return a pointer into FILE where the compressor-specific
1518 * extension begins. If no compressor is found, NULL is returned
1519 * and EXTPTR is not significant.
1520 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1521 */
1522 static compressor *
1523 get_compressor_from_suffix (file, extptr)
1524 char *file;
1525 char **extptr;
1526 {
1527 compressor *compr;
1528 char *slash, *suffix;
1529
1530 /* This relies on FN to be after canonicalize_filename,
1531 so we don't need to consider backslashes on DOS_NT. */
1532 slash = etags_strrchr (file, '/');
1533 suffix = etags_strrchr (file, '.');
1534 if (suffix == NULL || suffix < slash)
1535 return NULL;
1536 if (extptr != NULL)
1537 *extptr = suffix;
1538 suffix += 1;
1539 /* Let those poor souls who live with DOS 8+3 file name limits get
1540 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1541 Only the first do loop is run if not MSDOS */
1542 do
1543 {
1544 for (compr = compressors; compr->suffix != NULL; compr++)
1545 if (streq (compr->suffix, suffix))
1546 return compr;
1547 if (!MSDOS)
1548 break; /* do it only once: not really a loop */
1549 if (extptr != NULL)
1550 *extptr = ++suffix;
1551 } while (*suffix != '\0');
1552 return NULL;
1553 }
1554
1555
1556
1557 /*
1558 * Return a language given the name.
1559 */
1560 static language *
1561 get_language_from_langname (name)
1562 const char *name;
1563 {
1564 language *lang;
1565
1566 if (name == NULL)
1567 error ("empty language name", (char *)NULL);
1568 else
1569 {
1570 for (lang = lang_names; lang->name != NULL; lang++)
1571 if (streq (name, lang->name))
1572 return lang;
1573 error ("unknown language \"%s\"", name);
1574 }
1575
1576 return NULL;
1577 }
1578
1579
1580 /*
1581 * Return a language given the interpreter name.
1582 */
1583 static language *
1584 get_language_from_interpreter (interpreter)
1585 char *interpreter;
1586 {
1587 language *lang;
1588 char **iname;
1589
1590 if (interpreter == NULL)
1591 return NULL;
1592 for (lang = lang_names; lang->name != NULL; lang++)
1593 if (lang->interpreters != NULL)
1594 for (iname = lang->interpreters; *iname != NULL; iname++)
1595 if (streq (*iname, interpreter))
1596 return lang;
1597
1598 return NULL;
1599 }
1600
1601
1602
1603 /*
1604 * Return a language given the file name.
1605 */
1606 static language *
1607 get_language_from_filename (file, case_sensitive)
1608 char *file;
1609 bool case_sensitive;
1610 {
1611 language *lang;
1612 char **name, **ext, *suffix;
1613
1614 /* Try whole file name first. */
1615 for (lang = lang_names; lang->name != NULL; lang++)
1616 if (lang->filenames != NULL)
1617 for (name = lang->filenames; *name != NULL; name++)
1618 if ((case_sensitive)
1619 ? streq (*name, file)
1620 : strcaseeq (*name, file))
1621 return lang;
1622
1623 /* If not found, try suffix after last dot. */
1624 suffix = etags_strrchr (file, '.');
1625 if (suffix == NULL)
1626 return NULL;
1627 suffix += 1;
1628 for (lang = lang_names; lang->name != NULL; lang++)
1629 if (lang->suffixes != NULL)
1630 for (ext = lang->suffixes; *ext != NULL; ext++)
1631 if ((case_sensitive)
1632 ? streq (*ext, suffix)
1633 : strcaseeq (*ext, suffix))
1634 return lang;
1635 return NULL;
1636 }
1637
1638 \f
1639 /*
1640 * This routine is called on each file argument.
1641 */
1642 static void
1643 process_file_name (file, lang)
1644 char *file;
1645 language *lang;
1646 {
1647 struct stat stat_buf;
1648 FILE *inf;
1649 fdesc *fdp;
1650 compressor *compr;
1651 char *compressed_name, *uncompressed_name;
1652 char *ext, *real_name;
1653 int retval;
1654
1655 canonicalize_filename (file);
1656 if (streq (file, tagfile) && !streq (tagfile, "-"))
1657 {
1658 error ("skipping inclusion of %s in self.", file);
1659 return;
1660 }
1661 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1662 {
1663 compressed_name = NULL;
1664 real_name = uncompressed_name = savestr (file);
1665 }
1666 else
1667 {
1668 real_name = compressed_name = savestr (file);
1669 uncompressed_name = savenstr (file, ext - file);
1670 }
1671
1672 /* If the canonicalized uncompressed name
1673 has already been dealt with, skip it silently. */
1674 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1675 {
1676 assert (fdp->infname != NULL);
1677 if (streq (uncompressed_name, fdp->infname))
1678 goto cleanup;
1679 }
1680
1681 if (stat (real_name, &stat_buf) != 0)
1682 {
1683 /* Reset real_name and try with a different name. */
1684 real_name = NULL;
1685 if (compressed_name != NULL) /* try with the given suffix */
1686 {
1687 if (stat (uncompressed_name, &stat_buf) == 0)
1688 real_name = uncompressed_name;
1689 }
1690 else /* try all possible suffixes */
1691 {
1692 for (compr = compressors; compr->suffix != NULL; compr++)
1693 {
1694 compressed_name = concat (file, ".", compr->suffix);
1695 if (stat (compressed_name, &stat_buf) != 0)
1696 {
1697 if (MSDOS)
1698 {
1699 char *suf = compressed_name + strlen (file);
1700 size_t suflen = strlen (compr->suffix) + 1;
1701 for ( ; suf[1]; suf++, suflen--)
1702 {
1703 memmove (suf, suf + 1, suflen);
1704 if (stat (compressed_name, &stat_buf) == 0)
1705 {
1706 real_name = compressed_name;
1707 break;
1708 }
1709 }
1710 if (real_name != NULL)
1711 break;
1712 } /* MSDOS */
1713 free (compressed_name);
1714 compressed_name = NULL;
1715 }
1716 else
1717 {
1718 real_name = compressed_name;
1719 break;
1720 }
1721 }
1722 }
1723 if (real_name == NULL)
1724 {
1725 perror (file);
1726 goto cleanup;
1727 }
1728 } /* try with a different name */
1729
1730 if (!S_ISREG (stat_buf.st_mode))
1731 {
1732 error ("skipping %s: it is not a regular file.", real_name);
1733 goto cleanup;
1734 }
1735 if (real_name == compressed_name)
1736 {
1737 char *cmd = concat (compr->command, " ", real_name);
1738 inf = (FILE *) popen (cmd, "r");
1739 free (cmd);
1740 }
1741 else
1742 inf = fopen (real_name, "r");
1743 if (inf == NULL)
1744 {
1745 perror (real_name);
1746 goto cleanup;
1747 }
1748
1749 process_file (inf, uncompressed_name, lang);
1750
1751 if (real_name == compressed_name)
1752 retval = pclose (inf);
1753 else
1754 retval = fclose (inf);
1755 if (retval < 0)
1756 pfatal (file);
1757
1758 cleanup:
1759 if (compressed_name) free (compressed_name);
1760 if (uncompressed_name) free (uncompressed_name);
1761 last_node = NULL;
1762 curfdp = NULL;
1763 return;
1764 }
1765
1766 static void
1767 process_file (fh, fn, lang)
1768 FILE *fh;
1769 char *fn;
1770 language *lang;
1771 {
1772 static const fdesc emptyfdesc;
1773 fdesc *fdp;
1774
1775 /* Create a new input file description entry. */
1776 fdp = xnew (1, fdesc);
1777 *fdp = emptyfdesc;
1778 fdp->next = fdhead;
1779 fdp->infname = savestr (fn);
1780 fdp->lang = lang;
1781 fdp->infabsname = absolute_filename (fn, cwd);
1782 fdp->infabsdir = absolute_dirname (fn, cwd);
1783 if (filename_is_absolute (fn))
1784 {
1785 /* An absolute file name. Canonicalize it. */
1786 fdp->taggedfname = absolute_filename (fn, NULL);
1787 }
1788 else
1789 {
1790 /* A file name relative to cwd. Make it relative
1791 to the directory of the tags file. */
1792 fdp->taggedfname = relative_filename (fn, tagfiledir);
1793 }
1794 fdp->usecharno = TRUE; /* use char position when making tags */
1795 fdp->prop = NULL;
1796 fdp->written = FALSE; /* not written on tags file yet */
1797
1798 fdhead = fdp;
1799 curfdp = fdhead; /* the current file description */
1800
1801 find_entries (fh);
1802
1803 /* If not Ctags, and if this is not metasource and if it contained no #line
1804 directives, we can write the tags and free all nodes pointing to
1805 curfdp. */
1806 if (!CTAGS
1807 && curfdp->usecharno /* no #line directives in this file */
1808 && !curfdp->lang->metasource)
1809 {
1810 node *np, *prev;
1811
1812 /* Look for the head of the sublist relative to this file. See add_node
1813 for the structure of the node tree. */
1814 prev = NULL;
1815 for (np = nodehead; np != NULL; prev = np, np = np->left)
1816 if (np->fdp == curfdp)
1817 break;
1818
1819 /* If we generated tags for this file, write and delete them. */
1820 if (np != NULL)
1821 {
1822 /* This is the head of the last sublist, if any. The following
1823 instructions depend on this being true. */
1824 assert (np->left == NULL);
1825
1826 assert (fdhead == curfdp);
1827 assert (last_node->fdp == curfdp);
1828 put_entries (np); /* write tags for file curfdp->taggedfname */
1829 free_tree (np); /* remove the written nodes */
1830 if (prev == NULL)
1831 nodehead = NULL; /* no nodes left */
1832 else
1833 prev->left = NULL; /* delete the pointer to the sublist */
1834 }
1835 }
1836 }
1837
1838 /*
1839 * This routine sets up the boolean pseudo-functions which work
1840 * by setting boolean flags dependent upon the corresponding character.
1841 * Every char which is NOT in that string is not a white char. Therefore,
1842 * all of the array "_wht" is set to FALSE, and then the elements
1843 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1844 * of a char is TRUE if it is the string "white", else FALSE.
1845 */
1846 static void
1847 init ()
1848 {
1849 register char *sp;
1850 register int i;
1851
1852 for (i = 0; i < CHARS; i++)
1853 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1854 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1855 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1856 notinname('\0') = notinname('\n');
1857 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1858 begtoken('\0') = begtoken('\n');
1859 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1860 intoken('\0') = intoken('\n');
1861 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1862 endtoken('\0') = endtoken('\n');
1863 }
1864
1865 /*
1866 * This routine opens the specified file and calls the function
1867 * which finds the function and type definitions.
1868 */
1869 static void
1870 find_entries (inf)
1871 FILE *inf;
1872 {
1873 char *cp;
1874 language *lang = curfdp->lang;
1875 Lang_function *parser = NULL;
1876
1877 /* If user specified a language, use it. */
1878 if (lang != NULL && lang->function != NULL)
1879 {
1880 parser = lang->function;
1881 }
1882
1883 /* Else try to guess the language given the file name. */
1884 if (parser == NULL)
1885 {
1886 lang = get_language_from_filename (curfdp->infname, TRUE);
1887 if (lang != NULL && lang->function != NULL)
1888 {
1889 curfdp->lang = lang;
1890 parser = lang->function;
1891 }
1892 }
1893
1894 /* Else look for sharp-bang as the first two characters. */
1895 if (parser == NULL
1896 && readline_internal (&lb, inf) > 0
1897 && lb.len >= 2
1898 && lb.buffer[0] == '#'
1899 && lb.buffer[1] == '!')
1900 {
1901 char *lp;
1902
1903 /* Set lp to point at the first char after the last slash in the
1904 line or, if no slashes, at the first nonblank. Then set cp to
1905 the first successive blank and terminate the string. */
1906 lp = etags_strrchr (lb.buffer+2, '/');
1907 if (lp != NULL)
1908 lp += 1;
1909 else
1910 lp = skip_spaces (lb.buffer + 2);
1911 cp = skip_non_spaces (lp);
1912 *cp = '\0';
1913
1914 if (strlen (lp) > 0)
1915 {
1916 lang = get_language_from_interpreter (lp);
1917 if (lang != NULL && lang->function != NULL)
1918 {
1919 curfdp->lang = lang;
1920 parser = lang->function;
1921 }
1922 }
1923 }
1924
1925 /* We rewind here, even if inf may be a pipe. We fail if the
1926 length of the first line is longer than the pipe block size,
1927 which is unlikely. */
1928 rewind (inf);
1929
1930 /* Else try to guess the language given the case insensitive file name. */
1931 if (parser == NULL)
1932 {
1933 lang = get_language_from_filename (curfdp->infname, FALSE);
1934 if (lang != NULL && lang->function != NULL)
1935 {
1936 curfdp->lang = lang;
1937 parser = lang->function;
1938 }
1939 }
1940
1941 /* Else try Fortran or C. */
1942 if (parser == NULL)
1943 {
1944 node *old_last_node = last_node;
1945
1946 curfdp->lang = get_language_from_langname ("fortran");
1947 find_entries (inf);
1948
1949 if (old_last_node == last_node)
1950 /* No Fortran entries found. Try C. */
1951 {
1952 /* We do not tag if rewind fails.
1953 Only the file name will be recorded in the tags file. */
1954 rewind (inf);
1955 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1956 find_entries (inf);
1957 }
1958 return;
1959 }
1960
1961 if (!no_line_directive
1962 && curfdp->lang != NULL && curfdp->lang->metasource)
1963 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1964 file, or anyway we parsed a file that is automatically generated from
1965 this one. If this is the case, the bingo.c file contained #line
1966 directives that generated tags pointing to this file. Let's delete
1967 them all before parsing this file, which is the real source. */
1968 {
1969 fdesc **fdpp = &fdhead;
1970 while (*fdpp != NULL)
1971 if (*fdpp != curfdp
1972 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1973 /* We found one of those! We must delete both the file description
1974 and all tags referring to it. */
1975 {
1976 fdesc *badfdp = *fdpp;
1977
1978 /* Delete the tags referring to badfdp->taggedfname
1979 that were obtained from badfdp->infname. */
1980 invalidate_nodes (badfdp, &nodehead);
1981
1982 *fdpp = badfdp->next; /* remove the bad description from the list */
1983 free_fdesc (badfdp);
1984 }
1985 else
1986 fdpp = &(*fdpp)->next; /* advance the list pointer */
1987 }
1988
1989 assert (parser != NULL);
1990
1991 /* Generic initialisations before reading from file. */
1992 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1993
1994 /* Generic initialisations before parsing file with readline. */
1995 lineno = 0; /* reset global line number */
1996 charno = 0; /* reset global char number */
1997 linecharno = 0; /* reset global char number of line start */
1998
1999 parser (inf);
2000
2001 regex_tag_multiline ();
2002 }
2003
2004 \f
2005 /*
2006 * Check whether an implicitly named tag should be created,
2007 * then call `pfnote'.
2008 * NAME is a string that is internally copied by this function.
2009 *
2010 * TAGS format specification
2011 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2012 * The following is explained in some more detail in etc/ETAGS.EBNF.
2013 *
2014 * make_tag creates tags with "implicit tag names" (unnamed tags)
2015 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2016 * 1. NAME does not contain any of the characters in NONAM;
2017 * 2. LINESTART contains name as either a rightmost, or rightmost but
2018 * one character, substring;
2019 * 3. the character, if any, immediately before NAME in LINESTART must
2020 * be a character in NONAM;
2021 * 4. the character, if any, immediately after NAME in LINESTART must
2022 * also be a character in NONAM.
2023 *
2024 * The implementation uses the notinname() macro, which recognises the
2025 * characters stored in the string `nonam'.
2026 * etags.el needs to use the same characters that are in NONAM.
2027 */
2028 static void
2029 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2030 char *name; /* tag name, or NULL if unnamed */
2031 int namelen; /* tag length */
2032 bool is_func; /* tag is a function */
2033 char *linestart; /* start of the line where tag is */
2034 int linelen; /* length of the line where tag is */
2035 int lno; /* line number */
2036 long cno; /* character number */
2037 {
2038 bool named = (name != NULL && namelen > 0);
2039
2040 if (!CTAGS && named) /* maybe set named to false */
2041 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2042 such that etags.el can guess a name from it. */
2043 {
2044 int i;
2045 register char *cp = name;
2046
2047 for (i = 0; i < namelen; i++)
2048 if (notinname (*cp++))
2049 break;
2050 if (i == namelen) /* rule #1 */
2051 {
2052 cp = linestart + linelen - namelen;
2053 if (notinname (linestart[linelen-1]))
2054 cp -= 1; /* rule #4 */
2055 if (cp >= linestart /* rule #2 */
2056 && (cp == linestart
2057 || notinname (cp[-1])) /* rule #3 */
2058 && strneq (name, cp, namelen)) /* rule #2 */
2059 named = FALSE; /* use implicit tag name */
2060 }
2061 }
2062
2063 if (named)
2064 name = savenstr (name, namelen);
2065 else
2066 name = NULL;
2067 pfnote (name, is_func, linestart, linelen, lno, cno);
2068 }
2069
2070 /* Record a tag. */
2071 static void
2072 pfnote (name, is_func, linestart, linelen, lno, cno)
2073 char *name; /* tag name, or NULL if unnamed */
2074 bool is_func; /* tag is a function */
2075 char *linestart; /* start of the line where tag is */
2076 int linelen; /* length of the line where tag is */
2077 int lno; /* line number */
2078 long cno; /* character number */
2079 {
2080 register node *np;
2081
2082 assert (name == NULL || name[0] != '\0');
2083 if (CTAGS && name == NULL)
2084 return;
2085
2086 np = xnew (1, node);
2087
2088 /* If ctags mode, change name "main" to M<thisfilename>. */
2089 if (CTAGS && !cxref_style && streq (name, "main"))
2090 {
2091 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2092 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2093 fp = etags_strrchr (np->name, '.');
2094 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2095 fp[0] = '\0';
2096 }
2097 else
2098 np->name = name;
2099 np->valid = TRUE;
2100 np->been_warned = FALSE;
2101 np->fdp = curfdp;
2102 np->is_func = is_func;
2103 np->lno = lno;
2104 if (np->fdp->usecharno)
2105 /* Our char numbers are 0-base, because of C language tradition?
2106 ctags compatibility? old versions compatibility? I don't know.
2107 Anyway, since emacs's are 1-base we expect etags.el to take care
2108 of the difference. If we wanted to have 1-based numbers, we would
2109 uncomment the +1 below. */
2110 np->cno = cno /* + 1 */ ;
2111 else
2112 np->cno = invalidcharno;
2113 np->left = np->right = NULL;
2114 if (CTAGS && !cxref_style)
2115 {
2116 if (strlen (linestart) < 50)
2117 np->regex = concat (linestart, "$", "");
2118 else
2119 np->regex = savenstr (linestart, 50);
2120 }
2121 else
2122 np->regex = savenstr (linestart, linelen);
2123
2124 add_node (np, &nodehead);
2125 }
2126
2127 /*
2128 * free_tree ()
2129 * recurse on left children, iterate on right children.
2130 */
2131 static void
2132 free_tree (np)
2133 register node *np;
2134 {
2135 while (np)
2136 {
2137 register node *node_right = np->right;
2138 free_tree (np->left);
2139 if (np->name != NULL)
2140 free (np->name);
2141 free (np->regex);
2142 free (np);
2143 np = node_right;
2144 }
2145 }
2146
2147 /*
2148 * free_fdesc ()
2149 * delete a file description
2150 */
2151 static void
2152 free_fdesc (fdp)
2153 register fdesc *fdp;
2154 {
2155 if (fdp->infname != NULL) free (fdp->infname);
2156 if (fdp->infabsname != NULL) free (fdp->infabsname);
2157 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2158 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2159 if (fdp->prop != NULL) free (fdp->prop);
2160 free (fdp);
2161 }
2162
2163 /*
2164 * add_node ()
2165 * Adds a node to the tree of nodes. In etags mode, sort by file
2166 * name. In ctags mode, sort by tag name. Make no attempt at
2167 * balancing.
2168 *
2169 * add_node is the only function allowed to add nodes, so it can
2170 * maintain state.
2171 */
2172 static void
2173 add_node (np, cur_node_p)
2174 node *np, **cur_node_p;
2175 {
2176 register int dif;
2177 register node *cur_node = *cur_node_p;
2178
2179 if (cur_node == NULL)
2180 {
2181 *cur_node_p = np;
2182 last_node = np;
2183 return;
2184 }
2185
2186 if (!CTAGS)
2187 /* Etags Mode */
2188 {
2189 /* For each file name, tags are in a linked sublist on the right
2190 pointer. The first tags of different files are a linked list
2191 on the left pointer. last_node points to the end of the last
2192 used sublist. */
2193 if (last_node != NULL && last_node->fdp == np->fdp)
2194 {
2195 /* Let's use the same sublist as the last added node. */
2196 assert (last_node->right == NULL);
2197 last_node->right = np;
2198 last_node = np;
2199 }
2200 else if (cur_node->fdp == np->fdp)
2201 {
2202 /* Scanning the list we found the head of a sublist which is
2203 good for us. Let's scan this sublist. */
2204 add_node (np, &cur_node->right);
2205 }
2206 else
2207 /* The head of this sublist is not good for us. Let's try the
2208 next one. */
2209 add_node (np, &cur_node->left);
2210 } /* if ETAGS mode */
2211
2212 else
2213 {
2214 /* Ctags Mode */
2215 dif = strcmp (np->name, cur_node->name);
2216
2217 /*
2218 * If this tag name matches an existing one, then
2219 * do not add the node, but maybe print a warning.
2220 */
2221 if (no_duplicates && !dif)
2222 {
2223 if (np->fdp == cur_node->fdp)
2224 {
2225 if (!no_warnings)
2226 {
2227 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2228 np->fdp->infname, lineno, np->name);
2229 fprintf (stderr, "Second entry ignored\n");
2230 }
2231 }
2232 else if (!cur_node->been_warned && !no_warnings)
2233 {
2234 fprintf
2235 (stderr,
2236 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2237 np->fdp->infname, cur_node->fdp->infname, np->name);
2238 cur_node->been_warned = TRUE;
2239 }
2240 return;
2241 }
2242
2243 /* Actually add the node */
2244 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2245 } /* if CTAGS mode */
2246 }
2247
2248 /*
2249 * invalidate_nodes ()
2250 * Scan the node tree and invalidate all nodes pointing to the
2251 * given file description (CTAGS case) or free them (ETAGS case).
2252 */
2253 static void
2254 invalidate_nodes (badfdp, npp)
2255 fdesc *badfdp;
2256 node **npp;
2257 {
2258 node *np = *npp;
2259
2260 if (np == NULL)
2261 return;
2262
2263 if (CTAGS)
2264 {
2265 if (np->left != NULL)
2266 invalidate_nodes (badfdp, &np->left);
2267 if (np->fdp == badfdp)
2268 np->valid = FALSE;
2269 if (np->right != NULL)
2270 invalidate_nodes (badfdp, &np->right);
2271 }
2272 else
2273 {
2274 assert (np->fdp != NULL);
2275 if (np->fdp == badfdp)
2276 {
2277 *npp = np->left; /* detach the sublist from the list */
2278 np->left = NULL; /* isolate it */
2279 free_tree (np); /* free it */
2280 invalidate_nodes (badfdp, npp);
2281 }
2282 else
2283 invalidate_nodes (badfdp, &np->left);
2284 }
2285 }
2286
2287 \f
2288 static int total_size_of_entries __P((node *));
2289 static int number_len __P((long));
2290
/* Return the number of digits in the decimal representation of NUM,
   which is expected to be non-negative.  The result is never less
   than 1.  */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2301
2302 /*
2303 * Return total number of characters that put_entries will output for
2304 * the nodes in the linked list at the right of the specified node.
2305 * This count is irrelevant with etags.el since emacs 19.34 at least,
2306 * but is still supplied for backward compatibility.
2307 */
2308 static int
2309 total_size_of_entries (np)
2310 register node *np;
2311 {
2312 register int total = 0;
2313
2314 for (; np != NULL; np = np->right)
2315 if (np->valid)
2316 {
2317 total += strlen (np->regex) + 1; /* pat\177 */
2318 if (np->name != NULL)
2319 total += strlen (np->name) + 1; /* name\001 */
2320 total += number_len ((long) np->lno) + 1; /* lno, */
2321 if (np->cno != invalidcharno) /* cno */
2322 total += number_len (np->cno);
2323 total += 1; /* newline */
2324 }
2325
2326 return total;
2327 }
2328
2329 static void
2330 put_entries (np)
2331 register node *np;
2332 {
2333 register char *sp;
2334 static fdesc *fdp = NULL;
2335
2336 if (np == NULL)
2337 return;
2338
2339 /* Output subentries that precede this one */
2340 if (CTAGS)
2341 put_entries (np->left);
2342
2343 /* Output this entry */
2344 if (np->valid)
2345 {
2346 if (!CTAGS)
2347 {
2348 /* Etags mode */
2349 if (fdp != np->fdp)
2350 {
2351 fdp = np->fdp;
2352 fprintf (tagf, "\f\n%s,%d\n",
2353 fdp->taggedfname, total_size_of_entries (np));
2354 fdp->written = TRUE;
2355 }
2356 fputs (np->regex, tagf);
2357 fputc ('\177', tagf);
2358 if (np->name != NULL)
2359 {
2360 fputs (np->name, tagf);
2361 fputc ('\001', tagf);
2362 }
2363 fprintf (tagf, "%d,", np->lno);
2364 if (np->cno != invalidcharno)
2365 fprintf (tagf, "%ld", np->cno);
2366 fputs ("\n", tagf);
2367 }
2368 else
2369 {
2370 /* Ctags mode */
2371 if (np->name == NULL)
2372 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2373
2374 if (cxref_style)
2375 {
2376 if (vgrind_style)
2377 fprintf (stdout, "%s %s %d\n",
2378 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2379 else
2380 fprintf (stdout, "%-16s %3d %-16s %s\n",
2381 np->name, np->lno, np->fdp->taggedfname, np->regex);
2382 }
2383 else
2384 {
2385 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2386
2387 if (np->is_func)
2388 { /* function or #define macro with args */
2389 putc (searchar, tagf);
2390 putc ('^', tagf);
2391
2392 for (sp = np->regex; *sp; sp++)
2393 {
2394 if (*sp == '\\' || *sp == searchar)
2395 putc ('\\', tagf);
2396 putc (*sp, tagf);
2397 }
2398 putc (searchar, tagf);
2399 }
2400 else
2401 { /* anything else; text pattern inadequate */
2402 fprintf (tagf, "%d", np->lno);
2403 }
2404 putc ('\n', tagf);
2405 }
2406 }
2407 } /* if this node contains a valid tag */
2408
2409 /* Output subentries that follow this one */
2410 put_entries (np->right);
2411 if (!CTAGS)
2412 put_entries (np->left);
2413 }
2414
2415 \f
/* Flags describing which C-like language is being parsed.  Note that
   the values of C_STAR and C_JAVA both include the C_PLPL bit.  */
#define C_EXT	0x00fff		/* mask of the C extension bits */
#define C_PLAIN 0x00000		/* plain C, no extension */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* Java */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
2424
/*
 * The C symbol tables.
 * Each keyword known to the C-like parsers is classified with one of
 * these types; st_none stands for a word that is not a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2439
2440 static unsigned int hash __P((const char *, unsigned int));
2441 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2442 static enum sym_type C_symtype __P((char *, int, int));
2443
2444 /* Feed stuff between (but not including) %[ and %] lines to:
2445 gperf -m 5
2446 %[
2447 %compare-strncmp
2448 %enum
2449 %struct-type
2450 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2451 %%
2452 if, 0, st_C_ignore
2453 for, 0, st_C_ignore
2454 while, 0, st_C_ignore
2455 switch, 0, st_C_ignore
2456 return, 0, st_C_ignore
2457 __attribute__, 0, st_C_attribute
2458 @interface, 0, st_C_objprot
2459 @protocol, 0, st_C_objprot
2460 @implementation,0, st_C_objimpl
2461 @end, 0, st_C_objend
2462 import, (C_JAVA & ~C_PLPL), st_C_ignore
2463 package, (C_JAVA & ~C_PLPL), st_C_ignore
2464 friend, C_PLPL, st_C_ignore
2465 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2466 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2467 interface, (C_JAVA & ~C_PLPL), st_C_struct
2468 class, 0, st_C_class
2469 namespace, C_PLPL, st_C_struct
2470 domain, C_STAR, st_C_struct
2471 union, 0, st_C_struct
2472 struct, 0, st_C_struct
2473 extern, 0, st_C_extern
2474 enum, 0, st_C_enum
2475 typedef, 0, st_C_typedef
2476 define, 0, st_C_define
2477 undef, 0, st_C_define
2478 operator, C_PLPL, st_C_operator
2479 template, 0, st_C_template
2480 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2481 DEFUN, 0, st_C_gnumacro
2482 SYSCALL, 0, st_C_gnumacro
2483 ENTRY, 0, st_C_gnumacro
2484 PSEUDO, 0, st_C_gnumacro
2485 # These are defined inside C functions, so currently they are not met.
2486 # EXFUN used in glibc, DEFVAR_* in emacs.
2487 #EXFUN, 0, st_C_gnumacro
2488 #DEFVAR_, 0, st_C_gnumacro
2489 %]
2490 and replace lines between %< and %> with its output, then:
2491 - remove the #if characterset check
2492 - make in_word_set static and not inline. */
2493 /*%<*/
2494 /* C code produced by gperf version 3.0.1 */
2495 /* Command-line: gperf -m 5 */
2496 /* Computed positions: -k'2-3' */
2497
2498 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2499 /* maximum key range = 33, duplicates = 0 */
2500
/* Hash function for the keyword table, derived from gperf output.
   The hash of a word is LEN plus the association value of its second
   character plus, unless LEN is 2, that of its third character.
   STR must therefore hold at least two characters.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Every length other than 2 also takes the third character.  */
  if (hval != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2555
2556 static struct C_stab_entry *
2557 in_word_set (str, len)
2558 register const char *str;
2559 register unsigned int len;
2560 {
2561 enum
2562 {
2563 TOTAL_KEYWORDS = 32,
2564 MIN_WORD_LENGTH = 2,
2565 MAX_WORD_LENGTH = 15,
2566 MIN_HASH_VALUE = 2,
2567 MAX_HASH_VALUE = 34
2568 };
2569
2570 static struct C_stab_entry wordlist[] =
2571 {
2572 {""}, {""},
2573 {"if", 0, st_C_ignore},
2574 {""},
2575 {"@end", 0, st_C_objend},
2576 {"union", 0, st_C_struct},
2577 {"define", 0, st_C_define},
2578 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2579 {"template", 0, st_C_template},
2580 {"operator", C_PLPL, st_C_operator},
2581 {"@interface", 0, st_C_objprot},
2582 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2583 {"friend", C_PLPL, st_C_ignore},
2584 {"typedef", 0, st_C_typedef},
2585 {"return", 0, st_C_ignore},
2586 {"@implementation",0, st_C_objimpl},
2587 {"@protocol", 0, st_C_objprot},
2588 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2589 {"extern", 0, st_C_extern},
2590 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2591 {"struct", 0, st_C_struct},
2592 {"domain", C_STAR, st_C_struct},
2593 {"switch", 0, st_C_ignore},
2594 {"enum", 0, st_C_enum},
2595 {"for", 0, st_C_ignore},
2596 {"namespace", C_PLPL, st_C_struct},
2597 {"class", 0, st_C_class},
2598 {"while", 0, st_C_ignore},
2599 {"undef", 0, st_C_define},
2600 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2601 {"__attribute__", 0, st_C_attribute},
2602 {"SYSCALL", 0, st_C_gnumacro},
2603 {"ENTRY", 0, st_C_gnumacro},
2604 {"PSEUDO", 0, st_C_gnumacro},
2605 {"DEFUN", 0, st_C_gnumacro}
2606 };
2607
2608 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2609 {
2610 register int key = hash (str, len);
2611
2612 if (key <= MAX_HASH_VALUE && key >= 0)
2613 {
2614 register const char *s = wordlist[key].name;
2615
2616 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2617 return &wordlist[key];
2618 }
2619 }
2620 return 0;
2621 }
2622 /*%>*/
2623
2624 static enum sym_type
2625 C_symtype (str, len, c_ext)
2626 char *str;
2627 int len;
2628 int c_ext;
2629 {
2630 register struct C_stab_entry *se = in_word_set (str, len);
2631
2632 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2633 return st_none;
2634 return se->type;
2635 }
2636
2637 \f
/*
 * Support for ignoring __attribute__ ((list)):
 * set while looking at an __attribute__ construct.
 */
static bool inattribute;
2642
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  /* nothing seen */
  fvnone,
  /* Emacs DEFUN keyword seen */
  fdefunkey,
  /* Emacs DEFUN name seen */
  fdefunname,
  /* func: operator keyword seen (cplpl) */
  foperator,
  /* function or variable name seen */
  fvnameseen,
  /* func: just after open parenthesis */
  fstartlist,
  /* func: in parameter list */
  finlist,
  /* func: after parameter list */
  flistseen,
  /* func: before open brace */
  fignore,
  /* var-like: ignore until ';' */
  vignore
} fvdef;

/* func or var: extern keyword seen */
static bool fvextern;
2662
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  /* nothing seen */
  tnone,
  /* typedef keyword seen */
  tkeyseen,
  /* defined type seen */
  ttypeseen,
  /* inside typedef body */
  tinbody,
  /* just before typedef tag */
  tend,
  /* junk after typedef tag */
  tignore
} typdef;
2676
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  /* nothing seen yet, or in struct body if bracelev > 0 */
  snone,
  /* struct-like keyword seen */
  skeyseen,
  /* struct-like tag seen */
  stagseen,
  /* colon seen after struct-like tag */
  scolonseen
} structdef;
2690
/*
 * Name of the current Objective C class.  It is meaningful only
 * when objdef is different from onone.
 */
static char *objtag = "<uninited>";
2695
/*
 * Yet another little state machine to deal with preprocessor lines.
 * definedef is its state variable.
 */
static enum
{
  /* nothing seen */
  dnone,
  /* '#' seen as first char on line */
  dsharpseen,
  /* '#' and 'define' seen */
  ddefineseen,
  /* ignore rest of line */
  dignorerest
} definedef;
2706
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 * objdef is its state variable.
 */
static enum
{
  /* nothing seen */
  onone,
  /* @interface or @protocol seen */
  oprotocol,
  /* @implementation seen */
  oimplementation,
  /* class name seen */
  otagseen,
  /* parenthesis before category seen */
  oparenseen,
  /* category name seen */
  ocatseen,
  /* in @implementation body */
  oinbody,
  /* in @implementation body, after +/- */
  omethodsign,
  /* after method name */
  omethodtag,
  /* after method colon */
  omethodcolon,
  /* after method parameter */
  omethodparm,
  /* wait for @end */
  oignore
} objdef;
2726
2727
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 * The variable `token' holds the latest token read.
 */
static struct tok
{
  /* The first three members can be used to pass strings around for
     generic purposes.  */
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */

  /* The remaining members specifically refer to creating tags.  In
     this case the token contained here is the pattern that will be
     used to create a tag.  */
  bool valid;			/* do not create a tag; the token should be
				   invalidated whenever a state machine is
				   reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;
2750
2751 /*
2752 * Variables and functions for dealing with nested structures.
2753 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2754 */
2755 static void pushclass_above __P((int, char *, int));
2756 static void popclass_above __P((int));
2757 static void write_classname __P((linebuffer *, char *qualifier));
2758
/* Stack for nested declaration tags.  The arrays cname and bracelev
   run in parallel: entry I describes the I-th enclosing class.  */
static struct
{
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum).  */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.  */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2770
2771 static void
2772 pushclass_above (bracelev, str, len)
2773 int bracelev;
2774 char *str;
2775 int len;
2776 {
2777 int nl;
2778
2779 popclass_above (bracelev);
2780 nl = cstack.nl;
2781 if (nl >= cstack.size)
2782 {
2783 int size = cstack.size *= 2;
2784 xrnew (cstack.cname, size, char *);
2785 xrnew (cstack.bracelev, size, int);
2786 }
2787 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2788 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2789 cstack.bracelev[nl] = bracelev;
2790 cstack.nl = nl + 1;
2791 }
2792
2793 static void
2794 popclass_above (bracelev)
2795 int bracelev;
2796 {
2797 int nl;
2798
2799 for (nl = cstack.nl - 1;
2800 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2801 nl--)
2802 {
2803 if (cstack.cname[nl] != NULL)
2804 free (cstack.cname[nl]);
2805 cstack.nl = nl;
2806 }
2807 }
2808
2809 static void
2810 write_classname (cn, qualifier)
2811 linebuffer *cn;
2812 char *qualifier;
2813 {
2814 int i, len;
2815 int qlen = strlen (qualifier);
2816
2817 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2818 {
2819 len = 0;
2820 cn->len = 0;
2821 cn->buffer[0] = '\0';
2822 }
2823 else
2824 {
2825 len = strlen (cstack.cname[0]);
2826 linebuffer_setlen (cn, len);
2827 strcpy (cn->buffer, cstack.cname[0]);
2828 }
2829 for (i = 1; i < cstack.nl; i++)
2830 {
2831 char *s;
2832 int slen;
2833
2834 s = cstack.cname[i];
2835 if (s == NULL)
2836 continue;
2837 slen = strlen (s);
2838 len += slen + qlen;
2839 linebuffer_setlen (cn, len);
2840 strncat (cn->buffer, qualifier, qlen);
2841 strncat (cn->buffer, s, slen);
2842 }
2843 }
2844
2845 \f
2846 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2847 static void make_C_tag __P((bool));
2848
2849 /*
2850 * consider_token ()
2851 * checks to see if the current token is at the start of a
2852 * function or variable, or corresponds to a typedef, or
2853 * is a struct/union/enum tag, or #define, or an enum constant.
2854 *
2855 * *IS_FUNC gets TRUE iff the token is a function or #define macro
2856 * with args. C_EXTP points to which language we are looking at.
2857 *
2858 * Globals
2859 * fvdef IN OUT
2860 * structdef IN OUT
2861 * definedef IN OUT
2862 * typdef IN OUT
2863 * objdef IN OUT
2864 */
2865
2866 static bool
2867 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2868 register char *str; /* IN: token pointer */
2869 register int len; /* IN: token length */
2870 register int c; /* IN: first char after the token */
2871 int *c_extp; /* IN, OUT: C extensions mask */
2872 int bracelev; /* IN: brace level */
2873 int parlev; /* IN: parenthesis level */
2874 bool *is_func_or_var; /* OUT: function or variable found */
2875 {
2876 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2877 structtype is the type of the preceding struct-like keyword, and
2878 structbracelev is the brace level where it has been seen. */
2879 static enum sym_type structtype;
2880 static int structbracelev;
2881 static enum sym_type toktype;
2882
2883
2884 toktype = C_symtype (str, len, *c_extp);
2885
2886 /*
2887 * Skip __attribute__
2888 */
2889 if (toktype == st_C_attribute)
2890 {
2891 inattribute = TRUE;
2892 return FALSE;
2893 }
2894
2895 /*
2896 * Advance the definedef state machine.
2897 */
2898 switch (definedef)
2899 {
2900 case dnone:
2901 /* We're not on a preprocessor line. */
2902 if (toktype == st_C_gnumacro)
2903 {
2904 fvdef = fdefunkey;
2905 return FALSE;
2906 }
2907 break;
2908 case dsharpseen:
2909 if (toktype == st_C_define)
2910 {
2911 definedef = ddefineseen;
2912 }
2913 else
2914 {
2915 definedef = dignorerest;
2916 }
2917 return FALSE;
2918 case ddefineseen:
2919 /*
2920 * Make a tag for any macro, unless it is a constant
2921 * and constantypedefs is FALSE.
2922 */
2923 definedef = dignorerest;
2924 *is_func_or_var = (c == '(');
2925 if (!*is_func_or_var && !constantypedefs)
2926 return FALSE;
2927 else
2928 return TRUE;
2929 case dignorerest:
2930 return FALSE;
2931 default:
2932 error ("internal error: definedef value.", (char *)NULL);
2933 }
2934
2935 /*
2936 * Now typedefs
2937 */
2938 switch (typdef)
2939 {
2940 case tnone:
2941 if (toktype == st_C_typedef)
2942 {
2943 if (typedefs)
2944 typdef = tkeyseen;
2945 fvextern = FALSE;
2946 fvdef = fvnone;
2947 return FALSE;
2948 }
2949 break;
2950 case tkeyseen:
2951 switch (toktype)
2952 {
2953 case st_none:
2954 case st_C_class:
2955 case st_C_struct:
2956 case st_C_enum:
2957 typdef = ttypeseen;
2958 }
2959 break;
2960 case ttypeseen:
2961 if (structdef == snone && fvdef == fvnone)
2962 {
2963 fvdef = fvnameseen;
2964 return TRUE;
2965 }
2966 break;
2967 case tend:
2968 switch (toktype)
2969 {
2970 case st_C_class:
2971 case st_C_struct:
2972 case st_C_enum:
2973 return FALSE;
2974 }
2975 return TRUE;
2976 }
2977
2978 /*
2979 * This structdef business is NOT invoked when we are ctags and the
2980 * file is plain C. This is because a struct tag may have the same
2981 * name as another tag, and this loses with ctags.
2982 */
2983 switch (toktype)
2984 {
2985 case st_C_javastruct:
2986 if (structdef == stagseen)
2987 structdef = scolonseen;
2988 return FALSE;
2989 case st_C_template:
2990 case st_C_class:
2991 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2992 && bracelev == 0
2993 && definedef == dnone && structdef == snone
2994 && typdef == tnone && fvdef == fvnone)
2995 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2996 if (toktype == st_C_template)
2997 break;
2998 /* FALLTHRU */
2999 case st_C_struct:
3000 case st_C_enum:
3001 if (parlev == 0
3002 && fvdef != vignore
3003 && (typdef == tkeyseen
3004 || (typedefs_or_cplusplus && structdef == snone)))
3005 {
3006 structdef = skeyseen;
3007 structtype = toktype;
3008 structbracelev = bracelev;
3009 if (fvdef == fvnameseen)
3010 fvdef = fvnone;
3011 }
3012 return FALSE;
3013 }
3014
3015 if (structdef == skeyseen)
3016 {
3017 structdef = stagseen;
3018 return TRUE;
3019 }
3020
3021 if (typdef != tnone)
3022 definedef = dnone;
3023
3024 /* Detect Objective C constructs. */
3025 switch (objdef)
3026 {
3027 case onone:
3028 switch (toktype)
3029 {
3030 case st_C_objprot:
3031 objdef = oprotocol;
3032 return FALSE;
3033 case st_C_objimpl:
3034 objdef = oimplementation;
3035 return FALSE;
3036 }
3037 break;
3038 case oimplementation:
3039 /* Save the class tag for functions or variables defined inside. */
3040 objtag = savenstr (str, len);
3041 objdef = oinbody;
3042 return FALSE;
3043 case oprotocol:
3044 /* Save the class tag for categories. */
3045 objtag = savenstr (str, len);
3046 objdef = otagseen;
3047 *is_func_or_var = TRUE;
3048 return TRUE;
3049 case oparenseen:
3050 objdef = ocatseen;
3051 *is_func_or_var = TRUE;
3052 return TRUE;
3053 case oinbody:
3054 break;
3055 case omethodsign:
3056 if (parlev == 0)
3057 {
3058 fvdef = fvnone;
3059 objdef = omethodtag;
3060 linebuffer_setlen (&token_name, len);
3061 strncpy (token_name.buffer, str, len);
3062 token_name.buffer[len] = '\0';
3063 return TRUE;
3064 }
3065 return FALSE;
3066 case omethodcolon:
3067 if (parlev == 0)
3068 objdef = omethodparm;
3069 return FALSE;
3070 case omethodparm:
3071 if (parlev == 0)
3072 {
3073 fvdef = fvnone;
3074 objdef = omethodtag;
3075 linebuffer_setlen (&token_name, token_name.len + len);
3076 strncat (token_name.buffer, str, len);
3077 return TRUE;
3078 }
3079 return FALSE;
3080 case oignore:
3081 if (toktype == st_C_objend)
3082 {
3083 /* Memory leakage here: the string pointed by objtag is
3084 never released, because many tests would be needed to
3085 avoid breaking on incorrect input code. The amount of
3086 memory leaked here is the sum of the lengths of the
3087 class tags.
3088 free (objtag); */
3089 objdef = onone;
3090 }
3091 return FALSE;
3092 }
3093
3094 /* A function, variable or enum constant? */
3095 switch (toktype)
3096 {
3097 case st_C_extern:
3098 fvextern = TRUE;
3099 switch (fvdef)
3100 {
3101 case finlist:
3102 case flistseen:
3103 case fignore:
3104 case vignore:
3105 break;
3106 default:
3107 fvdef = fvnone;
3108 }
3109 return FALSE;
3110 case st_C_ignore:
3111 fvextern = FALSE;
3112 fvdef = vignore;
3113 return FALSE;
3114 case st_C_operator:
3115 fvdef = foperator;
3116 *is_func_or_var = TRUE;
3117 return TRUE;
3118 case st_none:
3119 if (constantypedefs
3120 && structdef == snone
3121 && structtype == st_C_enum && bracelev > structbracelev)
3122 return TRUE; /* enum constant */
3123 switch (fvdef)
3124 {
3125 case fdefunkey:
3126 if (bracelev > 0)
3127 break;
3128 fvdef = fdefunname; /* GNU macro */
3129 *is_func_or_var = TRUE;
3130 return TRUE;
3131 case fvnone:
3132 switch (typdef)
3133 {
3134 case ttypeseen:
3135 return FALSE;
3136 case tnone:
3137 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3138 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3139 {
3140 fvdef = vignore;
3141 return FALSE;
3142 }
3143 break;
3144 }
3145 /* FALLTHRU */
3146 case fvnameseen:
3147 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3148 {
3149 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3150 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3151 fvdef = foperator;
3152 *is_func_or_var = TRUE;
3153 return TRUE;
3154 }
3155 if (bracelev > 0 && !instruct)
3156 break;
3157 fvdef = fvnameseen; /* function or variable */
3158 *is_func_or_var = TRUE;
3159 return TRUE;
3160 }
3161 break;
3162 }
3163
3164 return FALSE;
3165 }
3166
3167 \f
3168 /*
3169 * C_entries often keeps pointers to tokens or lines which are older than
3170 * the line currently read. By keeping two line buffers, and switching
3171 * them at end of line, it is possible to use those pointers.
3172 */
/* Each of the two entries pairs a line buffer with the value that
   `charno' had just before the line was read into it (this is what
   CNL_SAVE_DEFINEDEF stores).  The entries are selected through the
   `curndx' and `newndx' indices kept by C_entries.  */
3173 static struct
3174 {
3175 long linepos; /* value of `charno' saved before reading the line */
3176 linebuffer lb; /* the text of the line itself */
3177 } lbs[2];
3178
/* All the macros below expand to code that uses variables local to
   C_entries (`curndx', `newndx', `c_ext', `lp', `inf', `quotednl',
   `savetoken'), so they are only meaningful inside that function.  */

/* True when no buffer switch has happened since the last line was
   read, that is when `newndx' still equals `curndx'.  */
3179 #define current_lb_is_new (newndx == curndx)
/* Make the other one of the two buffers the current one.  */
3180 #define switch_line_buffers() (curndx = 1 - curndx)
3181
/* The current and the newest line buffer, and the positions saved
   together with them.  */
3182 #define curlb (lbs[curndx].lb)
3183 #define newlb (lbs[newndx].lb)
3184 #define curlinepos (lbs[curndx].linepos)
3185 #define newlinepos (lbs[newndx].linepos)
3186
/* Tests on the language bits stored in `c_ext'.  */
3187 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3188 #define cplpl (c_ext & C_PLPL)
3189 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3190
/* Read the next input line into the current buffer, remember the
   value of `charno' from before the read, point `lp' at the start of
   the new line and record that the current buffer now holds the
   newest line.  `definedef' is deliberately left untouched, hence the
   name.  */
3191 #define CNL_SAVE_DEFINEDEF() \
3192 do { \
3193 curlinepos = charno; \
3194 readline (&curlb, inf); \
3195 lp = curlb.buffer; \
3196 quotednl = FALSE; \
3197 newndx = curndx; \
3198 } while (0)
3199
/* Like CNL_SAVE_DEFINEDEF, but additionally restore the token put
   aside in `savetoken' (if it is valid) and reset `definedef' to
   dnone.  */
3200 #define CNL() \
3201 do { \
3202 CNL_SAVE_DEFINEDEF(); \
3203 if (savetoken.valid) \
3204 { \
3205 token = savetoken; \
3206 savetoken.valid = FALSE; \
3207 } \
3208 definedef = dnone; \
3209 } while (0)
3210
3211
3212 static void
3213 make_C_tag (isfun)
3214 bool isfun;
3215 {
3216 /* This function is never called when token.valid is FALSE, but
3217 we must protect against invalid input or internal errors. */
3218 if (!DEBUG && !token.valid)
3219 return;
3220
3221 if (token.valid)
3222 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3223 token.offset+token.length+1, token.lineno, token.linepos);
3224 else /* this case is optimised away if !DEBUG */
3225 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3226 token_name.len + 17, isfun, token.line,
3227 token.offset+token.length+1, token.lineno, token.linepos);
3228
3229 token.valid = FALSE;
3230 }
3231
3232
3233 /*
3234 * C_entries ()
3235 * This routine finds functions, variables, typedefs,
3236 * #define's, enum constants and struct/union/enum definitions in
3237 * C syntax and adds them to the list.
 *
 * C_EXT holds the language bits (tested through the plainc, cplpl and
 * cjava macros and against YACC and C_AUTO below); INF is the stream
 * the source is read from, one line at a time, until end of file.
 *
 * The function is a character-driven state machine.  Each iteration
 * of the main loop takes one character `c' of the current line and
 *  1. handles backslashes, comments, quoted strings, character
 *     constants and bracketed text, which hide everything else;
 *  2. collects identifier-like tokens and submits each completed one
 *     to consider_token;
 *  3. dispatches on punctuation to update the fvdef, typdef,
 *     structdef, objdef and definedef state variables and to emit
 *     tags through make_C_tag.
 * A '\0' character marks the end of the line and makes the CNL or
 * CNL_SAVE_DEFINEDEF macro read the next one.
3238 */
3239 static void
3240 C_entries (c_ext, inf)
3241 int c_ext; /* extension of C */
3242 FILE *inf; /* input file */
3243 {
3244 register char c; /* latest char read; '\0' for end of line */
3245 register char *lp; /* pointer one beyond the character `c' */
3246 int curndx, newndx; /* indices for current and new lb */
3247 register int tokoff; /* offset in line of start of current token */
3248 register int toklen; /* length of current token */
3249 char *qualifier; /* string used to qualify names */
3250 int qlen; /* length of qualifier */
3251 int bracelev; /* current brace level */
3252 int bracketlev; /* current bracket level */
3253 int parlev; /* current parenthesis level */
3254 int attrparlev; /* __attribute__ parenthesis level */
3255 int templatelev; /* current template level */
3256 int typdefbracelev; /* bracelev where a typedef struct body begun */
3257 bool incomm, inquote, inchar, quotednl, midtoken;
3258 bool yacc_rules; /* in the rules part of a yacc file */
3259 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3260
3261
/* Set up both line buffers and, the first time through (cstack.size
   is still zero), allocate the class stack.  */
3262 linebuffer_init (&lbs[0].lb);
3263 linebuffer_init (&lbs[1].lb);
3264 if (cstack.size == 0)
3265 {
3266 cstack.size = (DEBUG) ? 1 : 4;
3267 cstack.nl = 0;
3268 cstack.cname = xnew (cstack.size, char *);
3269 cstack.bracelev = xnew (cstack.size, int);
3270 }
3271
3272 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3273 curndx = newndx = 0;
/* Start with an empty line, so that the first character seen is the
   '\0' that triggers reading the first real line.  */
3274 lp = curlb.buffer;
3275 *lp = 0;
3276
/* Reset the whole parser state.  */
3277 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3278 structdef = snone; definedef = dnone; objdef = onone;
3279 yacc_rules = FALSE;
3280 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3281 token.valid = savetoken.valid = FALSE;
3282 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* The qualifier is inserted between a class name and the name of a
   member when building qualified tag names.  */
3283 if (cjava)
3284 { qualifier = "."; qlen = 1; }
3285 else
3286 { qualifier = "::"; qlen = 2; }
3287
3288
/* Main loop: one character of the current line per iteration.  */
3289 while (!feof (inf))
3290 {
3291 c = *lp++;
3292 if (c == '\\')
3293 {
3294 /* If we are at the end of the line, the next character is a
3295 '\0'; do not skip it, because it is what tells us
3296 to read the next line. */
3297 if (*lp == '\0')
3298 {
3299 quotednl = TRUE;
3300 continue;
3301 }
/* Otherwise skip the escaped character and treat the pair as a
   single blank.  */
3302 lp++;
3303 c = ' ';
3304 }
3305 else if (incomm)
3306 {
/* Inside a comment: only its closing delimiter and the end of the
   line are of interest.  */
3307 switch (c)
3308 {
3309 case '*':
3310 if (*lp == '/')
3311 {
3312 c = *lp++;
3313 incomm = FALSE;
3314 }
3315 break;
3316 case '\0':
3317 /* Newlines inside comments do not end macro definitions in
3318 traditional cpp. */
3319 CNL_SAVE_DEFINEDEF ();
3320 break;
3321 }
3322 continue;
3323 }
3324 else if (inquote)
3325 {
/* Inside a string literal: wait for the closing double quote.  */
3326 switch (c)
3327 {
3328 case '"':
3329 inquote = FALSE;
3330 break;
3331 case '\0':
3332 /* Newlines inside strings do not end macro definitions
3333 in traditional cpp, even though compilers don't
3334 usually accept them. */
3335 CNL_SAVE_DEFINEDEF ();
3336 break;
3337 }
3338 continue;
3339 }
3340 else if (inchar)
3341 {
/* Inside a character constant: it ends at the closing quote or, as
   a recovery measure, at the end of the line.  */
3342 switch (c)
3343 {
3344 case '\0':
3345 /* Hmmm, something went wrong. */
3346 CNL ();
3347 /* FALLTHRU */
3348 case '\'':
3349 inchar = FALSE;
3350 break;
3351 }
3352 continue;
3353 }
3354 else if (bracketlev > 0)
3355 {
/* Inside square brackets: ignore everything except closing brackets
   and the end of the line.  Opening brackets seen here do not
   increase the level.  */
3356 switch (c)
3357 {
3358 case ']':
3359 if (--bracketlev > 0)
3360 continue;
3361 break;
3362 case '\0':
3363 CNL_SAVE_DEFINEDEF ();
3364 break;
3365 }
3366 continue;
3367 }
/* Ordinary text: look for characters that open one of the special
   contexts handled above, or that need early treatment.  A `break'
   out of this switch goes on to the token and punctuation handling
   below, a `continue' fetches the next character.  */
3368 else switch (c)
3369 {
3370 case '"':
3371 inquote = TRUE;
3372 if (inattribute)
3373 break;
3374 switch (fvdef)
3375 {
3376 case fdefunkey:
3377 case fstartlist:
3378 case finlist:
3379 case fignore:
3380 case vignore:
3381 break;
3382 default:
3383 fvextern = FALSE;
3384 fvdef = fvnone;
3385 }
3386 continue;
3387 case '\'':
3388 inchar = TRUE;
3389 if (inattribute)
3390 break;
3391 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3392 {
3393 fvextern = FALSE;
3394 fvdef = fvnone;
3395 }
3396 continue;
3397 case '/':
3398 if (*lp == '*')
3399 {
3400 lp++;
3401 incomm = TRUE;
3402 continue;
3403 }
3404 else if (/* cplpl && */ *lp == '/')
3405 {
/* A // comment: behave as if the line ended here.  */
3406 c = '\0';
3407 break;
3408 }
3409 else
3410 break;
3411 case '%':
3412 if ((c_ext & YACC) && *lp == '%')
3413 {
3414 /* Entering or exiting rules section in yacc file. */
3415 lp++;
3416 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3417 typdef = tnone; structdef = snone;
3418 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3419 bracelev = 0;
3420 yacc_rules = !yacc_rules;
3421 continue;
3422 }
3423 else
3424 break;
3425 case '#':
3426 if (definedef == dnone)
3427 {
3428 char *cp;
3429 bool cpptoken = TRUE;
3430
3431 /* Look back on this line. If all blanks, or nonblanks
3432 followed by an end of comment, this is a preprocessor
3433 token. */
3434 for (cp = newlb.buffer; cp < lp-1; cp++)
3435 if (!iswhite (*cp))
3436 {
3437 if (*cp == '*' && *(cp+1) == '/')
3438 {
3439 cp++;
3440 cpptoken = TRUE;
3441 }
3442 else
3443 cpptoken = FALSE;
3444 }
3445 if (cpptoken)
3446 definedef = dsharpseen;
3447 } /* if (definedef == dnone) */
3448 continue;
3449 case '[':
3450 bracketlev++;
3451 continue;
3452 } /* switch (c) */
3453
3454
3455 /* Consider token only if some involved conditions are satisfied. */
3456 if (typdef != tignore
3457 && definedef != dignorerest
3458 && fvdef != finlist
3459 && templatelev == 0
3460 && (definedef != dnone
3461 || structdef != scolonseen)
3462 && !inattribute)
3463 {
3464 if (midtoken)
3465 {
3466 if (endtoken (c))
3467 {
3468 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3469 /* This handles :: in the middle,
3470 but not at the beginning of an identifier.
3471 Also, space-separated :: is not recognised. */
3472 {
3473 if (c_ext & C_AUTO) /* automatic detection of C++ */
3474 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3475 lp += 2;
3476 toklen += 2;
3477 c = lp[-1];
3478 goto still_in_token;
3479 }
3480 else
3481 {
/* The token is complete: let consider_token decide whether it is
   worth remembering.  In the rules part of a yacc file every token
   is accepted without asking.  */
3482 bool funorvar = FALSE;
3483
3484 if (yacc_rules
3485 || consider_token (newlb.buffer + tokoff, toklen, c,
3486 &c_ext, bracelev, parlev,
3487 &funorvar))
3488 {
3489 if (fvdef == foperator)
3490 {
/* Extend the token over the operator symbol that follows: skip
   blanks, then advance up to the next blank, '(' or end of line.  */
3491 char *oldlp = lp;
3492 lp = skip_spaces (lp-1);
3493 if (*lp != '\0')
3494 lp += 1;
3495 while (*lp != '\0'
3496 && !iswhite (*lp) && *lp != '(')
3497 lp += 1;
3498 c = *lp++;
3499 toklen += lp - oldlp;
3500 }
/* Build the tag name in token_name; token.named is set in each of
   the branches below.  */
3501 token.named = FALSE;
3502 if (!plainc
3503 && nestlev > 0 && definedef == dnone)
3504 /* in struct body */
3505 {
3506 write_classname (&token_name, qualifier);
3507 linebuffer_setlen (&token_name,
3508 token_name.len+qlen+toklen);
3509 strcat (token_name.buffer, qualifier);
3510 strncat (token_name.buffer,
3511 newlb.buffer + tokoff, toklen);
3512 token.named = TRUE;
3513 }
3514 else if (objdef == ocatseen)
3515 /* Objective C category */
3516 {
3517 int len = strlen (objtag) + 2 + toklen;
3518 linebuffer_setlen (&token_name, len);
3519 strcpy (token_name.buffer, objtag);
3520 strcat (token_name.buffer, "(");
3521 strncat (token_name.buffer,
3522 newlb.buffer + tokoff, toklen);
3523 strcat (token_name.buffer, ")");
3524 token.named = TRUE;
3525 }
3526 else if (objdef == omethodtag
3527 || objdef == omethodparm)
3528 /* Objective C method */
3529 {
/* Nothing is copied here: token_name is left as it is.  */
3530 token.named = TRUE;
3531 }
3532 else if (fvdef == fdefunname)
3533 /* GNU DEFUN and similar macros */
3534 {
3535 bool defun = (newlb.buffer[tokoff] == 'F');
3536 int off = tokoff;
3537 int len = toklen;
3538
3539 /* Rewrite the tag so that emacs lisp DEFUNs
3540 can be found by their elisp name */
3541 if (defun)
3542 {
3543 off += 1;
3544 len -= 1;
3545 }
3546 linebuffer_setlen (&token_name, len);
3547 strncpy (token_name.buffer,
3548 newlb.buffer + off, len);
3549 token_name.buffer[len] = '\0';
3550 if (defun)
3551 while (--len >= 0)
3552 if (token_name.buffer[len] == '_')
3553 token_name.buffer[len] = '-';
3554 token.named = defun;
3555 }
3556 else
3557 {
3558 linebuffer_setlen (&token_name, toklen);
3559 strncpy (token_name.buffer,
3560 newlb.buffer + tokoff, toklen);
3561 token_name.buffer[toklen] = '\0';
3562 /* Name macros and members. */
3563 token.named = (structdef == stagseen
3564 || typdef == ttypeseen
3565 || typdef == tend
3566 || (funorvar
3567 && definedef == dignorerest)
3568 || (funorvar
3569 && definedef == dnone
3570 && structdef == snone
3571 && bracelev > 0));
3572 }
/* Record where the token was found.  */
3573 token.lineno = lineno;
3574 token.offset = tokoff;
3575 token.length = toklen;
3576 token.line = newlb.buffer;
3577 token.linepos = newlinepos;
3578 token.valid = TRUE;
3579
/* Either keep the token pending, switching line buffers so that the
   line it points into is not overwritten by the next read, or emit
   the tag at once when nothing further needs to be seen.  */
3580 if (definedef == dnone
3581 && (fvdef == fvnameseen
3582 || fvdef == foperator
3583 || structdef == stagseen
3584 || typdef == tend
3585 || typdef == ttypeseen
3586 || objdef != onone))
3587 {
3588 if (current_lb_is_new)
3589 switch_line_buffers ();
3590 }
3591 else if (definedef != dnone
3592 || fvdef == fdefunname
3593 || instruct)
3594 make_C_tag (funorvar);
3595 }
3596 else /* not yacc and consider_token failed */
3597 {
3598 if (inattribute && fvdef == fignore)
3599 {
3600 /* We have just met __attribute__ after a
3601 function parameter list: do not tag the
3602 function again. */
3603 fvdef = fvnone;
3604 }
3605 }
3606 midtoken = FALSE;
3607 }
3608 } /* if (endtoken (c)) */
3609 else if (intoken (c))
3610 still_in_token:
3611 {
3612 toklen++;
3613 continue;
3614 }
3615 } /* if (midtoken) */
3616 else if (begtoken (c))
3617 {
/* A new token starts here.  First settle what the previous state
   implies now that another identifier follows.  */
3618 switch (definedef)
3619 {
3620 case dnone:
3621 switch (fvdef)
3622 {
3623 case fstartlist:
3624 /* This prevents tagging fb in
3625 void (__attribute__((noreturn)) *fb) (void);
3626 Fixing this is not easy and not very important. */
3627 fvdef = finlist;
3628 continue;
3629 case flistseen:
3630 if (plainc || declarations)
3631 {
3632 make_C_tag (TRUE); /* a function */
3633 fvdef = fignore;
3634 }
3635 break;
3636 }
3637 if (structdef == stagseen && !cjava)
3638 {
3639 popclass_above (bracelev);
3640 structdef = snone;
3641 }
3642 break;
3643 case dsharpseen:
/* Put the pending token aside while the preprocessor line is
   handled; CNL restores it.  */
3644 savetoken = token;
3645 break;
3646 }
/* In yacc rules only a token starting in the first column is
   collected.  */
3647 if (!yacc_rules || lp == newlb.buffer + 1)
3648 {
3649 tokoff = lp - 1 - newlb.buffer;
3650 toklen = 1;
3651 midtoken = TRUE;
3652 }
3653 continue;
3654 } /* if (begtoken) */
3655 } /* if must look at token */
3656
3657
3658 /* Detect end of line, colon, comma, semicolon and various braces
3659 after having handled a token.*/
3660 switch (c)
3661 {
3662 case ':':
3663 if (inattribute)
3664 break;
3665 if (yacc_rules && token.offset == 0 && token.valid)
3666 {
3667 make_C_tag (FALSE); /* a yacc function */
3668 break;
3669 }
3670 if (definedef != dnone)
3671 break;
3672 switch (objdef)
3673 {
3674 case otagseen:
3675 objdef = oignore;
3676 make_C_tag (TRUE); /* an Objective C class */
3677 break;
3678 case omethodtag:
3679 case omethodparm:
/* The colon is made part of the method name being accumulated.  */
3680 objdef = omethodcolon;
3681 linebuffer_setlen (&token_name, token_name.len + 1);
3682 strcat (token_name.buffer, ":");
3683 break;
3684 }
3685 if (structdef == stagseen)
3686 {
3687 structdef = scolonseen;
3688 break;
3689 }
3690 /* Should be useless, but may work as a safety net. */
3691 if (cplpl && fvdef == flistseen)
3692 {
3693 make_C_tag (TRUE); /* a function */
3694 fvdef = fignore;
3695 break;
3696 }
3697 break;
3698 case ';':
3699 if (definedef != dnone || inattribute)
3700 break;
3701 switch (typdef)
3702 {
3703 case tend:
3704 case ttypeseen:
3705 make_C_tag (FALSE); /* a typedef */
3706 typdef = tnone;
3707 fvdef = fvnone;
3708 break;
3709 case tnone:
3710 case tinbody:
3711 case tignore:
3712 switch (fvdef)
3713 {
3714 case fignore:
3715 if (typdef == tignore || cplpl)
3716 fvdef = fvnone;
3717 break;
3718 case fvnameseen:
3719 if ((globals && bracelev == 0 && (!fvextern || declarations))
3720 || (members && instruct))
3721 make_C_tag (FALSE); /* a variable */
3722 fvextern = FALSE;
3723 fvdef = fvnone;
3724 token.valid = FALSE;
3725 break;
3726 case flistseen:
3727 if ((declarations
3728 && (cplpl || !instruct)
3729 && (typdef == tnone || (typdef != tignore && instruct)))
3730 || (members
3731 && plainc && instruct))
3732 make_C_tag (TRUE); /* a function */
3733 /* FALLTHRU */
3734 default:
3735 fvextern = FALSE;
3736 fvdef = fvnone;
3737 if (declarations
3738 && cplpl && structdef == stagseen)
3739 make_C_tag (FALSE); /* forward declaration */
3740 else
3741 token.valid = FALSE;
3742 } /* switch (fvdef) */
3743 /* FALLTHRU */
3744 default:
3745 if (!instruct)
3746 typdef = tnone;
3747 }
3748 if (structdef == stagseen)
3749 structdef = snone;
3750 break;
3751 case ',':
3752 if (definedef != dnone || inattribute)
3753 break;
3754 switch (objdef)
3755 {
3756 case omethodtag:
3757 case omethodparm:
3758 make_C_tag (TRUE); /* an Objective C method */
3759 objdef = oinbody;
3760 break;
3761 }
3762 switch (fvdef)
3763 {
3764 case fdefunkey:
3765 case foperator:
3766 case fstartlist:
3767 case finlist:
3768 case fignore:
3769 case vignore:
3770 break;
3771 case fdefunname:
3772 fvdef = fignore;
3773 break;
3774 case fvnameseen:
3775 if (parlev == 0
3776 && ((globals
3777 && bracelev == 0
3778 && templatelev == 0
3779 && (!fvextern || declarations))
3780 || (members && instruct)))
3781 make_C_tag (FALSE); /* a variable */
3782 break;
3783 case flistseen:
3784 if ((declarations && typdef == tnone && !instruct)
3785 || (members && typdef != tignore && instruct))
3786 {
3787 make_C_tag (TRUE); /* a function */
3788 fvdef = fvnameseen;
3789 }
3790 else if (!declarations)
3791 fvdef = fvnone;
3792 token.valid = FALSE;
3793 break;
3794 default:
3795 fvdef = fvnone;
3796 }
3797 if (structdef == stagseen)
3798 structdef = snone;
3799 break;
3800 case ']':
3801 if (definedef != dnone || inattribute)
3802 break;
3803 if (structdef == stagseen)
3804 structdef = snone;
3805 switch (typdef)
3806 {
3807 case ttypeseen:
3808 case tend:
3809 typdef = tignore;
3810 make_C_tag (FALSE); /* a typedef */
3811 break;
3812 case tnone:
3813 case tinbody:
3814 switch (fvdef)
3815 {
3816 case foperator:
3817 case finlist:
3818 case fignore:
3819 case vignore:
3820 break;
3821 case fvnameseen:
3822 if ((members && bracelev == 1)
3823 || (globals && bracelev == 0
3824 && (!fvextern || declarations)))
3825 make_C_tag (FALSE); /* a variable */
3826 /* FALLTHRU */
3827 default:
3828 fvdef = fvnone;
3829 }
3830 break;
3831 }
3832 break;
3833 case '(':
/* Parentheses belonging to an __attribute__ are counted separately
   from ordinary ones.  */
3834 if (inattribute)
3835 {
3836 attrparlev++;
3837 break;
3838 }
3839 if (definedef != dnone)
3840 break;
3841 if (objdef == otagseen && parlev == 0)
3842 objdef = oparenseen;
3843 switch (fvdef)
3844 {
3845 case fvnameseen:
3846 if (typdef == ttypeseen
3847 && *lp != '*'
3848 && !instruct)
3849 {
3850 /* This handles constructs like:
3851 typedef void OperatorFun (int fun); */
3852 make_C_tag (FALSE);
3853 typdef = tignore;
3854 fvdef = fignore;
3855 break;
3856 }
3857 /* FALLTHRU */
3858 case foperator:
3859 fvdef = fstartlist;
3860 break;
3861 case flistseen:
3862 fvdef = finlist;
3863 break;
3864 }
3865 parlev++;
3866 break;
3867 case ')':
3868 if (inattribute)
3869 {
3870 if (--attrparlev == 0)
3871 inattribute = FALSE;
3872 break;
3873 }
3874 if (definedef != dnone)
3875 break;
3876 if (objdef == ocatseen && parlev == 1)
3877 {
3878 make_C_tag (TRUE); /* an Objective C category */
3879 objdef = oignore;
3880 }
3881 if (--parlev == 0)
3882 {
3883 switch (fvdef)
3884 {
3885 case fstartlist:
3886 case finlist:
3887 fvdef = flistseen;
3888 break;
3889 }
3890 if (!instruct
3891 && (typdef == tend
3892 || typdef == ttypeseen))
3893 {
3894 typdef = tignore;
3895 make_C_tag (FALSE); /* a typedef */
3896 }
3897 }
3898 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3899 parlev = 0;
3900 break;
3901 case '{':
3902 if (definedef != dnone)
3903 break;
3904 if (typdef == ttypeseen)
3905 {
3906 /* Whenever typdef is set to tinbody (currently only
3907 here), typdefbracelev should be set to bracelev. */
3908 typdef = tinbody;
3909 typdefbracelev = bracelev;
3910 }
3911 switch (fvdef)
3912 {
3913 case flistseen:
3914 make_C_tag (TRUE); /* a function */
3915 /* FALLTHRU */
3916 case fignore:
3917 fvdef = fvnone;
3918 break;
3919 case fvnone:
3920 switch (objdef)
3921 {
3922 case otagseen:
3923 make_C_tag (TRUE); /* an Objective C class */
3924 objdef = oignore;
3925 break;
3926 case omethodtag:
3927 case omethodparm:
3928 make_C_tag (TRUE); /* an Objective C method */
3929 objdef = oinbody;
3930 break;
3931 default:
3932 /* Neutralize `extern "C" {' grot. */
3933 if (bracelev == 0 && structdef == snone && nestlev == 0
3934 && typdef == tnone)
3935 bracelev = -1;
3936 }
3937 break;
3938 }
/* Entering the body of a struct, class or enum: push it on the
   class stack.  An unnamed one is pushed with a null name.  */
3939 switch (structdef)
3940 {
3941 case skeyseen: /* unnamed struct */
3942 pushclass_above (bracelev, NULL, 0);
3943 structdef = snone;
3944 break;
3945 case stagseen: /* named struct or enum */
3946 case scolonseen: /* a class */
3947 pushclass_above (bracelev,token.line+token.offset, token.length);
3948 structdef = snone;
3949 make_C_tag (FALSE); /* a struct or enum */
3950 break;
3951 }
3952 bracelev++;
3953 break;
3954 case '*':
3955 if (definedef != dnone)
3956 break;
3957 if (fvdef == fstartlist)
3958 {
3959 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3960 token.valid = FALSE;
3961 }
3962 break;
3963 case '}':
3964 if (definedef != dnone)
3965 break;
/* Unless ignoreindent is set, a closing brace in the first column
   is taken to close everything that is still open.  */
3966 if (!ignoreindent && lp == newlb.buffer + 1)
3967 {
3968 if (bracelev != 0)
3969 token.valid = FALSE;
3970 bracelev = 0; /* reset brace level if first column */
3971 parlev = 0; /* also reset paren level, just in case... */
3972 }
3973 else if (bracelev > 0)
3974 bracelev--;
3975 else
3976 token.valid = FALSE; /* something gone amiss, token unreliable */
3977 popclass_above (bracelev);
3978 structdef = snone;
3979 /* Only if typdef == tinbody is typdefbracelev significant. */
3980 if (typdef == tinbody && bracelev <= typdefbracelev)
3981 {
3982 assert (bracelev == typdefbracelev);
3983 typdef = tend;
3984 }
3985 break;
3986 case '=':
3987 if (definedef != dnone)
3988 break;
3989 switch (fvdef)
3990 {
3991 case foperator:
3992 case finlist:
3993 case fignore:
3994 case vignore:
3995 break;
3996 case fvnameseen:
3997 if ((members && bracelev == 1)
3998 || (globals && bracelev == 0 && (!fvextern || declarations)))
3999 make_C_tag (FALSE); /* a variable */
4000 /* FALLTHRU */
4001 default:
4002 fvdef = vignore;
4003 }
4004 break;
4005 case '<':
/* In C++, an angle bracket right after a struct tag or a name opens
   a template argument list; tokens are not considered while
   templatelev is positive.  */
4006 if (cplpl
4007 && (structdef == stagseen || fvdef == fvnameseen))
4008 {
4009 templatelev++;
4010 break;
4011 }
4012 goto resetfvdef;
4013 case '>':
4014 if (templatelev > 0)
4015 {
4016 templatelev--;
4017 break;
4018 }
4019 goto resetfvdef;
4020 case '+':
4021 case '-':
/* At brace level zero inside an Objective C body, a sign introduces
   a method.  */
4022 if (objdef == oinbody && bracelev == 0)
4023 {
4024 objdef = omethodsign;
4025 break;
4026 }
4027 /* FALLTHRU */
4028 resetfvdef:
4029 case '#': case '~': case '&': case '%': case '/':
4030 case '|': case '^': case '!': case '.': case '?':
4031 if (definedef != dnone)
4032 break;
4033 /* These surely cannot follow a function tag in C. */
4034 switch (fvdef)
4035 {
4036 case foperator:
4037 case finlist:
4038 case fignore:
4039 case vignore:
4040 break;
4041 default:
4042 fvdef = fvnone;
4043 }
4044 break;
4045 case '\0':
4046 if (objdef == otagseen)
4047 {
4048 make_C_tag (TRUE); /* an Objective C class */
4049 objdef = oignore;
4050 }
4051 /* If a macro spans multiple lines don't reset its state. */
4052 if (quotednl)
4053 CNL_SAVE_DEFINEDEF ();
4054 else
4055 CNL ();
4056 break;
4057 } /* switch (c) */
4058
4059 } /* while not eof */
4060
/* Release the two line buffers allocated on entry.  */
4061 free (lbs[0].lb.buffer);
4062 free (lbs[1].lb.buffer);
4063 }
4064
4065 /*
4066 * Process either a C++ file or a C file depending on the setting
4067 * of a global flag.
4068 */
4069 static void
4070 default_C_entries (inf)
4071 FILE *inf;
4072 {
4073 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4074 }
4075
/* Parse INF as plain C: C_entries is called with no language bits set.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no extension at all */

  C_entries (c_ext, inf);
}
4083
4084 /* Always do C++. */
4085 static void
4086 Cplusplus_entries (inf)
4087 FILE *inf;
4088 {
4089 C_entries (C_PLPL, inf);
4090 }
4091
4092 /* Always do Java. */
4093 static void
4094 Cjava_entries (inf)
4095 FILE *inf;
4096 {
4097 C_entries (C_JAVA, inf);
4098 }
4099
4100 /* Always do C*. */
4101 static void
4102 Cstar_entries (inf)
4103 FILE *inf;
4104 {
4105 C_entries (C_STAR, inf);
4106 }
4107
4108 /* Always do Yacc. */
4109 static void
4110 Yacc_entries (inf)
4111 FILE *inf;
4112 {
4113 C_entries (YACC, inf);
4114 }
4115
4116 \f
4117 /* Useful macros. */

/* Loop header that runs the statement following it once per input
   line: while FILE_POINTER is not at end of file, a line is read into
   LINE_BUFFER and CHAR_POINTER is set to the start of its text.  The
   end-of-file test comes before the read.  */
4118 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4119 for (; /* loop initialization */ \
4120 !feof (file_pointer) /* loop test */ \
4121 && /* instructions at start of loop */ \
4122 (readline (&line_buffer, file_pointer), \
4123 char_pointer = line_buffer.buffer, \
4124 TRUE); \
4125 )
4126
/* True if CP points at the keyword KW followed by a character for
   which notinname holds.  On success CP, which must be an lvalue, is
   moved past the keyword and any spaces following it; on failure it
   is left alone.  CP is evaluated more than once.  */
4127 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4128 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4129 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4130 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4131 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4132
4133 /* Similar to LOOKING_AT but does not use notinname, does not skip */
/* spaces after the keyword, and compares with strncaseeq instead of
   strneq.  On success CP is moved just past the keyword.  */
4134 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4135 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4136 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4137 && ((cp) += sizeof(kw)-1)) /* skip the keyword */
4138
4139 /*
4140 * Read a file, but do no processing. This is used to do regexp
4141 * matching on files that have no language defined.
4142 */
4143 static void
4144 just_read_file (inf)
4145 FILE *inf;
4146 {
4147 register char *dummy;
4148
4149 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4150 continue;
4151 }
4152
4153 \f
4154 /* Fortran parsing */
4155
4156 static void F_takeprec __P((void));
4157 static void F_getit __P((FILE *));
4158
4159 static void
4160 F_takeprec ()
4161 {
4162 dbp = skip_spaces (dbp);
4163 if (*dbp != '*')
4164 return;
4165 dbp++;
4166 dbp = skip_spaces (dbp);
4167 if (strneq (dbp, "(*)", 3))
4168 {
4169 dbp += 3;
4170 return;
4171 }
4172 if (!ISDIGIT (*dbp))
4173 {
4174 --dbp; /* force failure */
4175 return;
4176 }
4177 do
4178 dbp++;
4179 while (ISDIGIT (*dbp));
4180 }
4181
4182 static void
4183 F_getit (inf)
4184 FILE *inf;
4185 {
4186 register char *cp;
4187
4188 dbp = skip_spaces (dbp);
4189 if (*dbp == '\0')
4190 {
4191 readline (&lb, inf);
4192 dbp = lb.buffer;
4193 if (dbp[5] != '&')
4194 return;
4195 dbp += 6;
4196 dbp = skip_spaces (dbp);
4197 }
4198 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4199 return;
4200 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4201 continue;
4202 make_tag (dbp, cp-dbp, TRUE,
4203 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4204 }
4205
4206
4207 static void
4208 Fortran_functions (inf)
4209 FILE *inf;
4210 {
4211 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4212 {
4213 if (*dbp == '%')
4214 dbp++; /* Ratfor escape to fortran */
4215 dbp = skip_spaces (dbp);
4216 if (*dbp == '\0')
4217 continue;
4218 switch (lowcase (*dbp))
4219 {
4220 case 'i':
4221 if (nocase_tail ("integer"))
4222 F_takeprec ();
4223 break;
4224 case 'r':
4225 if (nocase_tail ("real"))
4226 F_takeprec ();
4227 break;
4228 case 'l':
4229 if (nocase_tail ("logical"))
4230 F_takeprec ();
4231 break;
4232 case 'c':
4233 if (nocase_tail ("complex") || nocase_tail ("character"))
4234 F_takeprec ();
4235 break;
4236 case 'd':
4237 if (nocase_tail ("double"))
4238 {
4239 dbp = skip_spaces (dbp);
4240 if (*dbp == '\0')
4241 continue;
4242 if (nocase_tail ("precision"))
4243 break;
4244 continue;
4245 }
4246 break;
4247 }
4248 dbp = skip_spaces (dbp);
4249 if (*dbp == '\0')
4250 continue;
4251 switch (lowcase (*dbp))
4252 {
4253 case 'f':
4254 if (nocase_tail ("function"))
4255 F_getit (inf);
4256 continue;
4257 case 's':
4258 if (nocase_tail ("subroutine"))
4259 F_getit (inf);
4260 continue;
4261 case 'e':
4262 if (nocase_tail ("entry"))
4263 F_getit (inf);
4264 continue;
4265 case 'b':
4266 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4267 {
4268 dbp = skip_spaces (dbp);
4269 if (*dbp == '\0') /* assume un-named */
4270 make_tag ("blockdata", 9, TRUE,
4271 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4272 else
4273 F_getit (inf); /* look for name */
4274 }
4275 continue;
4276 }
4277 }
4278 }
4279
4280 \f
4281 /*
4282 * Ada parsing
4283 * Original code by
4284 * Philippe Waroquiers (1998)
4285 */
4286
4287 static void Ada_getit __P((FILE *, char *));
4288
4289 /* Once we are positioned after an "interesting" keyword, let's get
4290 the real tag value necessary. */
4291 static void
4292 Ada_getit (inf, name_qualifier)
4293 FILE *inf;
4294 char *name_qualifier;
4295 {
4296 register char *cp;
4297 char *name;
4298 char c;
4299
4300 while (!feof (inf))
4301 {
4302 dbp = skip_spaces (dbp);
4303 if (*dbp == '\0'
4304 || (dbp[0] == '-' && dbp[1] == '-'))
4305 {
4306 readline (&lb, inf);
4307 dbp = lb.buffer;
4308 }
4309 switch (lowcase(*dbp))
4310 {
4311 case 'b':
4312 if (nocase_tail ("body"))
4313 {
4314 /* Skipping body of procedure body or package body or ....
4315 resetting qualifier to body instead of spec. */
4316 name_qualifier = "/b";
4317 continue;
4318 }
4319 break;
4320 case 't':
4321 /* Skipping type of task type or protected type ... */
4322 if (nocase_tail ("type"))
4323 continue;
4324 break;
4325 }
4326 if (*dbp == '"')
4327 {
4328 dbp += 1;
4329 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4330 continue;
4331 }
4332 else
4333 {
4334 dbp = skip_spaces (dbp);
4335 for (cp = dbp;
4336 (*cp != '\0'
4337 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4338 cp++)
4339 continue;
4340 if (cp == dbp)
4341 return;
4342 }
4343 c = *cp;
4344 *cp = '\0';
4345 name = concat (dbp, name_qualifier, "");
4346 *cp = c;
4347 make_tag (name, strlen (name), TRUE,
4348 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4349 free (name);
4350 if (c == '"')
4351 dbp = cp + 1;
4352 return;
4353 }
4354 }
4355
4356 static void
4357 Ada_funcs (inf)
4358 FILE *inf;
4359 {
4360 bool inquote = FALSE;
4361 bool skip_till_semicolumn = FALSE;
4362
4363 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4364 {
4365 while (*dbp != '\0')
4366 {
4367 /* Skip a string i.e. "abcd". */
4368 if (inquote || (*dbp == '"'))
4369 {
4370 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4371 if (dbp != NULL)
4372 {
4373 inquote = FALSE;
4374 dbp += 1;
4375 continue; /* advance char */
4376 }
4377 else
4378 {
4379 inquote = TRUE;
4380 break; /* advance line */
4381 }
4382 }
4383
4384 /* Skip comments. */
4385 if (dbp[0] == '-' && dbp[1] == '-')
4386 break; /* advance line */
4387
4388 /* Skip character enclosed in single quote i.e. 'a'
4389 and skip single quote starting an attribute i.e. 'Image. */
4390 if (*dbp == '\'')
4391 {
4392 dbp++ ;
4393 if (*dbp != '\0')
4394 dbp++;
4395 continue;
4396 }
4397
4398 if (skip_till_semicolumn)
4399 {
4400 if (*dbp == ';')
4401 skip_till_semicolumn = FALSE;
4402 dbp++;
4403 continue; /* advance char */
4404 }
4405
4406 /* Search for beginning of a token. */
4407 if (!begtoken (*dbp))
4408 {
4409 dbp++;
4410 continue; /* advance char */
4411 }
4412
4413 /* We are at the beginning of a token. */
4414 switch (lowcase(*dbp))
4415 {
4416 case 'f':
4417 if (!packages_only && nocase_tail ("function"))
4418 Ada_getit (inf, "/f");
4419 else
4420 break; /* from switch */
4421 continue; /* advance char */
4422 case 'p':
4423 if (!packages_only && nocase_tail ("procedure"))
4424 Ada_getit (inf, "/p");
4425 else if (nocase_tail ("package"))
4426 Ada_getit (inf, "/s");
4427 else if (nocase_tail ("protected")) /* protected type */
4428 Ada_getit (inf, "/t");
4429 else
4430 break; /* from switch */
4431 continue; /* advance char */
4432
4433 case 'u':
4434 if (typedefs && !packages_only && nocase_tail ("use"))
4435 {
4436 /* when tagging types, avoid tagging use type Pack.Typename;
4437 for this, we will skip everything till a ; */
4438 skip_till_semicolumn = TRUE;
4439 continue; /* advance char */
4440 }
4441
4442 case 't':
4443 if (!packages_only && nocase_tail ("task"))
4444 Ada_getit (inf, "/k");
4445 else if (typedefs && !packages_only && nocase_tail ("type"))
4446 {
4447 Ada_getit (inf, "/t");
4448 while (*dbp != '\0')
4449 dbp += 1;
4450 }
4451 else
4452 break; /* from switch */
4453 continue; /* advance char */
4454 }
4455
4456 /* Look for the end of the token. */
4457 while (!endtoken (*dbp))
4458 dbp++;
4459
4460 } /* advance char */
4461 } /* advance line */
4462 }
4463
4464 \f
4465 /*
4466 * Unix and microcontroller assembly tag handling
4467 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4468 * Idea by Bob Weiner, Motorola Inc. (1994)
4469 */
4470 static void
4471 Asm_labels (inf)
4472 FILE *inf;
4473 {
4474 register char *cp;
4475
4476 LOOP_ON_INPUT_LINES (inf, lb, cp)
4477 {
4478 /* If first char is alphabetic or one of [_.$], test for colon
4479 following identifier. */
4480 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4481 {
4482 /* Read past label. */
4483 cp++;
4484 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4485 cp++;
4486 if (*cp == ':' || iswhite (*cp))
4487 /* Found end of label, so copy it and add it to the table. */
4488 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4489 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4490 }
4491 }
4492 }
4493
4494 \f
4495 /*
4496 * Perl support
4497 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4498 * Perl variable names: /^(my|local).../
4499 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4500 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4501 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4502 */
4503 static void
4504 Perl_functions (inf)
4505 FILE *inf;
4506 {
4507 char *package = savestr ("main"); /* current package name */
4508 register char *cp;
4509
4510 LOOP_ON_INPUT_LINES (inf, lb, cp)
4511 {
4512 skip_spaces(cp);
4513
4514 if (LOOKING_AT (cp, "package"))
4515 {
4516 free (package);
4517 get_tag (cp, &package);
4518 }
4519 else if (LOOKING_AT (cp, "sub"))
4520 {
4521 char *pos;
4522 char *sp = cp;
4523
4524 while (!notinname (*cp))
4525 cp++;
4526 if (cp == sp)
4527 continue; /* nothing found */
4528 if ((pos = etags_strchr (sp, ':')) != NULL
4529 && pos < cp && pos[1] == ':')
4530 /* The name is already qualified. */
4531 make_tag (sp, cp - sp, TRUE,
4532 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4533 else
4534 /* Qualify it. */
4535 {
4536 char savechar, *name;
4537
4538 savechar = *cp;
4539 *cp = '\0';
4540 name = concat (package, "::", sp);
4541 *cp = savechar;
4542 make_tag (name, strlen(name), TRUE,
4543 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4544 free (name);
4545 }
4546 }
4547 else if (globals) /* only if we are tagging global vars */
4548 {
4549 /* Skip a qualifier, if any. */
4550 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4551 /* After "my" or "local", but before any following paren or space. */
4552 char *varstart = cp;
4553
4554 if (qual /* should this be removed? If yes, how? */
4555 && (*cp == '$' || *cp == '@' || *cp == '%'))
4556 {
4557 varstart += 1;
4558 do
4559 cp++;
4560 while (ISALNUM (*cp) || *cp == '_');
4561 }
4562 else if (qual)
4563 {
4564 /* Should be examining a variable list at this point;
4565 could insist on seeing an open parenthesis. */
4566 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4567 cp++;
4568 }
4569 else
4570 continue;
4571
4572 make_tag (varstart, cp - varstart, FALSE,
4573 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4574 }
4575 }
4576 free (package);
4577 }
4578
4579
4580 /*
4581 * Python support
4582 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4583 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4584 * More ideas by seb bacon <seb@jamkit.com> (2002)
4585 */
4586 static void
4587 Python_functions (inf)
4588 FILE *inf;
4589 {
4590 register char *cp;
4591
4592 LOOP_ON_INPUT_LINES (inf, lb, cp)
4593 {
4594 cp = skip_spaces (cp);
4595 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4596 {
4597 char *name = cp;
4598 while (!notinname (*cp) && *cp != ':')
4599 cp++;
4600 make_tag (name, cp - name, TRUE,
4601 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4602 }
4603 }
4604 }
4605
4606 \f
4607 /*
4608 * PHP support
4609 * Look for:
4610 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4611 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4612 * - /^[ \t]*define\(\"[^\"]+/
4613 * Only with --members:
4614 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4615 * Idea by Diez B. Roggisch (2001)
4616 */
4617 static void
4618 PHP_functions (inf)
4619 FILE *inf;
4620 {
4621 register char *cp, *name;
4622 bool search_identifier = FALSE;
4623
4624 LOOP_ON_INPUT_LINES (inf, lb, cp)
4625 {
4626 cp = skip_spaces (cp);
4627 name = cp;
4628 if (search_identifier
4629 && *cp != '\0')
4630 {
4631 while (!notinname (*cp))
4632 cp++;
4633 make_tag (name, cp - name, TRUE,
4634 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4635 search_identifier = FALSE;
4636 }
4637 else if (LOOKING_AT (cp, "function"))
4638 {
4639 if(*cp == '&')
4640 cp = skip_spaces (cp+1);
4641 if(*cp != '\0')
4642 {
4643 name = cp;
4644 while (!notinname (*cp))
4645 cp++;
4646 make_tag (name, cp - name, TRUE,
4647 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4648 }
4649 else
4650 search_identifier = TRUE;
4651 }
4652 else if (LOOKING_AT (cp, "class"))
4653 {
4654 if (*cp != '\0')
4655 {
4656 name = cp;
4657 while (*cp != '\0' && !iswhite (*cp))
4658 cp++;
4659 make_tag (name, cp - name, FALSE,
4660 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4661 }
4662 else
4663 search_identifier = TRUE;
4664 }
4665 else if (strneq (cp, "define", 6)
4666 && (cp = skip_spaces (cp+6))
4667 && *cp++ == '('
4668 && (*cp == '"' || *cp == '\''))
4669 {
4670 char quote = *cp++;
4671 name = cp;
4672 while (*cp != quote && *cp != '\0')
4673 cp++;
4674 make_tag (name, cp - name, FALSE,
4675 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4676 }
4677 else if (members
4678 && LOOKING_AT (cp, "var")
4679 && *cp == '$')
4680 {
4681 name = cp;
4682 while (!notinname(*cp))
4683 cp++;
4684 make_tag (name, cp - name, FALSE,
4685 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4686 }
4687 }
4688 }
4689
4690 \f
4691 /*
4692 * Cobol tag functions
4693 * We could look for anything that could be a paragraph name.
4694 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4695 * Idea by Corny de Souza (1993)
4696 */
4697 static void
4698 Cobol_paragraphs (inf)
4699 FILE *inf;
4700 {
4701 register char *bp, *ep;
4702
4703 LOOP_ON_INPUT_LINES (inf, lb, bp)
4704 {
4705 if (lb.len < 9)
4706 continue;
4707 bp += 8;
4708
4709 /* If eoln, compiler option or comment ignore whole line. */
4710 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4711 continue;
4712
4713 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4714 continue;
4715 if (*ep++ == '.')
4716 make_tag (bp, ep - bp, TRUE,
4717 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4718 }
4719 }
4720
4721 \f
4722 /*
4723 * Makefile support
4724 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4725 */
4726 static void
4727 Makefile_targets (inf)
4728 FILE *inf;
4729 {
4730 register char *bp;
4731
4732 LOOP_ON_INPUT_LINES (inf, lb, bp)
4733 {
4734 if (*bp == '\t' || *bp == '#')
4735 continue;
4736 while (*bp != '\0' && *bp != '=' && *bp != ':')
4737 bp++;
4738 if (*bp == ':' || (globals && *bp == '='))
4739 {
4740 /* We should detect if there is more than one tag, but we do not.
4741 We just skip initial and final spaces. */
4742 char * namestart = skip_spaces (lb.buffer);
4743 while (--bp > namestart)
4744 if (!notinname (*bp))
4745 break;
4746 make_tag (namestart, bp - namestart + 1, TRUE,
4747 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4748 }
4749 }
4750 }
4751
4752 \f
4753 /*
4754 * Pascal parsing
4755 * Original code by Mosur K. Mohan (1989)
4756 *
4757 * Locates tags for procedures & functions. Doesn't do any type- or
4758 * var-definitions. It does look for the keyword "extern" or
4759 * "forward" immediately following the procedure statement; if found,
4760 * the tag is skipped.
4761 */
4762 static void
4763 Pascal_functions (inf)
4764 FILE *inf;
4765 {
4766 linebuffer tline; /* mostly copied from C_entries */
4767 long save_lcno;
4768 int save_lineno, namelen, taglen;
4769 char c, *name;
4770
4771 bool /* each of these flags is TRUE iff: */
4772 incomment, /* point is inside a comment */
4773 inquote, /* point is inside '..' string */
4774 get_tagname, /* point is after PROCEDURE/FUNCTION
4775 keyword, so next item = potential tag */
4776 found_tag, /* point is after a potential tag */
4777 inparms, /* point is within parameter-list */
4778 verify_tag; /* point has passed the parm-list, so the
4779 next token will determine whether this
4780 is a FORWARD/EXTERN to be ignored, or
4781 whether it is a real tag */
4782
4783 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4784 name = NULL; /* keep compiler quiet */
4785 dbp = lb.buffer;
4786 *dbp = '\0';
4787 linebuffer_init (&tline);
4788
4789 incomment = inquote = FALSE;
4790 found_tag = FALSE; /* have a proc name; check if extern */
4791 get_tagname = FALSE; /* found "procedure" keyword */
4792 inparms = FALSE; /* found '(' after "proc" */
4793 verify_tag = FALSE; /* check if "extern" is ahead */
4794
4795
4796 while (!feof (inf)) /* long main loop to get next char */
4797 {
4798 c = *dbp++;
4799 if (c == '\0') /* if end of line */
4800 {
4801 readline (&lb, inf);
4802 dbp = lb.buffer;
4803 if (*dbp == '\0')
4804 continue;
4805 if (!((found_tag && verify_tag)
4806 || get_tagname))
4807 c = *dbp++; /* only if don't need *dbp pointing
4808 to the beginning of the name of
4809 the procedure or function */
4810 }
4811 if (incomment)
4812 {
4813 if (c == '}') /* within { } comments */
4814 incomment = FALSE;
4815 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4816 {
4817 dbp++;
4818 incomment = FALSE;
4819 }
4820 continue;
4821 }
4822 else if (inquote)
4823 {
4824 if (c == '\'')
4825 inquote = FALSE;
4826 continue;
4827 }
4828 else
4829 switch (c)
4830 {
4831 case '\'':
4832 inquote = TRUE; /* found first quote */
4833 continue;
4834 case '{': /* found open { comment */
4835 incomment = TRUE;
4836 continue;
4837 case '(':
4838 if (*dbp == '*') /* found open (* comment */
4839 {
4840 incomment = TRUE;
4841 dbp++;
4842 }
4843 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4844 inparms = TRUE;
4845 continue;
4846 case ')': /* end of parms list */
4847 if (inparms)
4848 inparms = FALSE;
4849 continue;
4850 case ';':
4851 if (found_tag && !inparms) /* end of proc or fn stmt */
4852 {
4853 verify_tag = TRUE;
4854 break;
4855 }
4856 continue;
4857 }
4858 if (found_tag && verify_tag && (*dbp != ' '))
4859 {
4860 /* Check if this is an "extern" declaration. */
4861 if (*dbp == '\0')
4862 continue;
4863 if (lowcase (*dbp == 'e'))
4864 {
4865 if (nocase_tail ("extern")) /* superfluous, really! */
4866 {
4867 found_tag = FALSE;
4868 verify_tag = FALSE;
4869 }
4870 }
4871 else if (lowcase (*dbp) == 'f')
4872 {
4873 if (nocase_tail ("forward")) /* check for forward reference */
4874 {
4875 found_tag = FALSE;
4876 verify_tag = FALSE;
4877 }
4878 }
4879 if (found_tag && verify_tag) /* not external proc, so make tag */
4880 {
4881 found_tag = FALSE;
4882 verify_tag = FALSE;
4883 make_tag (name, namelen, TRUE,
4884 tline.buffer, taglen, save_lineno, save_lcno);
4885 continue;
4886 }
4887 }
4888 if (get_tagname) /* grab name of proc or fn */
4889 {
4890 char *cp;
4891
4892 if (*dbp == '\0')
4893 continue;
4894
4895 /* Find block name. */
4896 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4897 continue;
4898
4899 /* Save all values for later tagging. */
4900 linebuffer_setlen (&tline, lb.len);
4901 strcpy (tline.buffer, lb.buffer);
4902 save_lineno = lineno;
4903 save_lcno = linecharno;
4904 name = tline.buffer + (dbp - lb.buffer);
4905 namelen = cp - dbp;
4906 taglen = cp - lb.buffer + 1;
4907
4908 dbp = cp; /* set dbp to e-o-token */
4909 get_tagname = FALSE;
4910 found_tag = TRUE;
4911 continue;
4912
4913 /* And proceed to check for "extern". */
4914 }
4915 else if (!incomment && !inquote && !found_tag)
4916 {
4917 /* Check for proc/fn keywords. */
4918 switch (lowcase (c))
4919 {
4920 case 'p':
4921 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4922 get_tagname = TRUE;
4923 continue;
4924 case 'f':
4925 if (nocase_tail ("unction"))
4926 get_tagname = TRUE;
4927 continue;
4928 }
4929 }
4930 } /* while not eof */
4931
4932 free (tline.buffer);
4933 }
4934
4935 \f
4936 /*
4937 * Lisp tag functions
4938 * look for (def or (DEF, quote or QUOTE
4939 */
4940
4941 static void L_getit __P((void));
4942
4943 static void
4944 L_getit ()
4945 {
4946 if (*dbp == '\'') /* Skip prefix quote */
4947 dbp++;
4948 else if (*dbp == '(')
4949 {
4950 dbp++;
4951 /* Try to skip "(quote " */
4952 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4953 /* Ok, then skip "(" before name in (defstruct (foo)) */
4954 dbp = skip_spaces (dbp);
4955 }
4956 get_tag (dbp, NULL);
4957 }
4958
4959 static void
4960 Lisp_functions (inf)
4961 FILE *inf;
4962 {
4963 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4964 {
4965 if (dbp[0] != '(')
4966 continue;
4967
4968 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4969 {
4970 dbp = skip_non_spaces (dbp);
4971 dbp = skip_spaces (dbp);
4972 L_getit ();
4973 }
4974 else
4975 {
4976 /* Check for (foo::defmumble name-defined ... */
4977 do
4978 dbp++;
4979 while (!notinname (*dbp) && *dbp != ':');
4980 if (*dbp == ':')
4981 {
4982 do
4983 dbp++;
4984 while (*dbp == ':');
4985
4986 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4987 {
4988 dbp = skip_non_spaces (dbp);
4989 dbp = skip_spaces (dbp);
4990 L_getit ();
4991 }
4992 }
4993 }
4994 }
4995 }
4996
4997 \f
4998 /*
4999 * Lua script language parsing
5000 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5001 *
5002 * "function" and "local function" are tags if they start at column 1.
5003 */
5004 static void
5005 Lua_functions (inf)
5006 FILE *inf;
5007 {
5008 register char *bp;
5009
5010 LOOP_ON_INPUT_LINES (inf, lb, bp)
5011 {
5012 if (bp[0] != 'f' && bp[0] != 'l')
5013 continue;
5014
5015 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5016
5017 if (LOOKING_AT (bp, "function"))
5018 get_tag (bp, NULL);
5019 }
5020 }
5021
5022 \f
5023 /*
5024 * Postscript tags
5025 * Just look for lines where the first character is '/'
5026 * Also look at "defineps" for PSWrap
5027 * Ideas by:
5028 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5029 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5030 */
5031 static void
5032 PS_functions (inf)
5033 FILE *inf;
5034 {
5035 register char *bp, *ep;
5036
5037 LOOP_ON_INPUT_LINES (inf, lb, bp)
5038 {
5039 if (bp[0] == '/')
5040 {
5041 for (ep = bp+1;
5042 *ep != '\0' && *ep != ' ' && *ep != '{';
5043 ep++)
5044 continue;
5045 make_tag (bp, ep - bp, TRUE,
5046 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5047 }
5048 else if (LOOKING_AT (bp, "defineps"))
5049 get_tag (bp, NULL);
5050 }
5051 }
5052
5053 \f
5054 /*
5055 * Forth tags
5056 * Ignore anything after \ followed by space or in ( )
5057 * Look for words defined by :
5058 * Look for constant, code, create, defer, value, and variable
5059 * OBP extensions: Look for buffer:, field,
5060 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5061 */
5062 static void
5063 Forth_words (inf)
5064 FILE *inf;
5065 {
5066 register char *bp;
5067
5068 LOOP_ON_INPUT_LINES (inf, lb, bp)
5069 while ((bp = skip_spaces (bp))[0] != '\0')
5070 if (bp[0] == '\\' && iswhite(bp[1]))
5071 break; /* read next line */
5072 else if (bp[0] == '(' && iswhite(bp[1]))
5073 do /* skip to ) or eol */
5074 bp++;
5075 while (*bp != ')' && *bp != '\0');
5076 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5077 || LOOKING_AT_NOCASE (bp, "constant")
5078 || LOOKING_AT_NOCASE (bp, "code")
5079 || LOOKING_AT_NOCASE (bp, "create")
5080 || LOOKING_AT_NOCASE (bp, "defer")
5081 || LOOKING_AT_NOCASE (bp, "value")
5082 || LOOKING_AT_NOCASE (bp, "variable")
5083 || LOOKING_AT_NOCASE (bp, "buffer:")
5084 || LOOKING_AT_NOCASE (bp, "field"))
5085 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5086 else
5087 bp = skip_non_spaces (bp);
5088 }
5089
5090 \f
5091 /*
5092 * Scheme tag functions
5093 * look for (def... xyzzy
5094 * (def... (xyzzy
5095 * (def ... ((...(xyzzy ....
5096 * (set! xyzzy
5097 * Original code by Ken Haase (1985?)
5098 */
5099 static void
5100 Scheme_functions (inf)
5101 FILE *inf;
5102 {
5103 register char *bp;
5104
5105 LOOP_ON_INPUT_LINES (inf, lb, bp)
5106 {
5107 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5108 {
5109 bp = skip_non_spaces (bp+4);
5110 /* Skip over open parens and white space */
5111 while (notinname (*bp))
5112 bp++;
5113 get_tag (bp, NULL);
5114 }
5115 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5116 get_tag (bp, NULL);
5117 }
5118 }
5119
5120 \f
5121 /* Find tags in TeX and LaTeX input files. */
5122
5123 /* TEX_toktab is a table of TeX control sequences that define tags.
5124 * Each entry records one such control sequence.
5125 *
5126 * Original code from who knows whom.
5127 * Ideas by:
5128 * Stefan Monnier (2002)
5129 */
5130
5131 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5132
5133 /* Default set of control sequences to put into TEX_toktab.
5134 The value of environment var TEXTAGS is prepended to this. */
5135 static char *TEX_defenv = "\
5136 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5137 :part:appendix:entry:index:def\
5138 :newcommand:renewcommand:newenvironment:renewenvironment";
5139
5140 static void TEX_mode __P((FILE *));
5141 static void TEX_decode_env __P((char *, char *));
5142
5143 static char TEX_esc = '\\';
5144 static char TEX_opgrp = '{';
5145 static char TEX_clgrp = '}';
5146
5147 /*
5148 * TeX/LaTeX scanning loop.
5149 */
5150 static void
5151 TeX_commands (inf)
5152 FILE *inf;
5153 {
5154 char *cp;
5155 linebuffer *key;
5156
5157 /* Select either \ or ! as escape character. */
5158 TEX_mode (inf);
5159
5160 /* Initialize token table once from environment. */
5161 if (TEX_toktab == NULL)
5162 TEX_decode_env ("TEXTAGS", TEX_defenv);
5163
5164 LOOP_ON_INPUT_LINES (inf, lb, cp)
5165 {
5166 /* Look at each TEX keyword in line. */
5167 for (;;)
5168 {
5169 /* Look for a TEX escape. */
5170 while (*cp++ != TEX_esc)
5171 if (cp[-1] == '\0' || cp[-1] == '%')
5172 goto tex_next_line;
5173
5174 for (key = TEX_toktab; key->buffer != NULL; key++)
5175 if (strneq (cp, key->buffer, key->len))
5176 {
5177 register char *p;
5178 int namelen, linelen;
5179 bool opgrp = FALSE;
5180
5181 cp = skip_spaces (cp + key->len);
5182 if (*cp == TEX_opgrp)
5183 {
5184 opgrp = TRUE;
5185 cp++;
5186 }
5187 for (p = cp;
5188 (!iswhite (*p) && *p != '#' &&
5189 *p != TEX_opgrp && *p != TEX_clgrp);
5190 p++)
5191 continue;
5192 namelen = p - cp;
5193 linelen = lb.len;
5194 if (!opgrp || *p == TEX_clgrp)
5195 {
5196 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5197 p++;
5198 linelen = p - lb.buffer + 1;
5199 }
5200 make_tag (cp, namelen, TRUE,
5201 lb.buffer, linelen, lineno, linecharno);
5202 goto tex_next_line; /* We only tag a line once */
5203 }
5204 }
5205 tex_next_line:
5206 ;
5207 }
5208 }
5209
5210 #define TEX_LESC '\\'
5211 #define TEX_SESC '!'
5212
5213 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5214 chars accordingly. */
5215 static void
5216 TEX_mode (inf)
5217 FILE *inf;
5218 {
5219 int c;
5220
5221 while ((c = getc (inf)) != EOF)
5222 {
5223 /* Skip to next line if we hit the TeX comment char. */
5224 if (c == '%')
5225 while (c != '\n' && c != EOF)
5226 c = getc (inf);
5227 else if (c == TEX_LESC || c == TEX_SESC )
5228 break;
5229 }
5230
5231 if (c == TEX_LESC)
5232 {
5233 TEX_esc = TEX_LESC;
5234 TEX_opgrp = '{';
5235 TEX_clgrp = '}';
5236 }
5237 else
5238 {
5239 TEX_esc = TEX_SESC;
5240 TEX_opgrp = '<';
5241 TEX_clgrp = '>';
5242 }
5243 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5244 No attempt is made to correct the situation. */
5245 rewind (inf);
5246 }
5247
5248 /* Read environment and prepend it to the default string.
5249 Build token table. */
5250 static void
5251 TEX_decode_env (evarname, defenv)
5252 char *evarname;
5253 char *defenv;
5254 {
5255 register char *env, *p;
5256 int i, len;
5257
5258 /* Append default string to environment. */
5259 env = getenv (evarname);
5260 if (!env)
5261 env = defenv;
5262 else
5263 {
5264 char *oldenv = env;
5265 env = concat (oldenv, defenv, "");
5266 }
5267
5268 /* Allocate a token table */
5269 for (len = 1, p = env; p;)
5270 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5271 len++;
5272 TEX_toktab = xnew (len, linebuffer);
5273
5274 /* Unpack environment string into token table. Be careful about */
5275 /* zero-length strings (leading ':', "::" and trailing ':') */
5276 for (i = 0; *env != '\0';)
5277 {
5278 p = etags_strchr (env, ':');
5279 if (!p) /* End of environment string. */
5280 p = env + strlen (env);
5281 if (p - env > 0)
5282 { /* Only non-zero strings. */
5283 TEX_toktab[i].buffer = savenstr (env, p - env);
5284 TEX_toktab[i].len = p - env;
5285 i++;
5286 }
5287 if (*p)
5288 env = p + 1;
5289 else
5290 {
5291 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5292 TEX_toktab[i].len = 0;
5293 break;
5294 }
5295 }
5296 }
5297
5298 \f
5299 /* Texinfo support. Dave Love, Mar. 2000. */
5300 static void
5301 Texinfo_nodes (inf)
5302 FILE * inf;
5303 {
5304 char *cp, *start;
5305 LOOP_ON_INPUT_LINES (inf, lb, cp)
5306 if (LOOKING_AT (cp, "@node"))
5307 {
5308 start = cp;
5309 while (*cp != '\0' && *cp != ',')
5310 cp++;
5311 make_tag (start, cp - start, TRUE,
5312 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5313 }
5314 }
5315
5316 \f
5317 /*
5318 * HTML support.
5319 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5320 * Contents of <a name=xxx> are tags with name xxx.
5321 *
5322 * Francesco Potortì, 2002.
5323 */
5324 static void
5325 HTML_labels (inf)
5326 FILE * inf;
5327 {
5328 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5329 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5330 bool intag = FALSE; /* inside an html tag, looking for ID= */
5331 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5332 char *end;
5333
5334
5335 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5336
5337 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5338 for (;;) /* loop on the same line */
5339 {
5340 if (skiptag) /* skip HTML tag */
5341 {
5342 while (*dbp != '\0' && *dbp != '>')
5343 dbp++;
5344 if (*dbp == '>')
5345 {
5346 dbp += 1;
5347 skiptag = FALSE;
5348 continue; /* look on the same line */
5349 }
5350 break; /* go to next line */
5351 }
5352
5353 else if (intag) /* look for "name=" or "id=" */
5354 {
5355 while (*dbp != '\0' && *dbp != '>'
5356 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5357 dbp++;
5358 if (*dbp == '\0')
5359 break; /* go to next line */
5360 if (*dbp == '>')
5361 {
5362 dbp += 1;
5363 intag = FALSE;
5364 continue; /* look on the same line */
5365 }
5366 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5367 || LOOKING_AT_NOCASE (dbp, "id="))
5368 {
5369 bool quoted = (dbp[0] == '"');
5370
5371 if (quoted)
5372 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5373 continue;
5374 else
5375 for (end = dbp; *end != '\0' && intoken (*end); end++)
5376 continue;
5377 linebuffer_setlen (&token_name, end - dbp);
5378 strncpy (token_name.buffer, dbp, end - dbp);
5379 token_name.buffer[end - dbp] = '\0';
5380
5381 dbp = end;
5382 intag = FALSE; /* we found what we looked for */
5383 skiptag = TRUE; /* skip to the end of the tag */
5384 getnext = TRUE; /* then grab the text */
5385 continue; /* look on the same line */
5386 }
5387 dbp += 1;
5388 }
5389
5390 else if (getnext) /* grab next tokens and tag them */
5391 {
5392 dbp = skip_spaces (dbp);
5393 if (*dbp == '\0')
5394 break; /* go to next line */
5395 if (*dbp == '<')
5396 {
5397 intag = TRUE;
5398 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5399 continue; /* look on the same line */
5400 }
5401
5402 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5403 continue;
5404 make_tag (token_name.buffer, token_name.len, TRUE,
5405 dbp, end - dbp, lineno, linecharno);
5406 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5407 getnext = FALSE;
5408 break; /* go to next line */
5409 }
5410
5411 else /* look for an interesting HTML tag */
5412 {
5413 while (*dbp != '\0' && *dbp != '<')
5414 dbp++;
5415 if (*dbp == '\0')
5416 break; /* go to next line */
5417 intag = TRUE;
5418 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5419 {
5420 inanchor = TRUE;
5421 continue; /* look on the same line */
5422 }
5423 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5424 || LOOKING_AT_NOCASE (dbp, "<h1>")
5425 || LOOKING_AT_NOCASE (dbp, "<h2>")
5426 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5427 {
5428 intag = FALSE;
5429 getnext = TRUE;
5430 continue; /* look on the same line */
5431 }
5432 dbp += 1;
5433 }
5434 }
5435 }
5436
5437 \f
5438 /*
5439 * Prolog support
5440 *
5441 * Assumes that the predicate or rule starts at column 0.
5442 * Only the first clause of a predicate or rule is added.
5443 * Original code by Sunichirou Sugou (1989)
5444 * Rewritten by Anders Lindgren (1996)
5445 */
5446 static int prolog_pr __P((char *, char *));
5447 static void prolog_skip_comment __P((linebuffer *, FILE *));
5448 static int prolog_atom __P((char *, int));
5449
5450 static void
5451 Prolog_functions (inf)
5452 FILE *inf;
5453 {
5454 char *cp, *last;
5455 int len;
5456 int allocated;
5457
5458 allocated = 0;
5459 len = 0;
5460 last = NULL;
5461
5462 LOOP_ON_INPUT_LINES (inf, lb, cp)
5463 {
5464 if (cp[0] == '\0') /* Empty line */
5465 continue;
5466 else if (iswhite (cp[0])) /* Not a predicate */
5467 continue;
5468 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5469 prolog_skip_comment (&lb, inf);
5470 else if ((len = prolog_pr (cp, last)) > 0)
5471 {
5472 /* Predicate or rule. Store the function name so that we
5473 only generate a tag for the first clause. */
5474 if (last == NULL)
5475 last = xnew(len + 1, char);
5476 else if (len + 1 > allocated)
5477 xrnew (last, len + 1, char);
5478 allocated = len + 1;
5479 strncpy (last, cp, len);
5480 last[len] = '\0';
5481 }
5482 }
5483 if (last != NULL)
5484 free (last);
5485 }
5486
5487
5488 static void
5489 prolog_skip_comment (plb, inf)
5490 linebuffer *plb;
5491 FILE *inf;
5492 {
5493 char *cp;
5494
5495 do
5496 {
5497 for (cp = plb->buffer; *cp != '\0'; cp++)
5498 if (cp[0] == '*' && cp[1] == '/')
5499 return;
5500 readline (plb, inf);
5501 }
5502 while (!feof(inf));
5503 }
5504
5505 /*
5506 * A predicate or rule definition is added if it matches:
5507 * <beginning of line><Prolog Atom><whitespace>(
5508 * or <beginning of line><Prolog Atom><whitespace>:-
5509 *
5510 * It is added to the tags database if it doesn't match the
5511 * name of the previous clause header.
5512 *
5513 * Return the size of the name of the predicate or rule, or 0 if no
5514 * header was found.
5515 */
5516 static int
5517 prolog_pr (s, last)
5518 char *s;
5519 char *last; /* Name of last clause. */
5520 {
5521 int pos;
5522 int len;
5523
5524 pos = prolog_atom (s, 0);
5525 if (pos < 1)
5526 return 0;
5527
5528 len = pos;
5529 pos = skip_spaces (s + pos) - s;
5530
5531 if ((s[pos] == '.'
5532 || (s[pos] == '(' && (pos += 1))
5533 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5534 && (last == NULL /* save only the first clause */
5535 || len != (int)strlen (last)
5536 || !strneq (s, last, len)))
5537 {
5538 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5539 return len;
5540 }
5541 else
5542 return 0;
5543 }
5544
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter or
 *   an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM(*p) || *p == '_'; p++)
	continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
	p++;
	if (*p != '\'')
	  return p - start;	/* that was the closing quote */
	p++;			/* A double quote */
	break;

      case '\0':
	/* Multiline quoted atoms are ignored. */
	return -1;

      case '\\':
	if (p[1] == '\0')
	  return -1;
	p += 2;
	break;

      default:
	p++;
	break;
      }
}
5603
5604 \f
5605 /*
5606 * Support for Erlang
5607 *
5608 * Generates tags for functions, defines, and records.
5609 * Assumes that Erlang functions start at column 0.
5610 * Original code by Anders Lindgren (1996)
5611 */
5612 static int erlang_func __P((char *, char *));
5613 static void erlang_attribute __P((char *));
5614 static int erlang_atom __P((char *));
5615
5616 static void
5617 Erlang_functions (inf)
5618 FILE *inf;
5619 {
5620 char *cp, *last;
5621 int len;
5622 int allocated;
5623
5624 allocated = 0;
5625 len = 0;
5626 last = NULL;
5627
5628 LOOP_ON_INPUT_LINES (inf, lb, cp)
5629 {
5630 if (cp[0] == '\0') /* Empty line */
5631 continue;
5632 else if (iswhite (cp[0])) /* Not function nor attribute */
5633 continue;
5634 else if (cp[0] == '%') /* comment */
5635 continue;
5636 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5637 continue;
5638 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5639 {
5640 erlang_attribute (cp);
5641 if (last != NULL)
5642 {
5643 free (last);
5644 last = NULL;
5645 }
5646 }
5647 else if ((len = erlang_func (cp, last)) > 0)
5648 {
5649 /*
5650 * Function. Store the function name so that we only
5651 * generates a tag for the first clause.
5652 */
5653 if (last == NULL)
5654 last = xnew (len + 1, char);
5655 else if (len + 1 > allocated)
5656 xrnew (last, len + 1, char);
5657 allocated = len + 1;
5658 strncpy (last, cp, len);
5659 last[len] = '\0';
5660 }
5661 }
5662 if (last != NULL)
5663 free (last);
5664 }
5665
5666
5667 /*
5668 * A function definition is added if it matches:
5669 * <beginning of line><Erlang Atom><whitespace>(
5670 *
5671 * It is added to the tags database if it doesn't match the
5672 * name of the previous clause header.
5673 *
5674 * Return the size of the name of the function, or 0 if no function
5675 * was found.
5676 */
5677 static int
5678 erlang_func (s, last)
5679 char *s;
5680 char *last; /* Name of last clause. */
5681 {
5682 int pos;
5683 int len;
5684
5685 pos = erlang_atom (s);
5686 if (pos < 1)
5687 return 0;
5688
5689 len = pos;
5690 pos = skip_spaces (s + pos) - s;
5691
5692 /* Save only the first clause. */
5693 if (s[pos++] == '('
5694 && (last == NULL
5695 || len != (int)strlen (last)
5696 || !strneq (s, last, len)))
5697 {
5698 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5699 return len;
5700 }
5701
5702 return 0;
5703 }
5704
5705
5706 /*
5707 * Handle attributes. Currently, tags are generated for defines
5708 * and records.
5709 *
5710 * They are on the form:
5711 * -define(foo, bar).
5712 * -define(Foo(M, N), M+N).
5713 * -record(graph, {vtab = notable, cyclic = true}).
5714 */
5715 static void
5716 erlang_attribute (s)
5717 char *s;
5718 {
5719 char *cp = s;
5720
5721 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5722 && *cp++ == '(')
5723 {
5724 int len = erlang_atom (skip_spaces (cp));
5725 if (len > 0)
5726 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5727 }
5728 return;
5729 }
5730
5731
/*
 * Measure the Erlang atom (or variable) at the start of S.
 *
 * An unquoted atom starts with a letter or `_' and continues with
 * letters, digits and `_'.  A quoted atom runs from a single quote
 * to the matching one; a backslash hides the following character.
 *
 * Return the number of bytes the atom occupies, or 0 when S does not
 * start with an atom or a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int i;

  if (s[0] == '\'')
    {
      /* Quoted atom.  */
      for (i = 1; s[i] != '\''; i++)
	{
	  if (s[i] == '\\')
	    i++;		/* look at the escaped character instead */
	  if (s[i] == '\0')	/* multiline quoted atoms are ignored */
	    return 0;
	}
      return i + 1;		/* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* Unquoted atom.  */
      for (i = 1; ISALNUM (s[i]) || s[i] == '_'; i++)
	continue;
      return i;
    }

  return 0;
}
5760
5761 \f
5762 static char *scan_separators __P((char *));
5763 static void add_regex __P((char *, language *));
5764 static char *substitute __P((char *, char *, struct re_registers *));
5765
5766 /*
5767 * Take a string like "/blah/" and turn it into "blah", verifying
5768 * that the first and last characters are the same, and handling
5769 * quoted separator characters. Actually, stops on the occurrence of
5770 * an unquoted separator. Also process \t, \n, etc. and turn into
5771 * appropriate characters. Works in place. Null terminates name string.
5772 * Returns pointer to terminating separator, or NULL for
5773 * unterminated regexps.
5774 */
5775 static char *
5776 scan_separators (name)
5777 char *name;
5778 {
5779 char sep = name[0];
5780 char *copyto = name;
5781 bool quoted = FALSE;
5782
5783 for (++name; *name != '\0'; ++name)
5784 {
5785 if (quoted)
5786 {
5787 switch (*name)
5788 {
5789 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5790 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5791 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5792 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5793 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5794 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5795 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5796 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5797 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5798 default:
5799 if (*name == sep)
5800 *copyto++ = sep;
5801 else
5802 {
5803 /* Something else is quoted, so preserve the quote. */
5804 *copyto++ = '\\';
5805 *copyto++ = *name;
5806 }
5807 break;
5808 }
5809 quoted = FALSE;
5810 }
5811 else if (*name == '\\')
5812 quoted = TRUE;
5813 else if (*name == sep)
5814 break;
5815 else
5816 *copyto++ = *name;
5817 }
5818 if (*name != sep)
5819 name = NULL; /* signal unterminated regexp */
5820
5821 /* Terminate copied string. */
5822 *copyto = '\0';
5823 return name;
5824 }
5825
5826 /* Look at the argument of --regex or --no-regex and do the right
5827 thing. Same for each line of a regexp file. */
5828 static void
5829 analyse_regex (regex_arg)
5830 char *regex_arg;
5831 {
5832 if (regex_arg == NULL)
5833 {
5834 free_regexps (); /* --no-regex: remove existing regexps */
5835 return;
5836 }
5837
5838 /* A real --regexp option or a line in a regexp file. */
5839 switch (regex_arg[0])
5840 {
5841 /* Comments in regexp file or null arg to --regex. */
5842 case '\0':
5843 case ' ':
5844 case '\t':
5845 break;
5846
5847 /* Read a regex file. This is recursive and may result in a
5848 loop, which will stop when the file descriptors are exhausted. */
5849 case '@':
5850 {
5851 FILE *regexfp;
5852 linebuffer regexbuf;
5853 char *regexfile = regex_arg + 1;
5854
5855 /* regexfile is a file containing regexps, one per line. */
5856 regexfp = fopen (regexfile, "r");
5857 if (regexfp == NULL)
5858 {
5859 pfatal (regexfile);
5860 return;
5861 }
5862 linebuffer_init (&regexbuf);
5863 while (readline_internal (&regexbuf, regexfp) > 0)
5864 analyse_regex (regexbuf.buffer);
5865 free (regexbuf.buffer);
5866 fclose (regexfp);
5867 }
5868 break;
5869
5870 /* Regexp to be used for a specific language only. */
5871 case '{':
5872 {
5873 language *lang;
5874 char *lang_name = regex_arg + 1;
5875 char *cp;
5876
5877 for (cp = lang_name; *cp != '}'; cp++)
5878 if (*cp == '\0')
5879 {
5880 error ("unterminated language name in regex: %s", regex_arg);
5881 return;
5882 }
5883 *cp++ = '\0';
5884 lang = get_language_from_langname (lang_name);
5885 if (lang == NULL)
5886 return;
5887 add_regex (cp, lang);
5888 }
5889 break;
5890
5891 /* Regexp to be used for any language. */
5892 default:
5893 add_regex (regex_arg, NULL);
5894 break;
5895 }
5896 }
5897
5898 /* Separate the regexp pattern, compile it,
5899 and care for optional name and modifiers. */
5900 static void
5901 add_regex (regexp_pattern, lang)
5902 char *regexp_pattern;
5903 language *lang;
5904 {
5905 static struct re_pattern_buffer zeropattern;
5906 char sep, *pat, *name, *modifiers;
5907 const char *err;
5908 struct re_pattern_buffer *patbuf;
5909 regexp *rp;
5910 bool
5911 force_explicit_name = TRUE, /* do not use implicit tag names */
5912 ignore_case = FALSE, /* case is significant */
5913 multi_line = FALSE, /* matches are done one line at a time */
5914 single_line = FALSE; /* dot does not match newline */
5915
5916
5917 if (strlen(regexp_pattern) < 3)
5918 {
5919 error ("null regexp", (char *)NULL);
5920 return;
5921 }
5922 sep = regexp_pattern[0];
5923 name = scan_separators (regexp_pattern);
5924 if (name == NULL)
5925 {
5926 error ("%s: unterminated regexp", regexp_pattern);
5927 return;
5928 }
5929 if (name[1] == sep)
5930 {
5931 error ("null name for regexp \"%s\"", regexp_pattern);
5932 return;
5933 }
5934 modifiers = scan_separators (name);
5935 if (modifiers == NULL) /* no terminating separator --> no name */
5936 {
5937 modifiers = name;
5938 name = "";
5939 }
5940 else
5941 modifiers += 1; /* skip separator */
5942
5943 /* Parse regex modifiers. */
5944 for (; modifiers[0] != '\0'; modifiers++)
5945 switch (modifiers[0])
5946 {
5947 case 'N':
5948 if (modifiers == name)
5949 error ("forcing explicit tag name but no name, ignoring", NULL);
5950 force_explicit_name = TRUE;
5951 break;
5952 case 'i':
5953 ignore_case = TRUE;
5954 break;
5955 case 's':
5956 single_line = TRUE;
5957 /* FALLTHRU */
5958 case 'm':
5959 multi_line = TRUE;
5960 need_filebuf = TRUE;
5961 break;
5962 default:
5963 {
5964 char wrongmod [2];
5965 wrongmod[0] = modifiers[0];
5966 wrongmod[1] = '\0';
5967 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5968 }
5969 break;
5970 }
5971
5972 patbuf = xnew (1, struct re_pattern_buffer);
5973 *patbuf = zeropattern;
5974 if (ignore_case)
5975 {
5976 static char lc_trans[CHARS];
5977 int i;
5978 for (i = 0; i < CHARS; i++)
5979 lc_trans[i] = lowcase (i);
5980 patbuf->translate = lc_trans; /* translation table to fold case */
5981 }
5982
5983 if (multi_line)
5984 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5985 else
5986 pat = regexp_pattern;
5987
5988 if (single_line)
5989 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5990 else
5991 re_set_syntax (RE_SYNTAX_EMACS);
5992
5993 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5994 if (multi_line)
5995 free (pat);
5996 if (err != NULL)
5997 {
5998 error ("%s while compiling pattern", err);
5999 return;
6000 }
6001
6002 rp = p_head;
6003 p_head = xnew (1, regexp);
6004 p_head->pattern = savestr (regexp_pattern);
6005 p_head->p_next = rp;
6006 p_head->lang = lang;
6007 p_head->pat = patbuf;
6008 p_head->name = savestr (name);
6009 p_head->error_signaled = FALSE;
6010 p_head->force_explicit_name = force_explicit_name;
6011 p_head->ignore_case = ignore_case;
6012 p_head->multi_line = multi_line;
6013 }
6014
6015 /*
6016 * Do the substitutions indicated by the regular expression and
6017 * arguments.
6018 */
6019 static char *
6020 substitute (in, out, regs)
6021 char *in, *out;
6022 struct re_registers *regs;
6023 {
6024 char *result, *t;
6025 int size, dig, diglen;
6026
6027 result = NULL;
6028 size = strlen (out);
6029
6030 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6031 if (out[size - 1] == '\\')
6032 fatal ("pattern error in \"%s\"", out);
6033 for (t = etags_strchr (out, '\\');
6034 t != NULL;
6035 t = etags_strchr (t + 2, '\\'))
6036 if (ISDIGIT (t[1]))
6037 {
6038 dig = t[1] - '0';
6039 diglen = regs->end[dig] - regs->start[dig];
6040 size += diglen - 2;
6041 }
6042 else
6043 size -= 1;
6044
6045 /* Allocate space and do the substitutions. */
6046 assert (size >= 0);
6047 result = xnew (size + 1, char);
6048
6049 for (t = result; *out != '\0'; out++)
6050 if (*out == '\\' && ISDIGIT (*++out))
6051 {
6052 dig = *out - '0';
6053 diglen = regs->end[dig] - regs->start[dig];
6054 strncpy (t, in + regs->start[dig], diglen);
6055 t += diglen;
6056 }
6057 else
6058 *t++ = *out;
6059 *t = '\0';
6060
6061 assert (t <= result + size);
6062 assert (t - result == (int)strlen (result));
6063
6064 return result;
6065 }
6066
6067 /* Deallocate all regexps. */
6068 static void
6069 free_regexps ()
6070 {
6071 regexp *rp;
6072 while (p_head != NULL)
6073 {
6074 rp = p_head->p_next;
6075 free (p_head->pattern);
6076 free (p_head->name);
6077 free (p_head);
6078 p_head = rp;
6079 }
6080 return;
6081 }
6082
6083 /*
6084 * Reads the whole file as a single string from `filebuf' and looks for
6085 * multi-line regular expressions, creating tags on matches.
6086 * readline already dealt with normal regexps.
6087 *
6088 * Idea by Ben Wing <ben@666.com> (2002).
6089 */
6090 static void
6091 regex_tag_multiline ()
6092 {
6093 char *buffer = filebuf.buffer;
6094 regexp *rp;
6095 char *name;
6096
6097 for (rp = p_head; rp != NULL; rp = rp->p_next)
6098 {
6099 int match = 0;
6100
6101 if (!rp->multi_line)
6102 continue; /* skip normal regexps */
6103
6104 /* Generic initialisations before parsing file from memory. */
6105 lineno = 1; /* reset global line number */
6106 charno = 0; /* reset global char number */
6107 linecharno = 0; /* reset global char number of line start */
6108
6109 /* Only use generic regexps or those for the current language. */
6110 if (rp->lang != NULL && rp->lang != curfdp->lang)
6111 continue;
6112
6113 while (match >= 0 && match < filebuf.len)
6114 {
6115 match = re_search (rp->pat, buffer, filebuf.len, charno,
6116 filebuf.len - match, &rp->regs);
6117 switch (match)
6118 {
6119 case -2:
6120 /* Some error. */
6121 if (!rp->error_signaled)
6122 {
6123 error ("regexp stack overflow while matching \"%s\"",
6124 rp->pattern);
6125 rp->error_signaled = TRUE;
6126 }
6127 break;
6128 case -1:
6129 /* No match. */
6130 break;
6131 default:
6132 if (match == rp->regs.end[0])
6133 {
6134 if (!rp->error_signaled)
6135 {
6136 error ("regexp matches the empty string: \"%s\"",
6137 rp->pattern);
6138 rp->error_signaled = TRUE;
6139 }
6140 match = -3; /* exit from while loop */
6141 break;
6142 }
6143
6144 /* Match occurred. Construct a tag. */
6145 while (charno < rp->regs.end[0])
6146 if (buffer[charno++] == '\n')
6147 lineno++, linecharno = charno;
6148 name = rp->name;
6149 if (name[0] == '\0')
6150 name = NULL;
6151 else /* make a named tag */
6152 name = substitute (buffer, rp->name, &rp->regs);
6153 if (rp->force_explicit_name)
6154 /* Force explicit tag name, if a name is there. */
6155 pfnote (name, TRUE, buffer + linecharno,
6156 charno - linecharno + 1, lineno, linecharno);
6157 else
6158 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6159 charno - linecharno + 1, lineno, linecharno);
6160 break;
6161 }
6162 }
6163 }
6164 }
6165
6166 \f
6167 static bool
6168 nocase_tail (cp)
6169 char *cp;
6170 {
6171 register int len = 0;
6172
6173 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6174 cp++, len++;
6175 if (*cp == '\0' && !intoken (dbp[len]))
6176 {
6177 dbp += len;
6178 return TRUE;
6179 }
6180 return FALSE;
6181 }
6182
6183 static void
6184 get_tag (bp, namepp)
6185 register char *bp;
6186 char **namepp;
6187 {
6188 register char *cp = bp;
6189
6190 if (*bp != '\0')
6191 {
6192 /* Go till you get to white space or a syntactic break */
6193 for (cp = bp + 1; !notinname (*cp); cp++)
6194 continue;
6195 make_tag (bp, cp - bp, TRUE,
6196 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6197 }
6198
6199 if (namepp != NULL)
6200 *namepp = savenstr (bp, cp - bp);
6201 }
6202
6203 /*
6204 * Read a line of text from `stream' into `lbp', excluding the
6205 * newline or CR-NL, if any. Return the number of characters read from
6206 * `stream', which is the length of the line including the newline.
6207 *
6208 * On DOS or Windows we do not count the CR character, if any before the
6209 * NL, in the returned length; this mirrors the behavior of Emacs on those
6210 * platforms (for text files, it translates CR-NL to NL as it reads in the
6211 * file).
6212 *
6213 * If multi-line regular expressions are requested, each line read is
6214 * appended to `filebuf'.
6215 */
6216 static long
6217 readline_internal (lbp, stream)
6218 linebuffer *lbp;
6219 register FILE *stream;
6220 {
6221 char *buffer = lbp->buffer;
6222 register char *p = lbp->buffer;
6223 register char *pend;
6224 int chars_deleted;
6225
6226 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6227
6228 for (;;)
6229 {
6230 register int c = getc (stream);
6231 if (p == pend)
6232 {
6233 /* We're at the end of linebuffer: expand it. */
6234 lbp->size *= 2;
6235 xrnew (buffer, lbp->size, char);
6236 p += buffer - lbp->buffer;
6237 pend = buffer + lbp->size;
6238 lbp->buffer = buffer;
6239 }
6240 if (c == EOF)
6241 {
6242 *p = '\0';
6243 chars_deleted = 0;
6244 break;
6245 }
6246 if (c == '\n')
6247 {
6248 if (p > buffer && p[-1] == '\r')
6249 {
6250 p -= 1;
6251 #ifdef DOS_NT
6252 /* Assume CRLF->LF translation will be performed by Emacs
6253 when loading this file, so CRs won't appear in the buffer.
6254 It would be cleaner to compensate within Emacs;
6255 however, Emacs does not know how many CRs were deleted
6256 before any given point in the file. */
6257 chars_deleted = 1;
6258 #else
6259 chars_deleted = 2;
6260 #endif
6261 }
6262 else
6263 {
6264 chars_deleted = 1;
6265 }
6266 *p = '\0';
6267 break;
6268 }
6269 *p++ = c;
6270 }
6271 lbp->len = p - buffer;
6272
6273 if (need_filebuf /* we need filebuf for multi-line regexps */
6274 && chars_deleted > 0) /* not at EOF */
6275 {
6276 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6277 {
6278 /* Expand filebuf. */
6279 filebuf.size *= 2;
6280 xrnew (filebuf.buffer, filebuf.size, char);
6281 }
6282 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6283 filebuf.len += lbp->len;
6284 filebuf.buffer[filebuf.len++] = '\n';
6285 filebuf.buffer[filebuf.len] = '\0';
6286 }
6287
6288 return lbp->len + chars_deleted;
6289 }
6290
6291 /*
6292 * Like readline_internal, above, but in addition try to match the
6293 * input line against relevant regular expressions and manage #line
6294 * directives.
6295 */
6296 static void
6297 readline (lbp, stream)
6298 linebuffer *lbp;
6299 FILE *stream;
6300 {
6301 long result;
6302
6303 linecharno = charno; /* update global char number of line start */
6304 result = readline_internal (lbp, stream); /* read line */
6305 lineno += 1; /* increment global line number */
6306 charno += result; /* increment global char number */
6307
6308 /* Honour #line directives. */
6309 if (!no_line_directive)
6310 {
6311 static bool discard_until_line_directive;
6312
6313 /* Check whether this is a #line directive. */
6314 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6315 {
6316 unsigned int lno;
6317 int start = 0;
6318
6319 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6320 && start > 0) /* double quote character found */
6321 {
6322 char *endp = lbp->buffer + start;
6323
6324 while ((endp = etags_strchr (endp, '"')) != NULL
6325 && endp[-1] == '\\')
6326 endp++;
6327 if (endp != NULL)
6328 /* Ok, this is a real #line directive. Let's deal with it. */
6329 {
6330 char *taggedabsname; /* absolute name of original file */
6331 char *taggedfname; /* name of original file as given */
6332 char *name; /* temp var */
6333
6334 discard_until_line_directive = FALSE; /* found it */
6335 name = lbp->buffer + start;
6336 *endp = '\0';
6337 canonicalize_filename (name); /* for DOS */
6338 taggedabsname = absolute_filename (name, tagfiledir);
6339 if (filename_is_absolute (name)
6340 || filename_is_absolute (curfdp->infname))
6341 taggedfname = savestr (taggedabsname);
6342 else
6343 taggedfname = relative_filename (taggedabsname,tagfiledir);
6344
6345 if (streq (curfdp->taggedfname, taggedfname))
6346 /* The #line directive is only a line number change. We
6347 deal with this afterwards. */
6348 free (taggedfname);
6349 else
6350 /* The tags following this #line directive should be
6351 attributed to taggedfname. In order to do this, set
6352 curfdp accordingly. */
6353 {
6354 fdesc *fdp; /* file description pointer */
6355
6356 /* Go look for a file description already set up for the
6357 file indicated in the #line directive. If there is
6358 one, use it from now until the next #line
6359 directive. */
6360 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6361 if (streq (fdp->infname, curfdp->infname)
6362 && streq (fdp->taggedfname, taggedfname))
6363 /* If we remove the second test above (after the &&)
6364 then all entries pertaining to the same file are
6365 coalesced in the tags file. If we use it, then
6366 entries pertaining to the same file but generated
6367 from different files (via #line directives) will
6368 go into separate sections in the tags file. These
6369 alternatives look equivalent. The first one
6370 destroys some apparently useless information. */
6371 {
6372 curfdp = fdp;
6373 free (taggedfname);
6374 break;
6375 }
6376 /* Else, if we already tagged the real file, skip all
6377 input lines until the next #line directive. */
6378 if (fdp == NULL) /* not found */
6379 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6380 if (streq (fdp->infabsname, taggedabsname))
6381 {
6382 discard_until_line_directive = TRUE;
6383 free (taggedfname);
6384 break;
6385 }
6386 /* Else create a new file description and use that from
6387 now on, until the next #line directive. */
6388 if (fdp == NULL) /* not found */
6389 {
6390 fdp = fdhead;
6391 fdhead = xnew (1, fdesc);
6392 *fdhead = *curfdp; /* copy curr. file description */
6393 fdhead->next = fdp;
6394 fdhead->infname = savestr (curfdp->infname);
6395 fdhead->infabsname = savestr (curfdp->infabsname);
6396 fdhead->infabsdir = savestr (curfdp->infabsdir);
6397 fdhead->taggedfname = taggedfname;
6398 fdhead->usecharno = FALSE;
6399 fdhead->prop = NULL;
6400 fdhead->written = FALSE;
6401 curfdp = fdhead;
6402 }
6403 }
6404 free (taggedabsname);
6405 lineno = lno - 1;
6406 readline (lbp, stream);
6407 return;
6408 } /* if a real #line directive */
6409 } /* if #line is followed by a a number */
6410 } /* if line begins with "#line " */
6411
6412 /* If we are here, no #line directive was found. */
6413 if (discard_until_line_directive)
6414 {
6415 if (result > 0)
6416 {
6417 /* Do a tail recursion on ourselves, thus discarding the contents
6418 of the line buffer. */
6419 readline (lbp, stream);
6420 return;
6421 }
6422 /* End of file. */
6423 discard_until_line_directive = FALSE;
6424 return;
6425 }
6426 } /* if #line directives should be considered */
6427
6428 {
6429 int match;
6430 regexp *rp;
6431 char *name;
6432
6433 /* Match against relevant regexps. */
6434 if (lbp->len > 0)
6435 for (rp = p_head; rp != NULL; rp = rp->p_next)
6436 {
6437 /* Only use generic regexps or those for the current language.
6438 Also do not use multiline regexps, which is the job of
6439 regex_tag_multiline. */
6440 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6441 || rp->multi_line)
6442 continue;
6443
6444 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6445 switch (match)
6446 {
6447 case -2:
6448 /* Some error. */
6449 if (!rp->error_signaled)
6450 {
6451 error ("regexp stack overflow while matching \"%s\"",
6452 rp->pattern);
6453 rp->error_signaled = TRUE;
6454 }
6455 break;
6456 case -1:
6457 /* No match. */
6458 break;
6459 case 0:
6460 /* Empty string matched. */
6461 if (!rp->error_signaled)
6462 {
6463 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6464 rp->error_signaled = TRUE;
6465 }
6466 break;
6467 default:
6468 /* Match occurred. Construct a tag. */
6469 name = rp->name;
6470 if (name[0] == '\0')
6471 name = NULL;
6472 else /* make a named tag */
6473 name = substitute (lbp->buffer, rp->name, &rp->regs);
6474 if (rp->force_explicit_name)
6475 /* Force explicit tag name, if a name is there. */
6476 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6477 else
6478 make_tag (name, strlen (name), TRUE,
6479 lbp->buffer, match, lineno, linecharno);
6480 break;
6481 }
6482 }
6483 }
6484 }
6485
6486 \f
/*
 * Return a copy of the whole string CP, made with savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole = strlen (cp);

  return savenstr (cp, whole);
}
6497
6498 /*
6499 * Return a pointer to a space of size LEN+1 allocated with xnew where
6500 * the string CP has been copied for at most the first LEN characters.
6501 */
6502 static char *
6503 savenstr (cp, len)
6504 char *cp;
6505 int len;
6506 {
6507 register char *dp;
6508
6509 dp = xnew (len + 1, char);
6510 strncpy (dp, cp, len);
6511 dp[len] = '\0';
6512 return dp;
6513 }
6514
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL when C does not occur.  The terminating null
 * byte takes part in the search.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
	last = sp;
      if (*sp == '\0')
	break;
    }
  return (char *)last;
}
6536
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL when C does not occur.  The terminating null
 * byte takes part in the search.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
	return (char *)sp;
      if (*sp == '\0')
	return NULL;
    }
}
6555
/*
 * Compare the strings S1 and S2.  Two characters that are both
 * alphabetic are compared in lower case, any other pair is compared
 * as is.  Return zero when the strings are equal, otherwise the
 * difference between the first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
		  ? lowcase (*s1) - lowcase (*s2)
		  : *s1 - *s2);

      /* Stop at the first difference or at the end of S1.  */
      if (diff != 0 || *s1 == '\0')
	return diff;
    }
}
6576
/*
 * Compare at most the first N characters of the strings S1 and S2.
 * Two characters that are both alphabetic are compared in lower
 * case, any other pair is compared as is.
 *
 * Return zero when the compared parts are equal (always the case
 * when N is not positive), otherwise the difference between the
 * first pair of characters that differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* The count is tested before anything else, so that a string that
     ends exactly after N equal characters compares equal no matter
     what follows in the other string.  */
  for (; n > 0; s1++, s2++, n--)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
		  ? lowcase (*s1) - lowcase (*s2)
		  : *s1 - *s2);

      /* Stop at the first difference or at the end of both strings.  */
      if (diff != 0 || *s1 == '\0')
	return diff;
    }

  return 0;
}
6602
/* Return a pointer to the first character of CP for which iswhite
   does not hold (end of string is not space).  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6612
/* Return a pointer to the first character of CP for which iswhite
   holds, or to the end of the string when there is none.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6622
/* Report an error through `error' (S1 is the printf control string,
   S2 its argument), then terminate with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6631
/* Print S1 together with the system error message, using perror,
   then terminate with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6639
6640 static void
6641 suggest_asking_for_help ()
6642 {
6643 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6644 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6645 exit (EXIT_FAILURE);
6646 }
6647
6648 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6649 static void
6650 error (s1, s2)
6651 const char *s1, *s2;
6652 {
6653 fprintf (stderr, "%s: ", progname);
6654 fprintf (stderr, s1, s2);
6655 fprintf (stderr, "\n");
6656 }
6657
6658 /* Return a newly-allocated string whose contents
6659 concatenate those of s1, s2, s3. */
6660 static char *
6661 concat (s1, s2, s3)
6662 char *s1, *s2, *s3;
6663 {
6664 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6665 char *result = xnew (len1 + len2 + len3 + 1, char);
6666
6667 strcpy (result, s1);
6668 strcpy (result + len1, s2);
6669 strcpy (result + len1 + len2, s3);
6670 result[len1 + len2 + len3] = '\0';
6671
6672 return result;
6673 }
6674
6675 \f
6676 /* Does the same work as the system V getcwd, but does not need to
6677 guess the buffer size in advance. */
6678 static char *
6679 etags_getcwd ()
6680 {
6681 #ifdef HAVE_GETCWD
6682 int bufsize = 200;
6683 char *path = xnew (bufsize, char);
6684
6685 while (getcwd (path, bufsize) == NULL)
6686 {
6687 if (errno != ERANGE)
6688 pfatal ("getcwd");
6689 bufsize *= 2;
6690 free (path);
6691 path = xnew (bufsize, char);
6692 }
6693
6694 canonicalize_filename (path);
6695 return path;
6696
6697 #else /* not HAVE_GETCWD */
6698 #if MSDOS
6699
6700 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6701
6702 getwd (path);
6703
6704 for (p = path; *p != '\0'; p++)
6705 if (*p == '\\')
6706 *p = '/';
6707 else
6708 *p = lowcase (*p);
6709
6710 return strdup (path);
6711 #else /* not MSDOS */
6712 linebuffer path;
6713 FILE *pipe;
6714
6715 linebuffer_init (&path);
6716 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6717 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6718 pfatal ("pwd");
6719 pclose (pipe);
6720
6721 return path.buffer;
6722 #endif /* not MSDOS */
6723 #endif /* not HAVE_GETCWD */
6724 }
6725
6726 /* Return a newly allocated string containing the file name of FILE
6727 relative to the absolute directory DIR (which should end with a slash). */
6728 static char *
6729 relative_filename (file, dir)
6730 char *file, *dir;
6731 {
6732 char *fp, *dp, *afn, *res;
6733 int i;
6734
6735 /* Find the common root of file and dir (with a trailing slash). */
6736 afn = absolute_filename (file, cwd);
6737 fp = afn;
6738 dp = dir;
6739 while (*fp++ == *dp++)
6740 continue;
6741 fp--, dp--; /* back to the first differing char */
6742 #ifdef DOS_NT
6743 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6744 return afn;
6745 #endif
6746 do /* look at the equal chars until '/' */
6747 fp--, dp--;
6748 while (*fp != '/');
6749
6750 /* Build a sequence of "../" strings for the resulting relative file name. */
6751 i = 0;
6752 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6753 i += 1;
6754 res = xnew (3*i + strlen (fp + 1) + 1, char);
6755 res[0] = '\0';
6756 while (i-- > 0)
6757 strcat (res, "../");
6758
6759 /* Add the file name relative to the common root of file and dir. */
6760 strcat (res, fp + 1);
6761 free (afn);
6762
6763 return res;
6764 }
6765
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   A FILE that is already absolute is copied, any other is appended
   to DIR.  The "/dirname/.." and "/." components are then removed
   from the result, in place.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Go back to the slash that starts the previous
		 component.  */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Source and destination overlap, which strcpy does not
		 allow: move the tail, terminator included, with
		 memmove.  */
	      memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      /* Overlapping move again, see above.  */
	      memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')		/* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6829
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end
   with a slash).  FILE goes through canonicalize_filename first.
   When FILE holds no slash, the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the time of the call,
     then put the character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6851
6852 /* Whether the argument string is an absolute file name. The argument
6853 string must have been canonicalized with canonicalize_filename. */
6854 static bool
6855 filename_is_absolute (fn)
6856 char *fn;
6857 {
6858 return (fn[0] == '/'
6859 #ifdef DOS_NT
6860 || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
6861 #endif
6862 );
6863 }
6864
/* Bring the file name FN into canonical form, in place.  When DOS_NT
   is defined, a lower case drive letter is turned to upper case and
   backslashes are translated into slashes.  Otherwise nothing is
   done.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
	*fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;			/* shut up the compiler */
#endif
}
6883
6884 \f
6885 /* Initialize a linebuffer for use */
6886 static void
6887 linebuffer_init (lbp)
6888 linebuffer *lbp;
6889 {
6890 lbp->size = (DEBUG) ? 3 : 200;
6891 lbp->buffer = xnew (lbp->size, char);
6892 lbp->buffer[0] = '\0';
6893 lbp->len = 0;
6894 }
6895
6896 /* Set the minimum size of a string contained in a linebuffer. */
6897 static void
6898 linebuffer_setlen (lbp, toksize)
6899 linebuffer *lbp;
6900 int toksize;
6901 {
6902 while (lbp->size <= toksize)
6903 {
6904 lbp->size *= 2;
6905 xrnew (lbp->buffer, lbp->size, char);
6906 }
6907 lbp->len = toksize;
6908 }
6909
6910 /* Like malloc but get fatal error if memory is exhausted. */
6911 static PTR
6912 xmalloc (size)
6913 unsigned int size;
6914 {
6915 PTR result = (PTR) malloc (size);
6916 if (result == NULL)
6917 fatal ("virtual memory exhausted", (char *)NULL);
6918 return result;
6919 }
6920
6921 static PTR
6922 xrealloc (ptr, size)
6923 char *ptr;
6924 unsigned int size;
6925 {
6926 PTR result = (PTR) realloc (ptr, size);
6927 if (result == NULL)
6928 fatal ("virtual memory exhausted", (char *)NULL);
6929 return result;
6930 }
6931
6932 /*
6933 * Local Variables:
6934 * indent-tabs-mode: t
6935 * tab-width: 8
6936 * fill-column: 79
6937 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6938 * c-file-style: "gnu"
6939 * End:
6940 */
6941
6942 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6943 (do not change this comment) */
6944
6945 /* etags.c ends here */