*** empty log message ***
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 3, or (at your option)
40 any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program; see the file COPYING. If not, write to the
49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
50 Boston, MA 02110-1301, USA. */
51
52
53 /* NB To comply with the above BSD license, copyright information is
54 reproduced in etc/ETAGS.README. That file should be updated when the
55 above notices are.
56
57 To the best of our knowledge, this code was originally based on the
58 ctags.c distributed with BSD4.2, which was copyrighted by the
59 University of California, as described above. */
60
61
62 /*
63 * Authors:
64 * 1983 Ctags originally by Ken Arnold.
65 * 1984 Fortran added by Jim Kleckner.
66 * 1984 Ed Pelegri-Llopart added C typedefs.
67 * 1985 Emacs TAGS format by Richard Stallman.
68 * 1989 Sam Kendall added C++.
69 * 1992 Joseph B. Wells improved C and C++ parsing.
70 * 1993 Francesco Potortì reorganised C and C++.
71 * 1994 Line-by-line regexp tags by Tom Tromey.
72 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
73 * 2002 #line directives by Francesco Potortì.
74 *
75 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 */
77
78 /*
79 * If you want to add support for a new language, start by looking at the LUA
80 * language, which is the simplest. Alternatively, consider shipping a
81 * configuration file containing regexp definitions for etags.
82 */
83
/* Version identification string for this revision of the program.
   NOTE(review): the leading "@(#)" looks like the marker searched for
   by what(1)-style tools -- confirm before relying on it.  */
char pot_etags_version[] = "@(#) pot revision number is 17.26";

/* Truth values for the int-based `bool' used in this file. */
#define TRUE	1
#define FALSE	0

/* Normalise DEBUG so that it is always defined, as TRUE or FALSE, and
   can be tested with `#if'.  When debugging was not requested, NDEBUG
   is defined as well so that assertions are compiled out.  */
#ifndef DEBUG
# define DEBUG	FALSE
# define NDEBUG			/* disable assert */
#else
# undef DEBUG
# define DEBUG	TRUE
#endif
96
97 #ifdef HAVE_CONFIG_H
98 # include <config.h>
99 /* On some systems, Emacs defines static as nothing for the sake
100 of unexec. We don't want that here since we don't use unexec. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 # ifndef __P /* for XEmacs */
106 # define __P(args) args
107 # endif
108 #else /* no config.h */
109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
110 # define __P(args) args /* use prototypes */
111 # define PTR void * /* for generic pointers */
112 # else /* not standard C */
113 # define __P(args) () /* no prototypes */
114 # define const /* remove const for old compilers' sake */
115 # define PTR long * /* don't use void* */
116 # endif
117 #endif /* !HAVE_CONFIG_H */
118
119 #ifndef _GNU_SOURCE
120 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
121 #endif
122
123 /* WIN32_NATIVE is for XEmacs.
124 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
125 #ifdef WIN32_NATIVE
126 # undef MSDOS
127 # undef WINDOWSNT
128 # define WINDOWSNT
129 #endif /* WIN32_NATIVE */
130
131 #ifdef MSDOS
132 # undef MSDOS
133 # define MSDOS TRUE
134 # include <fcntl.h>
135 # include <sys/param.h>
136 # include <io.h>
137 # ifndef HAVE_CONFIG_H
138 # define DOS_NT
139 # include <sys/config.h>
140 # endif
141 #else
142 # define MSDOS FALSE
143 #endif /* MSDOS */
144
145 #ifdef WINDOWSNT
146 # include <stdlib.h>
147 # include <fcntl.h>
148 # include <string.h>
149 # include <direct.h>
150 # include <io.h>
151 # define MAXPATHLEN _MAX_PATH
152 # undef HAVE_NTGUI
153 # undef DOS_NT
154 # define DOS_NT
155 # ifndef HAVE_GETCWD
156 # define HAVE_GETCWD
157 # endif /* undef HAVE_GETCWD */
158 #else /* not WINDOWSNT */
159 # ifdef STDC_HEADERS
160 # include <stdlib.h>
161 # include <string.h>
162 # else /* no standard C headers */
163 extern char *getenv ();
164 # ifdef VMS
165 # define EXIT_SUCCESS 1
166 # define EXIT_FAILURE 0
167 # else /* no VMS */
168 # define EXIT_SUCCESS 0
169 # define EXIT_FAILURE 1
170 # endif
171 # endif
172 #endif /* !WINDOWSNT */
173
174 #ifdef HAVE_UNISTD_H
175 # include <unistd.h>
176 #else
177 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
178 extern char *getcwd (char *buf, size_t size);
179 # endif
180 #endif /* HAVE_UNISTD_H */
181
182 #include <stdio.h>
183 #include <ctype.h>
184 #include <errno.h>
185 #ifndef errno
186 extern int errno;
187 #endif
188 #include <sys/types.h>
189 #include <sys/stat.h>
190
191 #include <assert.h>
192 #ifdef NDEBUG
193 # undef assert /* some systems have a buggy assert.h */
194 # define assert(x) ((void) 0)
195 #endif
196
197 #if !defined (S_ISREG) && defined (S_IFREG)
198 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
199 #endif
200
201 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
202 # define NO_LONG_OPTIONS TRUE
203 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
204 extern char *optarg;
205 extern int optind, opterr;
206 #else
207 # define NO_LONG_OPTIONS FALSE
208 # include <getopt.h>
209 #endif /* NO_LONG_OPTIONS */
210
211 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
212 # ifdef __CYGWIN__ /* compiling on Cygwin */
213 !!! NOTICE !!!
214 the regex.h distributed with Cygwin is not compatible with etags, alas!
215 If you want regular expression support, you should delete this notice and
216 arrange to use the GNU regex.h and regex.c.
217 # endif
218 #endif
219 #include <regex.h>
220
/* CTAGS selects which program is built from this file.  Define it to
   make the program "ctags" compatible with the usual one.  Leave it
   undefined to make the program "etags", which makes emacs-style tag
   tables and tags typedefs, #defines and struct/union/enum by default.
   After this point CTAGS is always defined, as TRUE or FALSE, so it
   can be tested with `#if'.  */
#ifndef CTAGS
# define CTAGS FALSE
#else
# undef CTAGS
# define CTAGS TRUE
#endif
230
/*
 * String equality predicates; each yields non-zero when the strings
 * compare equal:
 *   streq (s, t)          exact comparison of whole strings
 *   strcaseeq (s, t)      same, ignoring case
 *   strneq (s, t, n)      exact comparison of at most N characters
 *   strncaseeq (s, t, n)  same, ignoring case
 *
 * Unless assertions are disabled, every variant first checks that BOTH
 * arguments are non-NULL: handing a null pointer to the comparison
 * functions is undefined, so a single NULL argument must be caught too.
 * Note that the arguments are evaluated more than once.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
235
/* Number of entries in the character class tables: 256 is 2^8, that is
   one entry for each value of an 8-bit char.  */
#define CHARS 256
/* Map X onto a table index in the range 0 .. CHARS-1, whatever the
   sign of X.  */
#define CHAR(x)		((unsigned int)(x) & (CHARS - 1))

/* Character class predicates, implemented as table lookups. */
#define iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
#define intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* <ctype.h> classification applied to the index computed by CHAR, so
   that a negative char value is never passed to the library.  */
#define ISALNUM(c)	(isalnum (CHAR(c)))
#define ISALPHA(c)	(isalpha (CHAR(c)))
#define ISDIGIT(c)	(isdigit (CHAR(c)))
#define ISLOWER(c)	(islower (CHAR(c)))

/* Case conversion with the same argument protection. */
#define lowcase(c)	(tolower (CHAR(c)))
#define upcase(c)	(toupper (CHAR(c)))
251
252
/*
 * xnew, xrnew -- typed wrappers around the allocation functions
 *
 * SYNOPSIS:	Type *xnew (int n, Type);
 *		void xrnew (OldPointer, int n, Type);
 *
 * xnew requests room for N objects of type Type and casts the result
 * to `Type *'.  xrnew asks for the block OldPointer refers to to be
 * resized to N objects of type Type and assigns the result back to
 * OldPointer.  When DEBUG is on, the requests go through
 * trace_malloc and trace_realloc, which also receive the file name
 * and line number of the call.
 */
#if !DEBUG
# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
					(char *) (op), (n) * sizeof (Type)))
#else
# include "chkmalloc.h"
# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
						  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
					(char *) (op), (n) * sizeof (Type)))
#endif
270
/* Boolean type used throughout this file; it is a plain int. */
#define bool int

/* Type of a language parsing function: it receives a FILE pointer and
   returns nothing.  The parameter list goes through __P so that it can
   be dropped on compilers without prototypes.  */
typedef void Lang_function __P((FILE *));
274
/* One supported compression scheme: the file name suffix that
   identifies it and the command used to undo it.  */
typedef struct
{
  /* file name suffix for this compressor */
  char *suffix;
  /* takes one arg and decompresses to stdout */
  char *command;
} compressor;
280
281 typedef struct
282 {
283 char *name; /* language name */
284 char *help; /* detailed help for the language */
285 Lang_function *function; /* parse function */
286 char **suffixes; /* name suffixes of this language's files */
287 char **filenames; /* names of this language's files */
288 char **interpreters; /* interpreters for this language */
289 bool metasource; /* source used to generate other sources */
290 } language;
291
292 typedef struct fdesc
293 {
294 struct fdesc *next; /* for the linked list */
295 char *infname; /* uncompressed input file name */
296 char *infabsname; /* absolute uncompressed input file name */
297 char *infabsdir; /* absolute dir of input file */
298 char *taggedfname; /* file name to write in tagfile */
299 language *lang; /* language of file */
300 char *prop; /* file properties to write in tagfile */
301 bool usecharno; /* etags tags shall contain char number */
302 bool written; /* entry written in the tags file */
303 } fdesc;
304
305 typedef struct node_st
306 { /* sorting structure */
307 struct node_st *left, *right; /* left and right sons */
308 fdesc *fdp; /* description of file to whom tag belongs */
309 char *name; /* tag name */
310 char *regex; /* search regexp */
311 bool valid; /* write this tag on the tag file */
312 bool is_func; /* function tag: use regexp in CTAGS mode */
313 bool been_warned; /* warning already given for duplicated tag */
314 int lno; /* line number tag is on */
315 long cno; /* character number line starts on */
316 } node;
317
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 */
typedef struct
{
  /* SIZE is the size of BUFFER */
  long size;
  /* LEN is the length of the string in BUFFER after readline reads it */
  int len;
  /* the storage holding the text */
  char *buffer;
} linebuffer;
331
332 /* Used to support mixing of --lang and file names. */
333 typedef struct
334 {
335 enum {
336 at_language, /* a language specification */
337 at_regexp, /* a regular expression */
338 at_filename, /* a file name */
339 at_stdin, /* read from stdin here */
340 at_end /* stop parsing the list */
341 } arg_type; /* argument type */
342 language *lang; /* language associated with the argument */
343 char *what; /* the argument itself */
344 } argument;
345
346 /* Structure defining a regular expression. */
347 typedef struct regexp
348 {
349 struct regexp *p_next; /* pointer to next in list */
350 language *lang; /* if set, use only for this language */
351 char *pattern; /* the regexp pattern */
352 char *name; /* tag name */
353 struct re_pattern_buffer *pat; /* the compiled pattern */
354 struct re_registers regs; /* re registers */
355 bool error_signaled; /* already signaled for this regexp */
356 bool force_explicit_name; /* do not allow implict tag name */
357 bool ignore_case; /* ignore case when matching */
358 bool multi_line; /* do a multi-line match on the whole file */
359 } regexp;
360
361
/* Forward declarations of the language parsing functions.  All of them
   except C_entries have the shape of Lang_function: one FILE pointer
   in, nothing out.

   Many compilers barf on this:
	Lang_function Ada_funcs;
   so let's write it this way */
static void Ada_funcs __P((FILE *));
static void Asm_labels __P((FILE *));
/* C_entries takes an extra int (c_ext) in front of the FILE pointer,
   so it does not match Lang_function.  */
static void C_entries __P((int c_ext, FILE *));
static void default_C_entries __P((FILE *));
static void plain_C_entries __P((FILE *));
static void Cjava_entries __P((FILE *));
static void Cobol_paragraphs __P((FILE *));
static void Cplusplus_entries __P((FILE *));
static void Cstar_entries __P((FILE *));
static void Erlang_functions __P((FILE *));
static void Forth_words __P((FILE *));
static void Fortran_functions __P((FILE *));
static void HTML_labels __P((FILE *));
static void Lisp_functions __P((FILE *));
static void Lua_functions __P((FILE *));
static void Makefile_targets __P((FILE *));
static void Pascal_functions __P((FILE *));
static void Perl_functions __P((FILE *));
static void PHP_functions __P((FILE *));
static void PS_functions __P((FILE *));
static void Prolog_functions __P((FILE *));
static void Python_functions __P((FILE *));
static void Scheme_functions __P((FILE *));
static void TeX_commands __P((FILE *));
static void Texinfo_nodes __P((FILE *));
static void Yacc_entries __P((FILE *));
static void just_read_file __P((FILE *));
392
/* Forward declarations of the remaining functions of this file.  Every
   parameter list is wrapped in __P so that it can be dropped on
   compilers without prototypes.  All functions are static except main
   and fatal.  */

/* Informational output and the program entry point. */
static void print_language_names __P((void));
static void print_version __P((void));
static void print_help __P((argument *));
int main __P((int, char **));

/* Lookups in the compressor and language tables, and line input. */
static compressor *get_compressor_from_suffix __P((char *, char **));
static language *get_language_from_langname __P((const char *));
static language *get_language_from_interpreter __P((char *));
static language *get_language_from_filename __P((char *, bool));
static void readline __P((linebuffer *, FILE *));
static long readline_internal __P((linebuffer *, FILE *));
static bool nocase_tail __P((char *));
static void get_tag __P((char *, char **));

/* Regexp handling, diagnostics and insertion into the tag tree. */
static void analyse_regex __P((char *));
static void free_regexps __P((void));
static void regex_tag_multiline __P((void));
static void error __P((const char *, const char *));
static void suggest_asking_for_help __P((void));
void fatal __P((char *, char *));
static void pfatal __P((char *));
static void add_node __P((node *, node **));

/* File processing and tag creation/output. */
static void init __P((void));
static void process_file_name __P((char *, language *));
static void process_file __P((FILE *, char *, language *));
static void find_entries __P((FILE *));
static void free_tree __P((node *));
static void free_fdesc __P((fdesc *));
static void pfnote __P((char *, bool, char *, int, int, long));
static void make_tag __P((char *, int, bool, char *, int, int, long));
static void invalidate_nodes __P((fdesc *, node **));
static void put_entries __P((node *));

/* String, file name, line buffer and memory helpers. */
static char *concat __P((char *, char *, char *));
static char *skip_spaces __P((char *));
static char *skip_non_spaces __P((char *));
static char *savenstr __P((char *, int));
static char *savestr __P((char *));
static char *etags_strchr __P((const char *, int));
static char *etags_strrchr __P((const char *, int));
static int etags_strcasecmp __P((const char *, const char *));
static int etags_strncasecmp __P((const char *, const char *, int));
static char *etags_getcwd __P((void));
static char *relative_filename __P((char *, char *));
static char *absolute_filename __P((char *, char *));
static char *absolute_dirname __P((char *, char *));
static bool filename_is_absolute __P((char *f));
static void canonicalize_filename __P((char *));
static void linebuffer_init __P((linebuffer *));
static void linebuffer_setlen __P((linebuffer *, int));
static PTR xmalloc __P((unsigned int));
static PTR xrealloc __P((char *, unsigned int));
446
447 \f
448 static char searchar = '/'; /* use /.../ searches */
449
450 static char *tagfile; /* output file */
451 static char *progname; /* name this program was invoked with */
452 static char *cwd; /* current working directory */
453 static char *tagfiledir; /* directory of tagfile */
454 static FILE *tagf; /* ioptr for tags file */
455
456 static fdesc *fdhead; /* head of file description list */
457 static fdesc *curfdp; /* current file description */
458 static int lineno; /* line number of current line */
459 static long charno; /* current character number */
460 static long linecharno; /* charno of start of current line */
461 static char *dbp; /* pointer to start of current tag */
462
463 static const int invalidcharno = -1;
464
465 static node *nodehead; /* the head of the binary tree of tags */
466 static node *last_node; /* the last node created */
467
468 static linebuffer lb; /* the current line */
469 static linebuffer filebuf; /* a buffer containing the whole file */
470 static linebuffer token_name; /* a buffer containing a tag name */
471
472 /* boolean "functions" (see init) */
473 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
474 static char
475 /* white chars */
476 *white = " \f\t\n\r\v",
477 /* not in a name */
478 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
479 /* token ending chars */
480 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
481 /* token starting chars */
482 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
483 /* valid in-token chars */
484 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
485
486 static bool append_to_tagfile; /* -a: append to tags */
487 /* The next four default to TRUE for etags, but to FALSE for ctags. */
488 static bool typedefs; /* -t: create tags for C and Ada typedefs */
489 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
490 /* 0 struct/enum/union decls, and C++ */
491 /* member functions. */
492 static bool constantypedefs; /* -d: create tags for C #define, enum */
493 /* constants and variables. */
494 /* -D: opposite of -d. Default under ctags. */
495 static bool globals; /* create tags for global variables */
496 static bool members; /* create tags for C member variables */
497 static bool declarations; /* --declarations: tag them and extern in C&Co*/
498 static bool no_line_directive; /* ignore #line directives (undocumented) */
499 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
500 static bool update; /* -u: update tags */
501 static bool vgrind_style; /* -v: create vgrind style index output */
502 static bool no_warnings; /* -w: suppress warnings (undocumented) */
503 static bool cxref_style; /* -x: create cxref style output */
504 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
505 static bool ignoreindent; /* -I: ignore indentation in C */
506 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
507
508 /* STDIN is defined in LynxOS system headers */
509 #ifdef STDIN
510 # undef STDIN
511 #endif
512
513 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
514 static bool parsing_stdin; /* --parse-stdin used */
515
516 static regexp *p_head; /* list of all regexps */
517 static bool need_filebuf; /* some regexes are multi-line */
518
519 static struct option longopts[] =
520 {
521 { "append", no_argument, NULL, 'a' },
522 { "packages-only", no_argument, &packages_only, TRUE },
523 { "c++", no_argument, NULL, 'C' },
524 { "declarations", no_argument, &declarations, TRUE },
525 { "no-line-directive", no_argument, &no_line_directive, TRUE },
526 { "no-duplicates", no_argument, &no_duplicates, TRUE },
527 { "help", no_argument, NULL, 'h' },
528 { "help", no_argument, NULL, 'H' },
529 { "ignore-indentation", no_argument, NULL, 'I' },
530 { "language", required_argument, NULL, 'l' },
531 { "members", no_argument, &members, TRUE },
532 { "no-members", no_argument, &members, FALSE },
533 { "output", required_argument, NULL, 'o' },
534 { "regex", required_argument, NULL, 'r' },
535 { "no-regex", no_argument, NULL, 'R' },
536 { "ignore-case-regex", required_argument, NULL, 'c' },
537 { "parse-stdin", required_argument, NULL, STDIN },
538 { "version", no_argument, NULL, 'V' },
539
540 #if CTAGS /* Ctags options */
541 { "backward-search", no_argument, NULL, 'B' },
542 { "cxref", no_argument, NULL, 'x' },
543 { "defines", no_argument, NULL, 'd' },
544 { "globals", no_argument, &globals, TRUE },
545 { "typedefs", no_argument, NULL, 't' },
546 { "typedefs-and-c++", no_argument, NULL, 'T' },
547 { "update", no_argument, NULL, 'u' },
548 { "vgrind", no_argument, NULL, 'v' },
549 { "no-warn", no_argument, NULL, 'w' },
550
551 #else /* Etags options */
552 { "no-defines", no_argument, NULL, 'D' },
553 { "no-globals", no_argument, &globals, FALSE },
554 { "include", required_argument, NULL, 'i' },
555 #endif
556 { NULL }
557 };
558
559 static compressor compressors[] =
560 {
561 { "z", "gzip -d -c"},
562 { "Z", "gzip -d -c"},
563 { "gz", "gzip -d -c"},
564 { "GZ", "gzip -d -c"},
565 { "bz2", "bzip2 -d -c" },
566 { NULL }
567 };
568
569 /*
570 * Language stuff.
571 */
572
/* Ada code: file name suffixes and help text. */
static char *Ada_suffixes [] = { "ads", "adb", "ada", NULL };
static char Ada_help [] =
  "In Ada code, functions, procedures, packages, tasks and types are\n"
  "tags. Use the `--packages-only' option to create tags for\n"
  "packages only.\n"
  "Ada tag names have suffixes indicating the type of entity:\n"
  "Entity type: Qualifier:\n"
  "------------ ----------\n"
  "function /f\n"
  "procedure /p\n"
  "package spec /s\n"
  "package body /b\n"
  "type /t\n"
  "task /k\n"
  "Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n"
  "body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n"
  "will just search for any tag `bidule'.";

/* Assembly code: file name suffixes and help text. */
static char *Asm_suffixes [] =
{
  "a",		/* Unix assembler */
  "asm",	/* Microcontroller assembly */
  "def",	/* BSO/Tasking definition includes */
  "inc",	/* Microcontroller include files */
  "ins",	/* Microcontroller include files */
  "s",		/* Unix assembler */
  "sa",		/* Unix assembler */
  "S",		/* cpp-processed Unix assembler */
  "src",	/* BSO/Tasking C compiler output */
  NULL
};
static char Asm_help [] =
  "In assembler code, labels appearing at the beginning of a line,\n"
  "followed by a colon, are tags.";
608
609
/* Note that .c and .h can be considered C++, if the --c++ flag was
   given, or if the `class' or `template' keywords are met inside the file.
   That is why default_C_entries is called for these. */
static char *default_C_suffixes [] = { "c", "h", NULL };

/* The help text depends on which program is being built. */
#if !CTAGS /* C help for Etags */
static char default_C_help [] =
  "In C code, any C function or typedef is a tag, and so are\n"
  "definitions of `struct', `union' and `enum'. `#define' macro\n"
  "definitions and `enum' constants are tags unless you specify\n"
  "`--no-defines'. Global variables are tags unless you specify\n"
  "`--no-globals'. Use of `--no-globals' and `--no-defines'\n"
  "can make the tags table file much smaller.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'.";
#else /* C help for Ctags */
static char default_C_help [] =
  "In C code, any C function is a tag. Use -t to tag typedefs.\n"
  "Use -T to tag definitions of `struct', `union' and `enum'.\n"
  "Use -d to tag `#define' macro definitions and `enum' constants.\n"
  "Use --globals to tag global variables.\n"
  "You can tag function declarations and external variables by\n"
  "using `--declarations', and struct members by using `--members'.";
#endif /* C help for Ctags and Etags */
634
/* C++: file name suffixes and help text. */
static char *Cplusplus_suffixes [] =
{
  "C", "c++", "cc", "cpp", "cxx",
  "H", "h++", "hh", "hpp", "hxx",
  "M",		/* Objective C++ */
  "pdb",	/* Postscript with C syntax */
  NULL
};
static char Cplusplus_help [] =
  "In C++ code, all the tag constructs of C code are tagged. (Use\n"
  "--help --lang=c --lang=c++ for full help.)\n"
  "In addition to C tags, member functions are also recognized. Member\n"
  "variables are also recognized if you use the `--members' option.\n"
  "Tags for variables and functions in classes are named `CLASS::VARIABLE'\n"
  "and `CLASS::FUNCTION'. `operator' definitions have tag names like\n"
  "`operator+'.";

/* Java: file name suffixes and help text. */
static char *Cjava_suffixes [] = { "java", NULL };
static char Cjava_help [] =
  "In Java code, all the tags constructs of C and C++ code are\n"
  "tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
654
655
/* Cobol: file name suffixes and help text. */
static char *Cobol_suffixes [] = { "COB", "cob", NULL };
static char Cobol_help [] =
  "In Cobol code, tags are paragraph names; that is, any word\n"
  "starting in column 8 and followed by a period.";

/* C*: file name suffixes only; no help text is defined here. */
static char *Cstar_suffixes [] = { "cs", "hs", NULL };

/* Erlang: file name suffixes and help text. */
static char *Erlang_suffixes [] = { "erl", "hrl", NULL };
static char Erlang_help [] =
  "In Erlang code, the tags are the functions, records and macros\n"
  "defined in the file.";
670
/* Forth: file name suffixes and help text.  The suffix table has
   internal linkage like every other language table in this file.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
static char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
676
/* Fortran: file name suffixes and help text. */
static char *Fortran_suffixes [] = { "F", "f", "f90", "for", NULL };
static char Fortran_help [] =
  "In Fortran code, functions, subroutines and block data are tags.";

/* HTML: file name suffixes and help text. */
static char *HTML_suffixes [] = { "htm", "html", "shtml", NULL };
static char HTML_help [] =
  "In HTML input files, the tags are the `title' and the `h1', `h2',\n"
  "`h3' headers. Also, tags are `name=' in anchors and all\n"
  "occurrences of `id='.";

/* Lisp: file name suffixes and help text. */
static char *Lisp_suffixes [] =
{
  "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL
};
static char Lisp_help [] =
  "In Lisp code, any function defined with `defun', any variable\n"
  "defined with `defvar' or `defconst', and in general the first\n"
  "argument of any expression that starts with `(def' in column zero\n"
  "is a tag.";

/* Lua: file name suffixes and help text. */
static char *Lua_suffixes [] = { "lua", "LUA", NULL };
static char Lua_help [] =
  "In Lua scripts, all functions are tags.";
701
/* Makefiles are recognised by whole file name rather than by suffix. */
static char *Makefile_filenames [] =
{
  "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL
};
static char Makefile_help [] =
  "In makefiles, targets are tags; additionally, variables are tags\n"
  "unless you specify `--no-globals'.";

/* Objective C: file name suffixes and help text. */
static char *Objc_suffixes [] =
{
  "lm",		/* Objective lex file */
  "m",		/* Objective C file */
  NULL
};
static char Objc_help [] =
  "In Objective C code, tags include Objective C definitions for classes,\n"
  "class categories, methods and protocols. Tags for variables and\n"
  "functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n"
  "(Use --help --lang=c --lang=objc --lang=java for full help.)";

/* Pascal: file name suffixes and help text. */
static char *Pascal_suffixes [] = { "p", "pas", NULL };
static char Pascal_help [] =
  "In Pascal code, the tags are the functions and procedures defined\n"
  "in the file.";
/* " // this is for working around an Emacs highlighting bug... */
724
/* Perl: file name suffixes, interpreter names and help text. */
static char *Perl_suffixes [] = { "pl", "pm", NULL };
static char *Perl_interpreters [] = { "perl", "@PERL@", NULL };
static char Perl_help [] =
  "In Perl code, the tags are the packages, subroutines and variables\n"
  "defined by the `package', `sub', `my' and `local' keywords. Use\n"
  "`--globals' if you want to tag global variables. Tags for\n"
  "subroutines are named `PACKAGE::SUB'. The name for subroutines\n"
  "defined in the default package is `main::SUB'.";

/* PHP: file name suffixes and help text. */
static char *PHP_suffixes [] = { "php", "php3", "php4", NULL };
static char PHP_help [] =
  "In PHP code, tags are functions, classes and defines. When using\n"
  "the `--members' option, vars are tags too.";

/* Plain C variants: file name suffixes only. */
static char *plain_C_suffixes [] =
{
  "pc",		/* Pro*C file */
  NULL
};

/* PostScript: file name suffixes (.psw is for PSWrap) and help text. */
static char *PS_suffixes [] = { "ps", "psw", NULL };
static char PS_help [] =
  "In PostScript code, the tags are the functions.";
750
/* Prolog: file name suffixes and help text. */
static char *Prolog_suffixes [] = { "prolog", NULL };
static char Prolog_help [] =
  "In Prolog code, tags are predicates and rules at the beginning of\n"
  "line.";

/* Python: file name suffixes and help text. */
static char *Python_suffixes [] = { "py", NULL };
static char Python_help [] =
  "In Python code, `def' or `class' at the beginning of a line\n"
  "generate a tag.";

/* Scheme: file name suffixes and help text.
   Can't do the `SCM' or `scm' prefix with a version number. */
static char *Scheme_suffixes [] =
{
  "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL
};
static char Scheme_help [] =
  "In Scheme code, tags include anything defined with `def' or with a\n"
  "construct whose name starts with `def'. They also include\n"
  "variables set with `set!' at top level in the file.";
770
/* TeX and LaTeX: file name suffixes and help text. */
static char *TeX_suffixes [] =
{
  "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL
};
static char TeX_help [] =
  "In LaTeX text, the argument of any of the commands `\\chapter',\n"
  "`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n"
  "`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n"
  "`\\index', `\\def', `\\newcommand', `\\renewcommand',\n"
  "`\\newenvironment' or `\\renewenvironment' is a tag.\n"
  "\n"
  "Other commands can be specified by setting the environment variable\n"
  "`TEXTAGS' to a colon-separated list like, for example,\n"
  "TEXTAGS=\"mycommand:myothercommand\".";


/* Texinfo: file name suffixes and help text. */
static char *Texinfo_suffixes [] = { "texi", "texinfo", "txi", NULL };
static char Texinfo_help [] =
  "for texinfo files, lines starting with @node are tagged.";

/* Yacc and Bison: file name suffixes (.ym is Objective yacc file)
   and help text.  */
static char *Yacc_suffixes [] = { "y", "y++", "ym", "yxx", "yy", NULL };
static char Yacc_help [] =
  "In Bison or Yacc input files, each rule defines as a tag the\n"
  "nonterminal it constructs. The portions of the file that contain\n"
  "C code are parsed as C code (use --help --lang=c --lang=yacc\n"
  "for full help).";
797
/* Help text for the pseudo-language `auto'. */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
801
/* Help text for the pseudo-language `none'. */
static char none_help [] =
  "`none' is not a real language, it indicates to only do\n"
  "regexp processing on files.";

/* Help text for languages that have no description of their own. */
static char no_lang_help [] =
  "No detailed help available for this language.";
808
809
810 /*
811 * Table of languages.
812 *
813 * It is ok for a given function to be listed under more than one
814 * name. I just didn't.
815 */
816
817 static language lang_names [] =
818 {
819 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
820 { "asm", Asm_help, Asm_labels, Asm_suffixes },
821 { "c", default_C_help, default_C_entries, default_C_suffixes },
822 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
823 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
824 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
825 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
826 { "forth", Forth_help, Forth_words, Forth_suffixes },
827 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
828 { "html", HTML_help, HTML_labels, HTML_suffixes },
829 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
830 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
831 { "lua", Lua_help, Lua_functions, Lua_suffixes },
832 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
833 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
834 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
835 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
836 { "php", PHP_help, PHP_functions, PHP_suffixes },
837 { "postscript",PS_help, PS_functions, PS_suffixes },
838 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
839 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
840 { "python", Python_help, Python_functions, Python_suffixes },
841 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
842 { "tex", TeX_help, TeX_commands, TeX_suffixes },
843 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
844 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
845 { "auto", auto_help }, /* default guessing scheme */
846 { "none", none_help, just_read_file }, /* regexp matching only */
847 { NULL } /* end of list */
848 };
849
850 \f
851 static void
852 print_language_names ()
853 {
854 language *lang;
855 char **name, **ext;
856
857 puts ("\nThese are the currently supported languages, along with the\n\
858 default file names and dot suffixes:");
859 for (lang = lang_names; lang->name != NULL; lang++)
860 {
861 printf (" %-*s", 10, lang->name);
862 if (lang->filenames != NULL)
863 for (name = lang->filenames; *name != NULL; name++)
864 printf (" %s", *name);
865 if (lang->suffixes != NULL)
866 for (ext = lang->suffixes; *ext != NULL; ext++)
867 printf (" .%s", *ext);
868 puts ("");
869 }
870 puts ("where `auto' means use default language for files based on file\n\
871 name suffix, and `none' means only do regexp processing on files.\n\
872 If no language is specified and no matching suffix is found,\n\
873 the first line of the file is read for a sharp-bang (#!) sequence\n\
874 followed by the name of an interpreter. If no such sequence is found,\n\
875 Fortran is tried first; if no tags are found, C is tried next.\n\
876 When parsing any C file, a \"class\" or \"template\" keyword\n\
877 switches to C++.");
878 puts ("Compressed files are supported using gzip and bzip2.\n\
879 \n\
880 For detailed help on a given language use, for example,\n\
881 etags --help --lang=ada.");
882 }
883
884 #ifndef EMACS_NAME
885 # define EMACS_NAME "standalone"
886 #endif
887 #ifndef VERSION
888 # define VERSION "17.26"
889 #endif
890 static void
891 print_version ()
892 {
893 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
894 puts ("Copyright (C) 2008 Free Software Foundation, Inc.");
895 puts ("This program is distributed under the terms in ETAGS.README");
896
897 exit (EXIT_SUCCESS);
898 }
899
900 static void
901 print_help (argbuffer)
902 argument *argbuffer;
903 {
904 bool help_for_lang = FALSE;
905
906 for (; argbuffer->arg_type != at_end; argbuffer++)
907 if (argbuffer->arg_type == at_language)
908 {
909 if (help_for_lang)
910 puts ("");
911 puts (argbuffer->lang->help);
912 help_for_lang = TRUE;
913 }
914
915 if (help_for_lang)
916 exit (EXIT_SUCCESS);
917
918 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
919 \n\
920 These are the options accepted by %s.\n", progname, progname);
921 if (NO_LONG_OPTIONS)
922 puts ("WARNING: long option names do not work with this executable,\n\
923 as it is not linked with GNU getopt.");
924 else
925 puts ("You may use unambiguous abbreviations for the long option names.");
926 puts (" A - as file name means read names from stdin (one per line).\n\
927 Absolute names are stored in the output file as they are.\n\
928 Relative ones are stored relative to the output file's directory.\n");
929
930 puts ("-a, --append\n\
931 Append tag entries to existing tags file.");
932
933 puts ("--packages-only\n\
934 For Ada files, only generate tags for packages.");
935
936 if (CTAGS)
937 puts ("-B, --backward-search\n\
938 Write the search commands for the tag entries using '?', the\n\
939 backward-search command instead of '/', the forward-search command.");
940
941 /* This option is mostly obsolete, because etags can now automatically
942 detect C++. Retained for backward compatibility and for debugging and
943 experimentation. In principle, we could want to tag as C++ even
944 before any "class" or "template" keyword.
945 puts ("-C, --c++\n\
946 Treat files whose name suffix defaults to C language as C++ files.");
947 */
948
949 puts ("--declarations\n\
950 In C and derived languages, create tags for function declarations,");
951 if (CTAGS)
952 puts ("\tand create tags for extern variables if --globals is used.");
953 else
954 puts
955 ("\tand create tags for extern variables unless --no-globals is used.");
956
957 if (CTAGS)
958 puts ("-d, --defines\n\
959 Create tag entries for C #define constants and enum constants, too.");
960 else
961 puts ("-D, --no-defines\n\
962 Don't create tag entries for C #define constants and enum constants.\n\
963 This makes the tags file smaller.");
964
965 if (!CTAGS)
966 puts ("-i FILE, --include=FILE\n\
967 Include a note in tag file indicating that, when searching for\n\
968 a tag, one should also consult the tags file FILE after\n\
969 checking the current file.");
970
971 puts ("-l LANG, --language=LANG\n\
972 Force the following files to be considered as written in the\n\
973 named language up to the next --language=LANG option.");
974
975 if (CTAGS)
976 puts ("--globals\n\
977 Create tag entries for global variables in some languages.");
978 else
979 puts ("--no-globals\n\
980 Do not create tag entries for global variables in some\n\
981 languages. This makes the tags file smaller.");
982 puts ("--members\n\
983 Create tag entries for members of structures in some languages.");
984
985 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
986 Make a tag for each line matching a regular expression pattern\n\
987 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
988 files only. REGEXFILE is a file containing one REGEXP per line.\n\
989 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
990 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
991 puts (" If TAGNAME/ is present, the tags created are named.\n\
992 For example Tcl named tags can be created with:\n\
993 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
994 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
995 `m' means to allow multi-line matches, `s' implies `m' and\n\
996 causes dot to match any character, including newline.");
997 puts ("-R, --no-regex\n\
998 Don't create tags from regexps for the following files.");
999 puts ("-I, --ignore-indentation\n\
1000 In C and C++ do not assume that a closing brace in the first\n\
1001 column is the final brace of a function or structure definition.");
1002 puts ("-o FILE, --output=FILE\n\
1003 Write the tags to FILE.");
1004 puts ("--parse-stdin=NAME\n\
1005 Read from standard input and record tags as belonging to file NAME.");
1006
1007 if (CTAGS)
1008 {
1009 puts ("-t, --typedefs\n\
1010 Generate tag entries for C and Ada typedefs.");
1011 puts ("-T, --typedefs-and-c++\n\
1012 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1013 and C++ member functions.");
1014 }
1015
1016 if (CTAGS)
1017 puts ("-u, --update\n\
1018 Update the tag entries for the given files, leaving tag\n\
1019 entries for other files in place. Currently, this is\n\
1020 implemented by deleting the existing entries for the given\n\
1021 files and then rewriting the new entries at the end of the\n\
1022 tags file. It is often faster to simply rebuild the entire\n\
1023 tag file than to use this.");
1024
1025 if (CTAGS)
1026 {
1027 puts ("-v, --vgrind\n\
1028 Print on the standard output an index of items intended for\n\
1029 human consumption, similar to the output of vgrind. The index\n\
1030 is sorted, and gives the page number of each item.");
1031 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
1032 puts ("-w, --no-duplicates\n\
1033 Do not create duplicate tag entries, for compatibility with\n\
1034 traditional ctags.");
1035 puts ("-w, --no-warn\n\
1036 Suppress warning messages about duplicate tag entries.");
1037 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
1038 puts ("-x, --cxref\n\
1039 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1040 The output uses line numbers instead of page numbers, but\n\
1041 beyond that the differences are cosmetic; try both to see\n\
1042 which you like.");
1043 }
1044
1045 puts ("-V, --version\n\
1046 Print the version of the program.\n\
1047 -h, --help\n\
1048 Print this help message.\n\
1049 Followed by one or more `--language' options prints detailed\n\
1050 help about tag generation for the specified languages.");
1051
1052 print_language_names ();
1053
1054 puts ("");
1055 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1056
1057 exit (EXIT_SUCCESS);
1058 }
1059
1060 \f
1061 #ifdef VMS /* VMS specific functions */
1062
1063 #define EOS '\0'
1064
1065 /* This is a BUG! ANY arbitrary limit is a BUG!
1066 Won't someone please fix this? */
1067 #define MAX_FILE_SPEC_LEN 255
1068 typedef struct {
1069 short curlen;
1070 char body[MAX_FILE_SPEC_LEN + 1];
1071 } vspec;
1072
1073 /*
1074 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1075 returning in each successive call the next file name matching the input
1076 spec. The function expects that each in_spec passed
1077 to it will be processed to completion; in particular, up to and
1078 including the call following that in which the last matching name
1079 is returned, the function ignores the value of in_spec, and will
1080 only start processing a new spec with the following call.
1081 If an error occurs, on return out_spec contains the value
1082 of in_spec when the error occurred.
1083
1084 With each successive file name returned in out_spec, the
1085 function's return value is one. When there are no more matching
1086 names the function returns zero. If on the first call no file
1087 matches in_spec, or there is any other error, -1 is returned.
1088 */
1089
1090 #include <rmsdef.h>
1091 #include <descrip.h>
1092 #define OUTSIZE MAX_FILE_SPEC_LEN
1093 static short
1094 fn_exp (out, in)
1095 vspec *out;
1096 char *in;
1097 {
1098 static long context = 0;
1099 static struct dsc$descriptor_s o;
1100 static struct dsc$descriptor_s i;
1101 static bool pass1 = TRUE;
1102 long status;
1103 short retval;
1104
1105 if (pass1)
1106 {
1107 pass1 = FALSE;
1108 o.dsc$a_pointer = (char *) out;
1109 o.dsc$w_length = (short)OUTSIZE;
1110 i.dsc$a_pointer = in;
1111 i.dsc$w_length = (short)strlen(in);
1112 i.dsc$b_dtype = DSC$K_DTYPE_T;
1113 i.dsc$b_class = DSC$K_CLASS_S;
1114 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1115 o.dsc$b_class = DSC$K_CLASS_VS;
1116 }
1117 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1118 {
1119 out->body[out->curlen] = EOS;
1120 return 1;
1121 }
1122 else if (status == RMS$_NMF)
1123 retval = 0;
1124 else
1125 {
1126 strcpy(out->body, in);
1127 retval = -1;
1128 }
1129 lib$find_file_end(&context);
1130 pass1 = TRUE;
1131 return retval;
1132 }
1133
1134 /*
1135 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1136 name of each file specified by the provided arg expanding wildcards.
1137 */
1138 static char *
1139 gfnames (arg, p_error)
1140 char *arg;
1141 bool *p_error;
1142 {
1143 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1144
1145 switch (fn_exp (&filename, arg))
1146 {
1147 case 1:
1148 *p_error = FALSE;
1149 return filename.body;
1150 case 0:
1151 *p_error = FALSE;
1152 return NULL;
1153 default:
1154 *p_error = TRUE;
1155 return filename.body;
1156 }
1157 }
1158
#ifndef OLD			/* Newer versions of VMS do provide `system'.  */
/* Stand-in for system(): report that commands cannot be run and
   return a failure status.  The caller in this file compares the
   result against EXIT_SUCCESS, so the function must return a definite
   value; -1 differs from EXIT_SUCCESS whether that is 0 or 1.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1166
1167 #define VERSION_DELIM ';'
1168 char *massage_name (s)
1169 char *s;
1170 {
1171 char *start = s;
1172
1173 for ( ; *s; s++)
1174 if (*s == VERSION_DELIM)
1175 {
1176 *s = EOS;
1177 break;
1178 }
1179 else
1180 *s = lowcase (*s);
1181 return start;
1182 }
1183 #endif /* VMS */
1184
1185 \f
1186 int
1187 main (argc, argv)
1188 int argc;
1189 char *argv[];
1190 {
1191 int i;
1192 unsigned int nincluded_files;
1193 char **included_files;
1194 argument *argbuffer;
1195 int current_arg, file_count;
1196 linebuffer filename_lb;
1197 bool help_asked = FALSE;
1198 #ifdef VMS
1199 bool got_err;
1200 #endif
1201 char *optstring;
1202 int opt;
1203
1204
1205 #ifdef DOS_NT
1206 _fmode = O_BINARY; /* all of files are treated as binary files */
1207 #endif /* DOS_NT */
1208
1209 progname = argv[0];
1210 nincluded_files = 0;
1211 included_files = xnew (argc, char *);
1212 current_arg = 0;
1213 file_count = 0;
1214
1215 /* Allocate enough no matter what happens. Overkill, but each one
1216 is small. */
1217 argbuffer = xnew (argc, argument);
1218
1219 /*
1220 * If etags, always find typedefs and structure tags. Why not?
1221 * Also default to find macro constants, enum constants and
1222 * global variables.
1223 */
1224 if (!CTAGS)
1225 {
1226 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1227 globals = TRUE;
1228 }
1229
1230 /* When the optstring begins with a '-' getopt_long does not rearrange the
1231 non-options arguments to be at the end, but leaves them alone. */
1232 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1233 "ac:Cf:Il:o:r:RSVhH",
1234 (CTAGS) ? "BxdtTuvw" : "Di:");
1235
1236 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1237 switch (opt)
1238 {
1239 case 0:
1240 /* If getopt returns 0, then it has already processed a
1241 long-named option. We should do nothing. */
1242 break;
1243
1244 case 1:
1245 /* This means that a file name has been seen. Record it. */
1246 argbuffer[current_arg].arg_type = at_filename;
1247 argbuffer[current_arg].what = optarg;
1248 ++current_arg;
1249 ++file_count;
1250 break;
1251
1252 case STDIN:
1253 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1254 argbuffer[current_arg].arg_type = at_stdin;
1255 argbuffer[current_arg].what = optarg;
1256 ++current_arg;
1257 ++file_count;
1258 if (parsing_stdin)
1259 fatal ("cannot parse standard input more than once", (char *)NULL);
1260 parsing_stdin = TRUE;
1261 break;
1262
1263 /* Common options. */
1264 case 'a': append_to_tagfile = TRUE; break;
1265 case 'C': cplusplus = TRUE; break;
1266 case 'f': /* for compatibility with old makefiles */
1267 case 'o':
1268 if (tagfile)
1269 {
1270 error ("-o option may only be given once.", (char *)NULL);
1271 suggest_asking_for_help ();
1272 /* NOTREACHED */
1273 }
1274 tagfile = optarg;
1275 break;
1276 case 'I':
1277 case 'S': /* for backward compatibility */
1278 ignoreindent = TRUE;
1279 break;
1280 case 'l':
1281 {
1282 language *lang = get_language_from_langname (optarg);
1283 if (lang != NULL)
1284 {
1285 argbuffer[current_arg].lang = lang;
1286 argbuffer[current_arg].arg_type = at_language;
1287 ++current_arg;
1288 }
1289 }
1290 break;
1291 case 'c':
1292 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1293 optarg = concat (optarg, "i", ""); /* memory leak here */
1294 /* FALLTHRU */
1295 case 'r':
1296 argbuffer[current_arg].arg_type = at_regexp;
1297 argbuffer[current_arg].what = optarg;
1298 ++current_arg;
1299 break;
1300 case 'R':
1301 argbuffer[current_arg].arg_type = at_regexp;
1302 argbuffer[current_arg].what = NULL;
1303 ++current_arg;
1304 break;
1305 case 'V':
1306 print_version ();
1307 break;
1308 case 'h':
1309 case 'H':
1310 help_asked = TRUE;
1311 break;
1312
1313 /* Etags options */
1314 case 'D': constantypedefs = FALSE; break;
1315 case 'i': included_files[nincluded_files++] = optarg; break;
1316
1317 /* Ctags options. */
1318 case 'B': searchar = '?'; break;
1319 case 'd': constantypedefs = TRUE; break;
1320 case 't': typedefs = TRUE; break;
1321 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1322 case 'u': update = TRUE; break;
1323 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1324 case 'x': cxref_style = TRUE; break;
1325 case 'w': no_warnings = TRUE; break;
1326 default:
1327 suggest_asking_for_help ();
1328 /* NOTREACHED */
1329 }
1330
1331 /* No more options. Store the rest of arguments. */
1332 for (; optind < argc; optind++)
1333 {
1334 argbuffer[current_arg].arg_type = at_filename;
1335 argbuffer[current_arg].what = argv[optind];
1336 ++current_arg;
1337 ++file_count;
1338 }
1339
1340 argbuffer[current_arg].arg_type = at_end;
1341
1342 if (help_asked)
1343 print_help (argbuffer);
1344 /* NOTREACHED */
1345
1346 if (nincluded_files == 0 && file_count == 0)
1347 {
1348 error ("no input files specified.", (char *)NULL);
1349 suggest_asking_for_help ();
1350 /* NOTREACHED */
1351 }
1352
1353 if (tagfile == NULL)
1354 tagfile = CTAGS ? "tags" : "TAGS";
1355 cwd = etags_getcwd (); /* the current working directory */
1356 if (cwd[strlen (cwd) - 1] != '/')
1357 {
1358 char *oldcwd = cwd;
1359 cwd = concat (oldcwd, "/", "");
1360 free (oldcwd);
1361 }
1362 /* Relative file names are made relative to the current directory. */
1363 if (streq (tagfile, "-")
1364 || strneq (tagfile, "/dev/", 5))
1365 tagfiledir = cwd;
1366 else
1367 tagfiledir = absolute_dirname (tagfile, cwd);
1368
1369 init (); /* set up boolean "functions" */
1370
1371 linebuffer_init (&lb);
1372 linebuffer_init (&filename_lb);
1373 linebuffer_init (&filebuf);
1374 linebuffer_init (&token_name);
1375
1376 if (!CTAGS)
1377 {
1378 if (streq (tagfile, "-"))
1379 {
1380 tagf = stdout;
1381 #ifdef DOS_NT
1382 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1383 doesn't take effect until after `stdout' is already open). */
1384 if (!isatty (fileno (stdout)))
1385 setmode (fileno (stdout), O_BINARY);
1386 #endif /* DOS_NT */
1387 }
1388 else
1389 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1390 if (tagf == NULL)
1391 pfatal (tagfile);
1392 }
1393
1394 /*
1395 * Loop through files finding functions.
1396 */
1397 for (i = 0; i < current_arg; i++)
1398 {
1399 static language *lang; /* non-NULL if language is forced */
1400 char *this_file;
1401
1402 switch (argbuffer[i].arg_type)
1403 {
1404 case at_language:
1405 lang = argbuffer[i].lang;
1406 break;
1407 case at_regexp:
1408 analyse_regex (argbuffer[i].what);
1409 break;
1410 case at_filename:
1411 #ifdef VMS
1412 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1413 {
1414 if (got_err)
1415 {
1416 error ("can't find file %s\n", this_file);
1417 argc--, argv++;
1418 }
1419 else
1420 {
1421 this_file = massage_name (this_file);
1422 }
1423 #else
1424 this_file = argbuffer[i].what;
1425 #endif
1426 /* Input file named "-" means read file names from stdin
1427 (one per line) and use them. */
1428 if (streq (this_file, "-"))
1429 {
1430 if (parsing_stdin)
1431 fatal ("cannot parse standard input AND read file names from it",
1432 (char *)NULL);
1433 while (readline_internal (&filename_lb, stdin) > 0)
1434 process_file_name (filename_lb.buffer, lang);
1435 }
1436 else
1437 process_file_name (this_file, lang);
1438 #ifdef VMS
1439 }
1440 #endif
1441 break;
1442 case at_stdin:
1443 this_file = argbuffer[i].what;
1444 process_file (stdin, this_file, lang);
1445 break;
1446 }
1447 }
1448
1449 free_regexps ();
1450 free (lb.buffer);
1451 free (filebuf.buffer);
1452 free (token_name.buffer);
1453
1454 if (!CTAGS || cxref_style)
1455 {
1456 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1457 put_entries (nodehead);
1458 free_tree (nodehead);
1459 nodehead = NULL;
1460 if (!CTAGS)
1461 {
1462 fdesc *fdp;
1463
1464 /* Output file entries that have no tags. */
1465 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1466 if (!fdp->written)
1467 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1468
1469 while (nincluded_files-- > 0)
1470 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1471
1472 if (fclose (tagf) == EOF)
1473 pfatal (tagfile);
1474 }
1475
1476 exit (EXIT_SUCCESS);
1477 }
1478
1479 /* From here on, we are in (CTAGS && !cxref_style) */
1480 if (update)
1481 {
1482 char cmd[BUFSIZ];
1483 for (i = 0; i < current_arg; ++i)
1484 {
1485 switch (argbuffer[i].arg_type)
1486 {
1487 case at_filename:
1488 case at_stdin:
1489 break;
1490 default:
1491 continue; /* the for loop */
1492 }
1493 sprintf (cmd,
1494 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1495 tagfile, argbuffer[i].what, tagfile);
1496 if (system (cmd) != EXIT_SUCCESS)
1497 fatal ("failed to execute shell command", (char *)NULL);
1498 }
1499 append_to_tagfile = TRUE;
1500 }
1501
1502 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1503 if (tagf == NULL)
1504 pfatal (tagfile);
1505 put_entries (nodehead); /* write all the tags (CTAGS) */
1506 free_tree (nodehead);
1507 nodehead = NULL;
1508 if (fclose (tagf) == EOF)
1509 pfatal (tagfile);
1510
1511 if (CTAGS)
1512 if (append_to_tagfile || update)
1513 {
1514 char cmd[2*BUFSIZ+20];
1515 /* Maybe these should be used:
1516 setenv ("LC_COLLATE", "C", 1);
1517 setenv ("LC_ALL", "C", 1); */
1518 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1519 exit (system (cmd));
1520 }
1521 return EXIT_SUCCESS;
1522 }
1523
1524
1525 /*
1526 * Return a compressor given the file name. If EXTPTR is non-zero,
1527 * return a pointer into FILE where the compressor-specific
1528 * extension begins. If no compressor is found, NULL is returned
1529 * and EXTPTR is not significant.
1530 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1531 */
1532 static compressor *
1533 get_compressor_from_suffix (file, extptr)
1534 char *file;
1535 char **extptr;
1536 {
1537 compressor *compr;
1538 char *slash, *suffix;
1539
1540 /* This relies on FN to be after canonicalize_filename,
1541 so we don't need to consider backslashes on DOS_NT. */
1542 slash = etags_strrchr (file, '/');
1543 suffix = etags_strrchr (file, '.');
1544 if (suffix == NULL || suffix < slash)
1545 return NULL;
1546 if (extptr != NULL)
1547 *extptr = suffix;
1548 suffix += 1;
1549 /* Let those poor souls who live with DOS 8+3 file name limits get
1550 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1551 Only the first do loop is run if not MSDOS */
1552 do
1553 {
1554 for (compr = compressors; compr->suffix != NULL; compr++)
1555 if (streq (compr->suffix, suffix))
1556 return compr;
1557 if (!MSDOS)
1558 break; /* do it only once: not really a loop */
1559 if (extptr != NULL)
1560 *extptr = ++suffix;
1561 } while (*suffix != '\0');
1562 return NULL;
1563 }
1564
1565
1566
1567 /*
1568 * Return a language given the name.
1569 */
1570 static language *
1571 get_language_from_langname (name)
1572 const char *name;
1573 {
1574 language *lang;
1575
1576 if (name == NULL)
1577 error ("empty language name", (char *)NULL);
1578 else
1579 {
1580 for (lang = lang_names; lang->name != NULL; lang++)
1581 if (streq (name, lang->name))
1582 return lang;
1583 error ("unknown language \"%s\"", name);
1584 }
1585
1586 return NULL;
1587 }
1588
1589
1590 /*
1591 * Return a language given the interpreter name.
1592 */
1593 static language *
1594 get_language_from_interpreter (interpreter)
1595 char *interpreter;
1596 {
1597 language *lang;
1598 char **iname;
1599
1600 if (interpreter == NULL)
1601 return NULL;
1602 for (lang = lang_names; lang->name != NULL; lang++)
1603 if (lang->interpreters != NULL)
1604 for (iname = lang->interpreters; *iname != NULL; iname++)
1605 if (streq (*iname, interpreter))
1606 return lang;
1607
1608 return NULL;
1609 }
1610
1611
1612
1613 /*
1614 * Return a language given the file name.
1615 */
1616 static language *
1617 get_language_from_filename (file, case_sensitive)
1618 char *file;
1619 bool case_sensitive;
1620 {
1621 language *lang;
1622 char **name, **ext, *suffix;
1623
1624 /* Try whole file name first. */
1625 for (lang = lang_names; lang->name != NULL; lang++)
1626 if (lang->filenames != NULL)
1627 for (name = lang->filenames; *name != NULL; name++)
1628 if ((case_sensitive)
1629 ? streq (*name, file)
1630 : strcaseeq (*name, file))
1631 return lang;
1632
1633 /* If not found, try suffix after last dot. */
1634 suffix = etags_strrchr (file, '.');
1635 if (suffix == NULL)
1636 return NULL;
1637 suffix += 1;
1638 for (lang = lang_names; lang->name != NULL; lang++)
1639 if (lang->suffixes != NULL)
1640 for (ext = lang->suffixes; *ext != NULL; ext++)
1641 if ((case_sensitive)
1642 ? streq (*ext, suffix)
1643 : strcaseeq (*ext, suffix))
1644 return lang;
1645 return NULL;
1646 }
1647
1648 \f
1649 /*
1650 * This routine is called on each file argument.
1651 */
1652 static void
1653 process_file_name (file, lang)
1654 char *file;
1655 language *lang;
1656 {
1657 struct stat stat_buf;
1658 FILE *inf;
1659 fdesc *fdp;
1660 compressor *compr;
1661 char *compressed_name, *uncompressed_name;
1662 char *ext, *real_name;
1663 int retval;
1664
1665 canonicalize_filename (file);
1666 if (streq (file, tagfile) && !streq (tagfile, "-"))
1667 {
1668 error ("skipping inclusion of %s in self.", file);
1669 return;
1670 }
1671 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1672 {
1673 compressed_name = NULL;
1674 real_name = uncompressed_name = savestr (file);
1675 }
1676 else
1677 {
1678 real_name = compressed_name = savestr (file);
1679 uncompressed_name = savenstr (file, ext - file);
1680 }
1681
1682 /* If the canonicalized uncompressed name
1683 has already been dealt with, skip it silently. */
1684 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1685 {
1686 assert (fdp->infname != NULL);
1687 if (streq (uncompressed_name, fdp->infname))
1688 goto cleanup;
1689 }
1690
1691 if (stat (real_name, &stat_buf) != 0)
1692 {
1693 /* Reset real_name and try with a different name. */
1694 real_name = NULL;
1695 if (compressed_name != NULL) /* try with the given suffix */
1696 {
1697 if (stat (uncompressed_name, &stat_buf) == 0)
1698 real_name = uncompressed_name;
1699 }
1700 else /* try all possible suffixes */
1701 {
1702 for (compr = compressors; compr->suffix != NULL; compr++)
1703 {
1704 compressed_name = concat (file, ".", compr->suffix);
1705 if (stat (compressed_name, &stat_buf) != 0)
1706 {
1707 if (MSDOS)
1708 {
1709 char *suf = compressed_name + strlen (file);
1710 size_t suflen = strlen (compr->suffix) + 1;
1711 for ( ; suf[1]; suf++, suflen--)
1712 {
1713 memmove (suf, suf + 1, suflen);
1714 if (stat (compressed_name, &stat_buf) == 0)
1715 {
1716 real_name = compressed_name;
1717 break;
1718 }
1719 }
1720 if (real_name != NULL)
1721 break;
1722 } /* MSDOS */
1723 free (compressed_name);
1724 compressed_name = NULL;
1725 }
1726 else
1727 {
1728 real_name = compressed_name;
1729 break;
1730 }
1731 }
1732 }
1733 if (real_name == NULL)
1734 {
1735 perror (file);
1736 goto cleanup;
1737 }
1738 } /* try with a different name */
1739
1740 if (!S_ISREG (stat_buf.st_mode))
1741 {
1742 error ("skipping %s: it is not a regular file.", real_name);
1743 goto cleanup;
1744 }
1745 if (real_name == compressed_name)
1746 {
1747 char *cmd = concat (compr->command, " ", real_name);
1748 inf = (FILE *) popen (cmd, "r");
1749 free (cmd);
1750 }
1751 else
1752 inf = fopen (real_name, "r");
1753 if (inf == NULL)
1754 {
1755 perror (real_name);
1756 goto cleanup;
1757 }
1758
1759 process_file (inf, uncompressed_name, lang);
1760
1761 if (real_name == compressed_name)
1762 retval = pclose (inf);
1763 else
1764 retval = fclose (inf);
1765 if (retval < 0)
1766 pfatal (file);
1767
1768 cleanup:
1769 if (compressed_name) free (compressed_name);
1770 if (uncompressed_name) free (uncompressed_name);
1771 last_node = NULL;
1772 curfdp = NULL;
1773 return;
1774 }
1775
1776 static void
1777 process_file (fh, fn, lang)
1778 FILE *fh;
1779 char *fn;
1780 language *lang;
1781 {
1782 static const fdesc emptyfdesc;
1783 fdesc *fdp;
1784
1785 /* Create a new input file description entry. */
1786 fdp = xnew (1, fdesc);
1787 *fdp = emptyfdesc;
1788 fdp->next = fdhead;
1789 fdp->infname = savestr (fn);
1790 fdp->lang = lang;
1791 fdp->infabsname = absolute_filename (fn, cwd);
1792 fdp->infabsdir = absolute_dirname (fn, cwd);
1793 if (filename_is_absolute (fn))
1794 {
1795 /* An absolute file name. Canonicalize it. */
1796 fdp->taggedfname = absolute_filename (fn, NULL);
1797 }
1798 else
1799 {
1800 /* A file name relative to cwd. Make it relative
1801 to the directory of the tags file. */
1802 fdp->taggedfname = relative_filename (fn, tagfiledir);
1803 }
1804 fdp->usecharno = TRUE; /* use char position when making tags */
1805 fdp->prop = NULL;
1806 fdp->written = FALSE; /* not written on tags file yet */
1807
1808 fdhead = fdp;
1809 curfdp = fdhead; /* the current file description */
1810
1811 find_entries (fh);
1812
1813 /* If not Ctags, and if this is not metasource and if it contained no #line
1814 directives, we can write the tags and free all nodes pointing to
1815 curfdp. */
1816 if (!CTAGS
1817 && curfdp->usecharno /* no #line directives in this file */
1818 && !curfdp->lang->metasource)
1819 {
1820 node *np, *prev;
1821
1822 /* Look for the head of the sublist relative to this file. See add_node
1823 for the structure of the node tree. */
1824 prev = NULL;
1825 for (np = nodehead; np != NULL; prev = np, np = np->left)
1826 if (np->fdp == curfdp)
1827 break;
1828
1829 /* If we generated tags for this file, write and delete them. */
1830 if (np != NULL)
1831 {
1832 /* This is the head of the last sublist, if any. The following
1833 instructions depend on this being true. */
1834 assert (np->left == NULL);
1835
1836 assert (fdhead == curfdp);
1837 assert (last_node->fdp == curfdp);
1838 put_entries (np); /* write tags for file curfdp->taggedfname */
1839 free_tree (np); /* remove the written nodes */
1840 if (prev == NULL)
1841 nodehead = NULL; /* no nodes left */
1842 else
1843 prev->left = NULL; /* delete the pointer to the sublist */
1844 }
1845 }
1846 }
1847
1848 /*
1849 * This routine sets up the boolean pseudo-functions which work
1850 * by setting boolean flags dependent upon the corresponding character.
1851 * Every char which is NOT in that string is not a white char. Therefore,
1852 * all of the array "_wht" is set to FALSE, and then the elements
1853 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1854 * of a char is TRUE if it is the string "white", else FALSE.
1855 */
1856 static void
1857 init ()
1858 {
1859 register char *sp;
1860 register int i;
1861
1862 for (i = 0; i < CHARS; i++)
1863 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1864 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1865 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1866 notinname('\0') = notinname('\n');
1867 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1868 begtoken('\0') = begtoken('\n');
1869 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1870 intoken('\0') = intoken('\n');
1871 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1872 endtoken('\0') = endtoken('\n');
1873 }
1874
1875 /*
1876 * This routine opens the specified file and calls the function
1877 * which finds the function and type definitions.
1878 */
1879 static void
1880 find_entries (inf)
1881 FILE *inf;
1882 {
1883 char *cp;
1884 language *lang = curfdp->lang;
1885 Lang_function *parser = NULL;
1886
1887 /* If user specified a language, use it. */
1888 if (lang != NULL && lang->function != NULL)
1889 {
1890 parser = lang->function;
1891 }
1892
1893 /* Else try to guess the language given the file name. */
1894 if (parser == NULL)
1895 {
1896 lang = get_language_from_filename (curfdp->infname, TRUE);
1897 if (lang != NULL && lang->function != NULL)
1898 {
1899 curfdp->lang = lang;
1900 parser = lang->function;
1901 }
1902 }
1903
1904 /* Else look for sharp-bang as the first two characters. */
1905 if (parser == NULL
1906 && readline_internal (&lb, inf) > 0
1907 && lb.len >= 2
1908 && lb.buffer[0] == '#'
1909 && lb.buffer[1] == '!')
1910 {
1911 char *lp;
1912
1913 /* Set lp to point at the first char after the last slash in the
1914 line or, if no slashes, at the first nonblank. Then set cp to
1915 the first successive blank and terminate the string. */
1916 lp = etags_strrchr (lb.buffer+2, '/');
1917 if (lp != NULL)
1918 lp += 1;
1919 else
1920 lp = skip_spaces (lb.buffer + 2);
1921 cp = skip_non_spaces (lp);
1922 *cp = '\0';
1923
1924 if (strlen (lp) > 0)
1925 {
1926 lang = get_language_from_interpreter (lp);
1927 if (lang != NULL && lang->function != NULL)
1928 {
1929 curfdp->lang = lang;
1930 parser = lang->function;
1931 }
1932 }
1933 }
1934
1935 /* We rewind here, even if inf may be a pipe. We fail if the
1936 length of the first line is longer than the pipe block size,
1937 which is unlikely. */
1938 rewind (inf);
1939
1940 /* Else try to guess the language given the case insensitive file name. */
1941 if (parser == NULL)
1942 {
1943 lang = get_language_from_filename (curfdp->infname, FALSE);
1944 if (lang != NULL && lang->function != NULL)
1945 {
1946 curfdp->lang = lang;
1947 parser = lang->function;
1948 }
1949 }
1950
1951 /* Else try Fortran or C. */
1952 if (parser == NULL)
1953 {
1954 node *old_last_node = last_node;
1955
1956 curfdp->lang = get_language_from_langname ("fortran");
1957 find_entries (inf);
1958
1959 if (old_last_node == last_node)
1960 /* No Fortran entries found. Try C. */
1961 {
1962 /* We do not tag if rewind fails.
1963 Only the file name will be recorded in the tags file. */
1964 rewind (inf);
1965 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1966 find_entries (inf);
1967 }
1968 return;
1969 }
1970
1971 if (!no_line_directive
1972 && curfdp->lang != NULL && curfdp->lang->metasource)
1973 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1974 file, or anyway we parsed a file that is automatically generated from
1975 this one. If this is the case, the bingo.c file contained #line
1976 directives that generated tags pointing to this file. Let's delete
1977 them all before parsing this file, which is the real source. */
1978 {
1979 fdesc **fdpp = &fdhead;
1980 while (*fdpp != NULL)
1981 if (*fdpp != curfdp
1982 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1983 /* We found one of those! We must delete both the file description
1984 and all tags referring to it. */
1985 {
1986 fdesc *badfdp = *fdpp;
1987
1988 /* Delete the tags referring to badfdp->taggedfname
1989 that were obtained from badfdp->infname. */
1990 invalidate_nodes (badfdp, &nodehead);
1991
1992 *fdpp = badfdp->next; /* remove the bad description from the list */
1993 free_fdesc (badfdp);
1994 }
1995 else
1996 fdpp = &(*fdpp)->next; /* advance the list pointer */
1997 }
1998
1999 assert (parser != NULL);
2000
2001 /* Generic initialisations before reading from file. */
2002 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2003
2004 /* Generic initialisations before parsing file with readline. */
2005 lineno = 0; /* reset global line number */
2006 charno = 0; /* reset global char number */
2007 linecharno = 0; /* reset global char number of line start */
2008
2009 parser (inf);
2010
2011 regex_tag_multiline ();
2012 }
2013
2014 \f
2015 /*
2016 * Check whether an implicitly named tag should be created,
2017 * then call `pfnote'.
2018 * NAME is a string that is internally copied by this function.
2019 *
2020 * TAGS format specification
2021 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2022 * The following is explained in some more detail in etc/ETAGS.EBNF.
2023 *
2024 * make_tag creates tags with "implicit tag names" (unnamed tags)
2025 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2026 * 1. NAME does not contain any of the characters in NONAM;
2027 * 2. LINESTART contains name as either a rightmost, or rightmost but
2028 * one character, substring;
2029 * 3. the character, if any, immediately before NAME in LINESTART must
2030 * be a character in NONAM;
2031 * 4. the character, if any, immediately after NAME in LINESTART must
2032 * also be a character in NONAM.
2033 *
2034 * The implementation uses the notinname() macro, which recognises the
2035 * characters stored in the string `nonam'.
2036 * etags.el needs to use the same characters that are in NONAM.
2037 */
2038 static void
2039 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2040 char *name; /* tag name, or NULL if unnamed */
2041 int namelen; /* tag length */
2042 bool is_func; /* tag is a function */
2043 char *linestart; /* start of the line where tag is */
2044 int linelen; /* length of the line where tag is */
2045 int lno; /* line number */
2046 long cno; /* character number */
2047 {
2048 bool named = (name != NULL && namelen > 0);
2049
2050 if (!CTAGS && named) /* maybe set named to false */
2051 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2052 such that etags.el can guess a name from it. */
2053 {
2054 int i;
2055 register char *cp = name;
2056
2057 for (i = 0; i < namelen; i++)
2058 if (notinname (*cp++))
2059 break;
2060 if (i == namelen) /* rule #1 */
2061 {
2062 cp = linestart + linelen - namelen;
2063 if (notinname (linestart[linelen-1]))
2064 cp -= 1; /* rule #4 */
2065 if (cp >= linestart /* rule #2 */
2066 && (cp == linestart
2067 || notinname (cp[-1])) /* rule #3 */
2068 && strneq (name, cp, namelen)) /* rule #2 */
2069 named = FALSE; /* use implicit tag name */
2070 }
2071 }
2072
2073 if (named)
2074 name = savenstr (name, namelen);
2075 else
2076 name = NULL;
2077 pfnote (name, is_func, linestart, linelen, lno, cno);
2078 }
2079
2080 /* Record a tag. */
2081 static void
2082 pfnote (name, is_func, linestart, linelen, lno, cno)
2083 char *name; /* tag name, or NULL if unnamed */
2084 bool is_func; /* tag is a function */
2085 char *linestart; /* start of the line where tag is */
2086 int linelen; /* length of the line where tag is */
2087 int lno; /* line number */
2088 long cno; /* character number */
2089 {
2090 register node *np;
2091
2092 assert (name == NULL || name[0] != '\0');
2093 if (CTAGS && name == NULL)
2094 return;
2095
2096 np = xnew (1, node);
2097
2098 /* If ctags mode, change name "main" to M<thisfilename>. */
2099 if (CTAGS && !cxref_style && streq (name, "main"))
2100 {
2101 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2102 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2103 fp = etags_strrchr (np->name, '.');
2104 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2105 fp[0] = '\0';
2106 }
2107 else
2108 np->name = name;
2109 np->valid = TRUE;
2110 np->been_warned = FALSE;
2111 np->fdp = curfdp;
2112 np->is_func = is_func;
2113 np->lno = lno;
2114 if (np->fdp->usecharno)
2115 /* Our char numbers are 0-base, because of C language tradition?
2116 ctags compatibility? old versions compatibility? I don't know.
2117 Anyway, since emacs's are 1-base we expect etags.el to take care
2118 of the difference. If we wanted to have 1-based numbers, we would
2119 uncomment the +1 below. */
2120 np->cno = cno /* + 1 */ ;
2121 else
2122 np->cno = invalidcharno;
2123 np->left = np->right = NULL;
2124 if (CTAGS && !cxref_style)
2125 {
2126 if (strlen (linestart) < 50)
2127 np->regex = concat (linestart, "$", "");
2128 else
2129 np->regex = savenstr (linestart, 50);
2130 }
2131 else
2132 np->regex = savenstr (linestart, linelen);
2133
2134 add_node (np, &nodehead);
2135 }
2136
2137 /*
2138 * free_tree ()
2139 * recurse on left children, iterate on right children.
2140 */
2141 static void
2142 free_tree (np)
2143 register node *np;
2144 {
2145 while (np)
2146 {
2147 register node *node_right = np->right;
2148 free_tree (np->left);
2149 if (np->name != NULL)
2150 free (np->name);
2151 free (np->regex);
2152 free (np);
2153 np = node_right;
2154 }
2155 }
2156
2157 /*
2158 * free_fdesc ()
2159 * delete a file description
2160 */
2161 static void
2162 free_fdesc (fdp)
2163 register fdesc *fdp;
2164 {
2165 if (fdp->infname != NULL) free (fdp->infname);
2166 if (fdp->infabsname != NULL) free (fdp->infabsname);
2167 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2168 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2169 if (fdp->prop != NULL) free (fdp->prop);
2170 free (fdp);
2171 }
2172
2173 /*
2174 * add_node ()
2175 * Adds a node to the tree of nodes. In etags mode, sort by file
2176 * name. In ctags mode, sort by tag name. Make no attempt at
2177 * balancing.
2178 *
2179 * add_node is the only function allowed to add nodes, so it can
2180 * maintain state.
2181 */
2182 static void
2183 add_node (np, cur_node_p)
2184 node *np, **cur_node_p;
2185 {
2186 register int dif;
2187 register node *cur_node = *cur_node_p;
2188
2189 if (cur_node == NULL)
2190 {
2191 *cur_node_p = np;
2192 last_node = np;
2193 return;
2194 }
2195
2196 if (!CTAGS)
2197 /* Etags Mode */
2198 {
2199 /* For each file name, tags are in a linked sublist on the right
2200 pointer. The first tags of different files are a linked list
2201 on the left pointer. last_node points to the end of the last
2202 used sublist. */
2203 if (last_node != NULL && last_node->fdp == np->fdp)
2204 {
2205 /* Let's use the same sublist as the last added node. */
2206 assert (last_node->right == NULL);
2207 last_node->right = np;
2208 last_node = np;
2209 }
2210 else if (cur_node->fdp == np->fdp)
2211 {
2212 /* Scanning the list we found the head of a sublist which is
2213 good for us. Let's scan this sublist. */
2214 add_node (np, &cur_node->right);
2215 }
2216 else
2217 /* The head of this sublist is not good for us. Let's try the
2218 next one. */
2219 add_node (np, &cur_node->left);
2220 } /* if ETAGS mode */
2221
2222 else
2223 {
2224 /* Ctags Mode */
2225 dif = strcmp (np->name, cur_node->name);
2226
2227 /*
2228 * If this tag name matches an existing one, then
2229 * do not add the node, but maybe print a warning.
2230 */
2231 if (no_duplicates && !dif)
2232 {
2233 if (np->fdp == cur_node->fdp)
2234 {
2235 if (!no_warnings)
2236 {
2237 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2238 np->fdp->infname, lineno, np->name);
2239 fprintf (stderr, "Second entry ignored\n");
2240 }
2241 }
2242 else if (!cur_node->been_warned && !no_warnings)
2243 {
2244 fprintf
2245 (stderr,
2246 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2247 np->fdp->infname, cur_node->fdp->infname, np->name);
2248 cur_node->been_warned = TRUE;
2249 }
2250 return;
2251 }
2252
2253 /* Actually add the node */
2254 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2255 } /* if CTAGS mode */
2256 }
2257
2258 /*
2259 * invalidate_nodes ()
2260 * Scan the node tree and invalidate all nodes pointing to the
2261 * given file description (CTAGS case) or free them (ETAGS case).
2262 */
2263 static void
2264 invalidate_nodes (badfdp, npp)
2265 fdesc *badfdp;
2266 node **npp;
2267 {
2268 node *np = *npp;
2269
2270 if (np == NULL)
2271 return;
2272
2273 if (CTAGS)
2274 {
2275 if (np->left != NULL)
2276 invalidate_nodes (badfdp, &np->left);
2277 if (np->fdp == badfdp)
2278 np->valid = FALSE;
2279 if (np->right != NULL)
2280 invalidate_nodes (badfdp, &np->right);
2281 }
2282 else
2283 {
2284 assert (np->fdp != NULL);
2285 if (np->fdp == badfdp)
2286 {
2287 *npp = np->left; /* detach the sublist from the list */
2288 np->left = NULL; /* isolate it */
2289 free_tree (np); /* free it */
2290 invalidate_nodes (badfdp, npp);
2291 }
2292 else
2293 invalidate_nodes (badfdp, &np->left);
2294 }
2295 }
2296
2297 \f
2298 static int total_size_of_entries __P((node *));
2299 static int number_len __P((long));
2300
/* Return how many digits the decimal representation of NUM has.
   NUM is expected to be non-negative; the result is never less
   than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2311
2312 /*
2313 * Return total number of characters that put_entries will output for
2314 * the nodes in the linked list at the right of the specified node.
2315 * This count is irrelevant with etags.el since emacs 19.34 at least,
2316 * but is still supplied for backward compatibility.
2317 */
2318 static int
2319 total_size_of_entries (np)
2320 register node *np;
2321 {
2322 register int total = 0;
2323
2324 for (; np != NULL; np = np->right)
2325 if (np->valid)
2326 {
2327 total += strlen (np->regex) + 1; /* pat\177 */
2328 if (np->name != NULL)
2329 total += strlen (np->name) + 1; /* name\001 */
2330 total += number_len ((long) np->lno) + 1; /* lno, */
2331 if (np->cno != invalidcharno) /* cno */
2332 total += number_len (np->cno);
2333 total += 1; /* newline */
2334 }
2335
2336 return total;
2337 }
2338
2339 static void
2340 put_entries (np)
2341 register node *np;
2342 {
2343 register char *sp;
2344 static fdesc *fdp = NULL;
2345
2346 if (np == NULL)
2347 return;
2348
2349 /* Output subentries that precede this one */
2350 if (CTAGS)
2351 put_entries (np->left);
2352
2353 /* Output this entry */
2354 if (np->valid)
2355 {
2356 if (!CTAGS)
2357 {
2358 /* Etags mode */
2359 if (fdp != np->fdp)
2360 {
2361 fdp = np->fdp;
2362 fprintf (tagf, "\f\n%s,%d\n",
2363 fdp->taggedfname, total_size_of_entries (np));
2364 fdp->written = TRUE;
2365 }
2366 fputs (np->regex, tagf);
2367 fputc ('\177', tagf);
2368 if (np->name != NULL)
2369 {
2370 fputs (np->name, tagf);
2371 fputc ('\001', tagf);
2372 }
2373 fprintf (tagf, "%d,", np->lno);
2374 if (np->cno != invalidcharno)
2375 fprintf (tagf, "%ld", np->cno);
2376 fputs ("\n", tagf);
2377 }
2378 else
2379 {
2380 /* Ctags mode */
2381 if (np->name == NULL)
2382 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2383
2384 if (cxref_style)
2385 {
2386 if (vgrind_style)
2387 fprintf (stdout, "%s %s %d\n",
2388 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2389 else
2390 fprintf (stdout, "%-16s %3d %-16s %s\n",
2391 np->name, np->lno, np->fdp->taggedfname, np->regex);
2392 }
2393 else
2394 {
2395 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2396
2397 if (np->is_func)
2398 { /* function or #define macro with args */
2399 putc (searchar, tagf);
2400 putc ('^', tagf);
2401
2402 for (sp = np->regex; *sp; sp++)
2403 {
2404 if (*sp == '\\' || *sp == searchar)
2405 putc ('\\', tagf);
2406 putc (*sp, tagf);
2407 }
2408 putc (searchar, tagf);
2409 }
2410 else
2411 { /* anything else; text pattern inadequate */
2412 fprintf (tagf, "%d", np->lno);
2413 }
2414 putc ('\n', tagf);
2415 }
2416 }
2417 } /* if this node contains a valid tag */
2418
2419 /* Output subentries that follow this one */
2420 put_entries (np->right);
2421 if (!CTAGS)
2422 put_entries (np->left);
2423 }
2424
2425 \f
/* C extensions: bit masks telling which C dialect is being parsed.
   The low bits (C_EXT) select the dialect; note that the C* and Java
   values both include the C++ bit.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2434
/*
 * The C symbol tables.
 * Each keyword known to the C parsers is given one of these types;
 * st_none is the type of any token that is not such a keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2449
2450 static unsigned int hash __P((const char *, unsigned int));
2451 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2452 static enum sym_type C_symtype __P((char *, int, int));
2453
2454 /* Feed stuff between (but not including) %[ and %] lines to:
2455 gperf -m 5
2456 %[
2457 %compare-strncmp
2458 %enum
2459 %struct-type
2460 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2461 %%
2462 if, 0, st_C_ignore
2463 for, 0, st_C_ignore
2464 while, 0, st_C_ignore
2465 switch, 0, st_C_ignore
2466 return, 0, st_C_ignore
2467 __attribute__, 0, st_C_attribute
2468 @interface, 0, st_C_objprot
2469 @protocol, 0, st_C_objprot
2470 @implementation,0, st_C_objimpl
2471 @end, 0, st_C_objend
2472 import, (C_JAVA & ~C_PLPL), st_C_ignore
2473 package, (C_JAVA & ~C_PLPL), st_C_ignore
2474 friend, C_PLPL, st_C_ignore
2475 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2476 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2477 interface, (C_JAVA & ~C_PLPL), st_C_struct
2478 class, 0, st_C_class
2479 namespace, C_PLPL, st_C_struct
2480 domain, C_STAR, st_C_struct
2481 union, 0, st_C_struct
2482 struct, 0, st_C_struct
2483 extern, 0, st_C_extern
2484 enum, 0, st_C_enum
2485 typedef, 0, st_C_typedef
2486 define, 0, st_C_define
2487 undef, 0, st_C_define
2488 operator, C_PLPL, st_C_operator
2489 template, 0, st_C_template
2490 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2491 DEFUN, 0, st_C_gnumacro
2492 SYSCALL, 0, st_C_gnumacro
2493 ENTRY, 0, st_C_gnumacro
2494 PSEUDO, 0, st_C_gnumacro
2495 # These are defined inside C functions, so currently they are not met.
2496 # EXFUN used in glibc, DEFVAR_* in emacs.
2497 #EXFUN, 0, st_C_gnumacro
2498 #DEFVAR_, 0, st_C_gnumacro
2499 %]
2500 and replace lines between %< and %> with its output, then:
2501 - remove the #if characterset check
2502 - make in_word_set static and not inline. */
2503 /*%<*/
2504 /* C code produced by gperf version 3.0.1 */
2505 /* Command-line: gperf -m 5 */
2506 /* Computed positions: -k'2-3' */
2507
2508 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2509 /* maximum key range = 33, duplicates = 0 */
2510
/* Hash function for the C keywords, after the code produced by gperf.
   The value is LEN, plus the table entry for the second character of
   STR, plus, unless LEN is 2, the table entry for its third character.
   LEN must be at least 2: in_word_set checks this before calling. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Two-character words have no third character to look at. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];
  return hval;
}
2565
2566 static struct C_stab_entry *
2567 in_word_set (str, len)
2568 register const char *str;
2569 register unsigned int len;
2570 {
2571 enum
2572 {
2573 TOTAL_KEYWORDS = 32,
2574 MIN_WORD_LENGTH = 2,
2575 MAX_WORD_LENGTH = 15,
2576 MIN_HASH_VALUE = 2,
2577 MAX_HASH_VALUE = 34
2578 };
2579
2580 static struct C_stab_entry wordlist[] =
2581 {
2582 {""}, {""},
2583 {"if", 0, st_C_ignore},
2584 {""},
2585 {"@end", 0, st_C_objend},
2586 {"union", 0, st_C_struct},
2587 {"define", 0, st_C_define},
2588 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2589 {"template", 0, st_C_template},
2590 {"operator", C_PLPL, st_C_operator},
2591 {"@interface", 0, st_C_objprot},
2592 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2593 {"friend", C_PLPL, st_C_ignore},
2594 {"typedef", 0, st_C_typedef},
2595 {"return", 0, st_C_ignore},
2596 {"@implementation",0, st_C_objimpl},
2597 {"@protocol", 0, st_C_objprot},
2598 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2599 {"extern", 0, st_C_extern},
2600 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2601 {"struct", 0, st_C_struct},
2602 {"domain", C_STAR, st_C_struct},
2603 {"switch", 0, st_C_ignore},
2604 {"enum", 0, st_C_enum},
2605 {"for", 0, st_C_ignore},
2606 {"namespace", C_PLPL, st_C_struct},
2607 {"class", 0, st_C_class},
2608 {"while", 0, st_C_ignore},
2609 {"undef", 0, st_C_define},
2610 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2611 {"__attribute__", 0, st_C_attribute},
2612 {"SYSCALL", 0, st_C_gnumacro},
2613 {"ENTRY", 0, st_C_gnumacro},
2614 {"PSEUDO", 0, st_C_gnumacro},
2615 {"DEFUN", 0, st_C_gnumacro}
2616 };
2617
2618 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2619 {
2620 register int key = hash (str, len);
2621
2622 if (key <= MAX_HASH_VALUE && key >= 0)
2623 {
2624 register const char *s = wordlist[key].name;
2625
2626 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2627 return &wordlist[key];
2628 }
2629 }
2630 return 0;
2631 }
2632 /*%>*/
2633
2634 static enum sym_type
2635 C_symtype (str, len, c_ext)
2636 char *str;
2637 int len;
2638 int c_ext;
2639 {
2640 register struct C_stab_entry *se = in_word_set (str, len);
2641
2642 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2643 return st_none;
2644 return se->type;
2645 }
2646
2647 \f
/*
 * Support for ignoring __attribute__ ((list)).
 * This flag is set by consider_token when it meets the
 * __attribute__ keyword.
 */
static bool inattribute;        /* looking at an __attribute__ construct */
2652
/*
 * State variable of the simple finite automaton that recognizes
 * C functions and variables.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Function or variable: the extern keyword has been seen. */
static bool fvextern;
2672
/*
 * State variable of the simple finite automaton that recognizes
 * typedefs.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2686
/*
 * State variable of the simple finite automaton that recognizes
 * struct-like structures (enum, struct and union).
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2700
/*
 * Name of the current Objective C class.
 * It is meaningful only when objdef is different from onone.
 */
static char *objtag = "<uninited>";
2705
/*
 * State variable of yet another little state machine, which deals
 * with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2716
/*
 * State variable of the state machine for Objective C protocols and
 * implementations.
 *  Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2736
2737
/*
 * Information about the token read, and about how it should be
 * tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  /* These three members can be used to pass strings around for
     generic purposes. */
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */

  /* The following members specifically refer to creating tags.  In
     this case the token contained here is the pattern that will be
     used to create a tag. */
  bool valid;                   /* when not set, do not create a tag
                                   (presumably -- confirm in make_C_tag);
                                   the token should be invalidated
                                   whenever a state machine is reset
                                   prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2760
2761 /*
2762 * Variables and functions for dealing with nested structures.
2763 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2764 */
2765 static void pushclass_above __P((int, char *, int));
2766 static void popclass_above __P((int));
2767 static void write_classname __P((linebuffer *, char *qualifier));
2768
2769 static struct {
2770 char **cname; /* nested class names */
2771 int *bracelev; /* nested class brace level */
2772 int nl; /* class nesting level (elements used) */
2773 int size; /* length of the array */
2774 } cstack; /* stack for nested declaration tags */
2775 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2776 #define nestlev (cstack.nl)
2777 /* After struct keyword or in struct body, not inside a nested function. */
2778 #define instruct (structdef == snone && nestlev > 0 \
2779 && bracelev == cstack.bracelev[nestlev-1] + 1)
2780
2781 static void
2782 pushclass_above (bracelev, str, len)
2783 int bracelev;
2784 char *str;
2785 int len;
2786 {
2787 int nl;
2788
2789 popclass_above (bracelev);
2790 nl = cstack.nl;
2791 if (nl >= cstack.size)
2792 {
2793 int size = cstack.size *= 2;
2794 xrnew (cstack.cname, size, char *);
2795 xrnew (cstack.bracelev, size, int);
2796 }
2797 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2798 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2799 cstack.bracelev[nl] = bracelev;
2800 cstack.nl = nl + 1;
2801 }
2802
2803 static void
2804 popclass_above (bracelev)
2805 int bracelev;
2806 {
2807 int nl;
2808
2809 for (nl = cstack.nl - 1;
2810 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2811 nl--)
2812 {
2813 if (cstack.cname[nl] != NULL)
2814 free (cstack.cname[nl]);
2815 cstack.nl = nl;
2816 }
2817 }
2818
2819 static void
2820 write_classname (cn, qualifier)
2821 linebuffer *cn;
2822 char *qualifier;
2823 {
2824 int i, len;
2825 int qlen = strlen (qualifier);
2826
2827 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2828 {
2829 len = 0;
2830 cn->len = 0;
2831 cn->buffer[0] = '\0';
2832 }
2833 else
2834 {
2835 len = strlen (cstack.cname[0]);
2836 linebuffer_setlen (cn, len);
2837 strcpy (cn->buffer, cstack.cname[0]);
2838 }
2839 for (i = 1; i < cstack.nl; i++)
2840 {
2841 char *s;
2842 int slen;
2843
2844 s = cstack.cname[i];
2845 if (s == NULL)
2846 continue;
2847 slen = strlen (s);
2848 len += slen + qlen;
2849 linebuffer_setlen (cn, len);
2850 strncat (cn->buffer, qualifier, qlen);
2851 strncat (cn->buffer, s, slen);
2852 }
2853 }
2854
2855 \f
2856 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2857 static void make_C_tag __P((bool));
2858
2859 /*
2860 * consider_token ()
2861 * checks to see if the current token is at the start of a
2862 * function or variable, or corresponds to a typedef, or
2863 * is a struct/union/enum tag, or #define, or an enum constant.
2864 *
2865 * *IS_FUNC gets TRUE if the token is a function or #define macro
2866 * with args. C_EXTP points to which language we are looking at.
2867 *
2868 * Globals
2869 * fvdef IN OUT
2870 * structdef IN OUT
2871 * definedef IN OUT
2872 * typdef IN OUT
2873 * objdef IN OUT
2874 */
2875
2876 static bool
2877 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2878 register char *str; /* IN: token pointer */
2879 register int len; /* IN: token length */
2880 register int c; /* IN: first char after the token */
2881 int *c_extp; /* IN, OUT: C extensions mask */
2882 int bracelev; /* IN: brace level */
2883 int parlev; /* IN: parenthesis level */
2884 bool *is_func_or_var; /* OUT: function or variable found */
2885 {
2886 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2887 structtype is the type of the preceding struct-like keyword, and
2888 structbracelev is the brace level where it has been seen. */
2889 static enum sym_type structtype;
2890 static int structbracelev;
2891 static enum sym_type toktype;
2892
2893
2894 toktype = C_symtype (str, len, *c_extp);
2895
2896 /*
2897 * Skip __attribute__
2898 */
2899 if (toktype == st_C_attribute)
2900 {
2901 inattribute = TRUE;
2902 return FALSE;
2903 }
2904
2905 /*
2906 * Advance the definedef state machine.
2907 */
2908 switch (definedef)
2909 {
2910 case dnone:
2911 /* We're not on a preprocessor line. */
2912 if (toktype == st_C_gnumacro)
2913 {
2914 fvdef = fdefunkey;
2915 return FALSE;
2916 }
2917 break;
2918 case dsharpseen:
2919 if (toktype == st_C_define)
2920 {
2921 definedef = ddefineseen;
2922 }
2923 else
2924 {
2925 definedef = dignorerest;
2926 }
2927 return FALSE;
2928 case ddefineseen:
2929 /*
2930 * Make a tag for any macro, unless it is a constant
2931 * and constantypedefs is FALSE.
2932 */
2933 definedef = dignorerest;
2934 *is_func_or_var = (c == '(');
2935 if (!*is_func_or_var && !constantypedefs)
2936 return FALSE;
2937 else
2938 return TRUE;
2939 case dignorerest:
2940 return FALSE;
2941 default:
2942 error ("internal error: definedef value.", (char *)NULL);
2943 }
2944
2945 /*
2946 * Now typedefs
2947 */
2948 switch (typdef)
2949 {
2950 case tnone:
2951 if (toktype == st_C_typedef)
2952 {
2953 if (typedefs)
2954 typdef = tkeyseen;
2955 fvextern = FALSE;
2956 fvdef = fvnone;
2957 return FALSE;
2958 }
2959 break;
2960 case tkeyseen:
2961 switch (toktype)
2962 {
2963 case st_none:
2964 case st_C_class:
2965 case st_C_struct:
2966 case st_C_enum:
2967 typdef = ttypeseen;
2968 }
2969 break;
2970 case ttypeseen:
2971 if (structdef == snone && fvdef == fvnone)
2972 {
2973 fvdef = fvnameseen;
2974 return TRUE;
2975 }
2976 break;
2977 case tend:
2978 switch (toktype)
2979 {
2980 case st_C_class:
2981 case st_C_struct:
2982 case st_C_enum:
2983 return FALSE;
2984 }
2985 return TRUE;
2986 }
2987
2988 switch (toktype)
2989 {
2990 case st_C_javastruct:
2991 if (structdef == stagseen)
2992 structdef = scolonseen;
2993 return FALSE;
2994 case st_C_template:
2995 case st_C_class:
2996 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2997 && bracelev == 0
2998 && definedef == dnone && structdef == snone
2999 && typdef == tnone && fvdef == fvnone)
3000 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3001 if (toktype == st_C_template)
3002 break;
3003 /* FALLTHRU */
3004 case st_C_struct:
3005 case st_C_enum:
3006 if (parlev == 0
3007 && fvdef != vignore
3008 && (typdef == tkeyseen
3009 || (typedefs_or_cplusplus && structdef == snone)))
3010 {
3011 structdef = skeyseen;
3012 structtype = toktype;
3013 structbracelev = bracelev;
3014 if (fvdef == fvnameseen)
3015 fvdef = fvnone;
3016 }
3017 return FALSE;
3018 }
3019
3020 if (structdef == skeyseen)
3021 {
3022 structdef = stagseen;
3023 return TRUE;
3024 }
3025
3026 if (typdef != tnone)
3027 definedef = dnone;
3028
3029 /* Detect Objective C constructs. */
3030 switch (objdef)
3031 {
3032 case onone:
3033 switch (toktype)
3034 {
3035 case st_C_objprot:
3036 objdef = oprotocol;
3037 return FALSE;
3038 case st_C_objimpl:
3039 objdef = oimplementation;
3040 return FALSE;
3041 }
3042 break;
3043 case oimplementation:
3044 /* Save the class tag for functions or variables defined inside. */
3045 objtag = savenstr (str, len);
3046 objdef = oinbody;
3047 return FALSE;
3048 case oprotocol:
3049 /* Save the class tag for categories. */
3050 objtag = savenstr (str, len);
3051 objdef = otagseen;
3052 *is_func_or_var = TRUE;
3053 return TRUE;
3054 case oparenseen:
3055 objdef = ocatseen;
3056 *is_func_or_var = TRUE;
3057 return TRUE;
3058 case oinbody:
3059 break;
3060 case omethodsign:
3061 if (parlev == 0)
3062 {
3063 fvdef = fvnone;
3064 objdef = omethodtag;
3065 linebuffer_setlen (&token_name, len);
3066 strncpy (token_name.buffer, str, len);
3067 token_name.buffer[len] = '\0';
3068 return TRUE;
3069 }
3070 return FALSE;
3071 case omethodcolon:
3072 if (parlev == 0)
3073 objdef = omethodparm;
3074 return FALSE;
3075 case omethodparm:
3076 if (parlev == 0)
3077 {
3078 fvdef = fvnone;
3079 objdef = omethodtag;
3080 linebuffer_setlen (&token_name, token_name.len + len);
3081 strncat (token_name.buffer, str, len);
3082 return TRUE;
3083 }
3084 return FALSE;
3085 case oignore:
3086 if (toktype == st_C_objend)
3087 {
3088 /* Memory leakage here: the string pointed by objtag is
3089 never released, because many tests would be needed to
3090 avoid breaking on incorrect input code. The amount of
3091 memory leaked here is the sum of the lengths of the
3092 class tags.
3093 free (objtag); */
3094 objdef = onone;
3095 }
3096 return FALSE;
3097 }
3098
3099 /* A function, variable or enum constant? */
3100 switch (toktype)
3101 {
3102 case st_C_extern:
3103 fvextern = TRUE;
3104 switch (fvdef)
3105 {
3106 case finlist:
3107 case flistseen:
3108 case fignore:
3109 case vignore:
3110 break;
3111 default:
3112 fvdef = fvnone;
3113 }
3114 return FALSE;
3115 case st_C_ignore:
3116 fvextern = FALSE;
3117 fvdef = vignore;
3118 return FALSE;
3119 case st_C_operator:
3120 fvdef = foperator;
3121 *is_func_or_var = TRUE;
3122 return TRUE;
3123 case st_none:
3124 if (constantypedefs
3125 && structdef == snone
3126 && structtype == st_C_enum && bracelev > structbracelev)
3127 return TRUE; /* enum constant */
3128 switch (fvdef)
3129 {
3130 case fdefunkey:
3131 if (bracelev > 0)
3132 break;
3133 fvdef = fdefunname; /* GNU macro */
3134 *is_func_or_var = TRUE;
3135 return TRUE;
3136 case fvnone:
3137 switch (typdef)
3138 {
3139 case ttypeseen:
3140 return FALSE;
3141 case tnone:
3142 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3143 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3144 {
3145 fvdef = vignore;
3146 return FALSE;
3147 }
3148 break;
3149 }
3150 /* FALLTHRU */
3151 case fvnameseen:
3152 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3153 {
3154 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3155 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3156 fvdef = foperator;
3157 *is_func_or_var = TRUE;
3158 return TRUE;
3159 }
3160 if (bracelev > 0 && !instruct)
3161 break;
3162 fvdef = fvnameseen; /* function or variable */
3163 *is_func_or_var = TRUE;
3164 return TRUE;
3165 }
3166 break;
3167 }
3168
3169 return FALSE;
3170 }
3171
3172 \f
3173 /*
3174 * C_entries often keeps pointers to tokens or lines which are older than
3175 * the line currently read. By keeping two line buffers, and switching
3176 * them at end of line, it is possible to use those pointers.
3177 */
/* The two line buffers used by C_entries.  Lines are always read into
   lbs[curndx]; newndx records which element holds the line read most
   recently (see CNL_SAVE_DEFINEDEF). */
3178 static struct
3179 {
3180 long linepos; /* value of charno saved just before the line was read */
3181 linebuffer lb; /* the line itself */
3182 } lbs[2];
3183
/* These macros rely on the locals curndx, newndx and c_ext of C_entries. */
/* True when the line read most recently is still the current buffer,
   i.e. no buffer switch has happened since the last read. */
3184 #define current_lb_is_new (newndx == curndx)
/* Make the other element of lbs the current one, so that the next
   line read does not overwrite the buffer in use now. */
3185 #define switch_line_buffers() (curndx = 1 - curndx)
3186
/* Shorthands for the buffer and the saved position of each element. */
3187 #define curlb (lbs[curndx].lb)
3188 #define newlb (lbs[newndx].lb)
3189 #define curlinepos (lbs[curndx].linepos)
3190 #define newlinepos (lbs[newndx].linepos)
3191
/* Tests on the dialect bits of c_ext. */
3192 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3193 #define cplpl (c_ext & C_PLPL)
3194 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3195
/* Read the next input line into the current buffer without touching
   definedef: save charno as the position of the line, point lp at the
   start of the new text, clear quotednl and mark the current buffer as
   the one holding the newest line.  Uses the locals lp, quotednl,
   curndx, newndx and inf of C_entries. */
3196 #define CNL_SAVE_DEFINEDEF() \
3197 do { \
3198 curlinepos = charno; \
3199 readline (&curlb, inf); \
3200 lp = curlb.buffer; \
3201 quotednl = FALSE; \
3202 newndx = curndx; \
3203 } while (0)
3204
/* Like CNL_SAVE_DEFINEDEF, but also end any preprocessor directive in
   progress: if a token was put aside in savetoken, make it the current
   token again, and reset definedef to dnone. */
3205 #define CNL() \
3206 do { \
3207 CNL_SAVE_DEFINEDEF(); \
3208 if (savetoken.valid) \
3209 { \
3210 token = savetoken; \
3211 savetoken.valid = FALSE; \
3212 } \
3213 definedef = dnone; \
3214 } while (0)
3215
3216
3217 static void
3218 make_C_tag (isfun)
3219 bool isfun;
3220 {
3221 /* This function is never called when token.valid is FALSE, but
3222 we must protect against invalid input or internal errors. */
3223 if (!DEBUG && !token.valid)
3224 return;
3225
3226 if (token.valid)
3227 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3228 token.offset+token.length+1, token.lineno, token.linepos);
3229 else /* this case is optimised away if !DEBUG */
3230 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3231 token_name.len + 17, isfun, token.line,
3232 token.offset+token.length+1, token.lineno, token.linepos);
3233
3234 token.valid = FALSE;
3235 }
3236
3237
3238 /*
3239 * C_entries ()
3240 * This routine finds functions, variables, typedefs,
3241 * #define's, enum constants and struct/union/enum definitions in
3242 * C syntax and adds them to the list.
 *
 * C_EXT is a bit mask describing the dialect (tested through the
 * plainc, cplpl and cjava macros and the C_AUTO and YACC bits); it is
 * modified locally when C++ is detected automatically.  INF is the
 * stream to scan; it is read one line at a time through the CNL and
 * CNL_SAVE_DEFINEDEF macros.
 *
 * The scanner is a character-driven state machine.  Its state lives
 * partly in the locals declared below and partly in file-scope
 * variables (fvdef, typdef, structdef, definedef, objdef, token, ...)
 * that it shares with consider_token and make_C_tag.
3243 */
3244 static void
3245 C_entries (c_ext, inf)
3246 int c_ext; /* extension of C */
3247 FILE *inf; /* input file */
3248 {
3249 register char c; /* latest char read; '\0' for end of line */
3250 register char *lp; /* pointer one beyond the character `c' */
3251 int curndx, newndx; /* indices for current and new lb */
3252 register int tokoff; /* offset in line of start of current token */
3253 register int toklen; /* length of current token */
3254 char *qualifier; /* string used to qualify names */
3255 int qlen; /* length of qualifier */
3256 int bracelev; /* current brace level */
3257 int bracketlev; /* current bracket level */
3258 int parlev; /* current parenthesis level */
3259 int attrparlev; /* __attribute__ parenthesis level */
3260 int templatelev; /* current template level */
3261 int typdefbracelev; /* bracelev where a typedef struct body begun */
3262 bool incomm, inquote, inchar, quotednl, midtoken;
3263 bool yacc_rules; /* in the rules part of a yacc file */
3264 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3265
3266
3267 linebuffer_init (&lbs[0].lb);
3268 linebuffer_init (&lbs[1].lb);
/* The class stack is set up only once: its size stays non-zero on
   later calls. */
3269 if (cstack.size == 0)
3270 {
3271 cstack.size = (DEBUG) ? 1 : 4;
3272 cstack.nl = 0;
3273 cstack.cname = xnew (cstack.size, char *);
3274 cstack.bracelev = xnew (cstack.size, int);
3275 }
3276
3277 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3278 curndx = newndx = 0;
/* Start from an empty line, so that the first character fetched by
   the loop below is the '\0' that stands for an end of line. */
3279 lp = curlb.buffer;
3280 *lp = 0;
3281
3282 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3283 structdef = snone; definedef = dnone; objdef = onone;
3284 yacc_rules = FALSE;
3285 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3286 token.valid = savetoken.valid = FALSE;
3287 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Separator placed between a class name and a member name. */
3288 if (cjava)
3289 { qualifier = "."; qlen = 1; }
3290 else
3291 { qualifier = "::"; qlen = 2; }
3292
3293
/* Main loop: one character per iteration.  The '\0' ending a line is
   handled like any other character; the code that handles it reads
   the next line with CNL or CNL_SAVE_DEFINEDEF. */
3294 while (!feof (inf))
3295 {
3296 c = *lp++;
3297 if (c == '\\')
3298 {
3299 /* If we are at the end of the line, the next character is a
3300 '\0'; do not skip it, because it is what tells us
3301 to read the next line. */
3302 if (*lp == '\0')
3303 {
3304 quotednl = TRUE;
3305 continue;
3306 }
3307 lp++;
3308 c = ' ';
3309 }
3310 else if (incomm)
3311 {
3312 switch (c)
3313 {
3314 case '*':
3315 if (*lp == '/')
3316 {
3317 c = *lp++;
3318 incomm = FALSE;
3319 }
3320 break;
3321 case '\0':
3322 /* Newlines inside comments do not end macro definitions in
3323 traditional cpp. */
3324 CNL_SAVE_DEFINEDEF ();
3325 break;
3326 }
3327 continue;
3328 }
3329 else if (inquote)
3330 {
3331 switch (c)
3332 {
3333 case '"':
3334 inquote = FALSE;
3335 break;
3336 case '\0':
3337 /* Newlines inside strings do not end macro definitions
3338 in traditional cpp, even though compilers don't
3339 usually accept them. */
3340 CNL_SAVE_DEFINEDEF ();
3341 break;
3342 }
3343 continue;
3344 }
3345 else if (inchar)
3346 {
3347 switch (c)
3348 {
3349 case '\0':
3350 /* Hmmm, something went wrong. */
3351 CNL ();
3352 /* FALLTHRU */
3353 case '\'':
3354 inchar = FALSE;
3355 break;
3356 }
3357 continue;
3358 }
3359 else if (bracketlev > 0)
3360 {
/* Inside [...]: nothing is examined until the bracket level drops
   back; only ']' and end of line are acted upon. */
3361 switch (c)
3362 {
3363 case ']':
3364 if (--bracketlev > 0)
3365 continue;
3366 break;
3367 case '\0':
3368 CNL_SAVE_DEFINEDEF ();
3369 break;
3370 }
3371 continue;
3372 }
3373 else switch (c)
3374 {
3375 case '"':
3376 inquote = TRUE;
3377 if (inattribute)
3378 break;
3379 switch (fvdef)
3380 {
3381 case fdefunkey:
3382 case fstartlist:
3383 case finlist:
3384 case fignore:
3385 case vignore:
3386 break;
3387 default:
3388 fvextern = FALSE;
3389 fvdef = fvnone;
3390 }
3391 continue;
3392 case '\'':
3393 inchar = TRUE;
3394 if (inattribute)
3395 break;
3396 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3397 {
3398 fvextern = FALSE;
3399 fvdef = fvnone;
3400 }
3401 continue;
3402 case '/':
3403 if (*lp == '*')
3404 {
3405 incomm = TRUE;
3406 lp++;
3407 c = ' ';
3408 }
3409 else if (/* cplpl && */ *lp == '/')
3410 {
/* A // comment: treat its start as the end of the line. */
3411 c = '\0';
3412 }
3413 break;
3414 case '%':
3415 if ((c_ext & YACC) && *lp == '%')
3416 {
3417 /* Entering or exiting rules section in yacc file. */
3418 lp++;
3419 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3420 typdef = tnone; structdef = snone;
3421 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3422 bracelev = 0;
3423 yacc_rules = !yacc_rules;
3424 continue;
3425 }
3426 else
3427 break;
3428 case '#':
3429 if (definedef == dnone)
3430 {
3431 char *cp;
3432 bool cpptoken = TRUE;
3433
3434 /* Look back on this line. If all blanks, or nonblanks
3435 followed by an end of comment, this is a preprocessor
3436 token. */
3437 for (cp = newlb.buffer; cp < lp-1; cp++)
3438 if (!iswhite (*cp))
3439 {
3440 if (*cp == '*' && *(cp+1) == '/')
3441 {
3442 cp++;
3443 cpptoken = TRUE;
3444 }
3445 else
3446 cpptoken = FALSE;
3447 }
3448 if (cpptoken)
3449 definedef = dsharpseen;
3450 } /* if (definedef == dnone) */
3451 continue;
3452 case '[':
3453 bracketlev++;
3454 continue;
3455 } /* switch (c) */
3456
3457
3458 /* Consider token only if some involved conditions are satisfied. */
3459 if (typdef != tignore
3460 && definedef != dignorerest
3461 && fvdef != finlist
3462 && templatelev == 0
3463 && (definedef != dnone
3464 || structdef != scolonseen)
3465 && !inattribute)
3466 {
3467 if (midtoken)
3468 {
3469 if (endtoken (c))
3470 {
3471 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3472 /* This handles :: in the middle,
3473 but not at the beginning of an identifier.
3474 Also, space-separated :: is not recognised. */
3475 {
3476 if (c_ext & C_AUTO) /* automatic detection of C++ */
3477 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3478 lp += 2;
3479 toklen += 2;
3480 c = lp[-1];
3481 goto still_in_token;
3482 }
3483 else
3484 {
3485 bool funorvar = FALSE;
3486
/* In yacc rules every token is accepted; otherwise consider_token
   decides, updating the state variables as a side effect. */
3487 if (yacc_rules
3488 || consider_token (newlb.buffer + tokoff, toklen, c,
3489 &c_ext, bracelev, parlev,
3490 &funorvar))
3491 {
3492 if (fvdef == foperator)
3493 {
/* Extend the token over what follows the `operator' keyword, up to
   the next blank, '(' or end of line. */
3494 char *oldlp = lp;
3495 lp = skip_spaces (lp-1);
3496 if (*lp != '\0')
3497 lp += 1;
3498 while (*lp != '\0'
3499 && !iswhite (*lp) && *lp != '(')
3500 lp += 1;
3501 c = *lp++;
3502 toklen += lp - oldlp;
3503 }
/* Build the tag name in token_name; token.named records whether one
   of the branches below decided the tag should carry that name. */
3504 token.named = FALSE;
3505 if (!plainc
3506 && nestlev > 0 && definedef == dnone)
3507 /* in struct body */
3508 {
3509 write_classname (&token_name, qualifier);
3510 linebuffer_setlen (&token_name,
3511 token_name.len+qlen+toklen);
3512 strcat (token_name.buffer, qualifier);
3513 strncat (token_name.buffer,
3514 newlb.buffer + tokoff, toklen);
3515 token.named = TRUE;
3516 }
3517 else if (objdef == ocatseen)
3518 /* Objective C category */
3519 {
3520 int len = strlen (objtag) + 2 + toklen;
3521 linebuffer_setlen (&token_name, len);
3522 strcpy (token_name.buffer, objtag);
3523 strcat (token_name.buffer, "(");
3524 strncat (token_name.buffer,
3525 newlb.buffer + tokoff, toklen);
3526 strcat (token_name.buffer, ")");
3527 token.named = TRUE;
3528 }
3529 else if (objdef == omethodtag
3530 || objdef == omethodparm)
3531 /* Objective C method */
3532 {
3533 token.named = TRUE;
3534 }
3535 else if (fvdef == fdefunname)
3536 /* GNU DEFUN and similar macros */
3537 {
3538 bool defun = (newlb.buffer[tokoff] == 'F');
3539 int off = tokoff;
3540 int len = toklen;
3541
3542 /* Rewrite the tag so that emacs lisp DEFUNs
3543 can be found by their elisp name */
3544 if (defun)
3545 {
3546 off += 1;
3547 len -= 1;
3548 }
3549 linebuffer_setlen (&token_name, len);
3550 strncpy (token_name.buffer,
3551 newlb.buffer + off, len);
3552 token_name.buffer[len] = '\0';
3553 if (defun)
3554 while (--len >= 0)
3555 if (token_name.buffer[len] == '_')
3556 token_name.buffer[len] = '-';
3557 token.named = defun;
3558 }
3559 else
3560 {
3561 linebuffer_setlen (&token_name, toklen);
3562 strncpy (token_name.buffer,
3563 newlb.buffer + tokoff, toklen);
3564 token_name.buffer[toklen] = '\0';
3565 /* Name macros and members. */
3566 token.named = (structdef == stagseen
3567 || typdef == ttypeseen
3568 || typdef == tend
3569 || (funorvar
3570 && definedef == dignorerest)
3571 || (funorvar
3572 && definedef == dnone
3573 && structdef == snone
3574 && bracelev > 0));
3575 }
3576 token.lineno = lineno;
3577 token.offset = tokoff;
3578 token.length = toklen;
3579 token.line = newlb.buffer;
3580 token.linepos = newlinepos;
3581 token.valid = TRUE;
3582
/* Either the tag must wait for later input, in which case the buffer
   holding the token is kept by directing further reads to the other
   buffer, or the tag can be made right away. */
3583 if (definedef == dnone
3584 && (fvdef == fvnameseen
3585 || fvdef == foperator
3586 || structdef == stagseen
3587 || typdef == tend
3588 || typdef == ttypeseen
3589 || objdef != onone))
3590 {
3591 if (current_lb_is_new)
3592 switch_line_buffers ();
3593 }
3594 else if (definedef != dnone
3595 || fvdef == fdefunname
3596 || instruct)
3597 make_C_tag (funorvar);
3598 }
3599 else /* not yacc and consider_token failed */
3600 {
3601 if (inattribute && fvdef == fignore)
3602 {
3603 /* We have just met __attribute__ after a
3604 function parameter list: do not tag the
3605 function again. */
3606 fvdef = fvnone;
3607 }
3608 }
3609 midtoken = FALSE;
3610 }
3611 } /* if (endtoken (c)) */
3612 else if (intoken (c))
3613 still_in_token:
3614 {
3615 toklen++;
3616 continue;
3617 }
3618 } /* if (midtoken) */
3619 else if (begtoken (c))
3620 {
3621 switch (definedef)
3622 {
3623 case dnone:
3624 switch (fvdef)
3625 {
3626 case fstartlist:
3627 /* This prevents tagging fb in
3628 void (__attribute__((noreturn)) *fb) (void);
3629 Fixing this is not easy and not very important. */
3630 fvdef = finlist;
3631 continue;
3632 case flistseen:
3633 if (plainc || declarations)
3634 {
3635 make_C_tag (TRUE); /* a function */
3636 fvdef = fignore;
3637 }
3638 break;
3639 }
3640 if (structdef == stagseen && !cjava)
3641 {
3642 popclass_above (bracelev);
3643 structdef = snone;
3644 }
3645 break;
3646 case dsharpseen:
/* Put the pending token aside for the duration of the preprocessor
   directive; CNL restores it at the end of the line. */
3647 savetoken = token;
3648 break;
3649 }
/* In yacc rules a token is started only in the first column. */
3650 if (!yacc_rules || lp == newlb.buffer + 1)
3651 {
3652 tokoff = lp - 1 - newlb.buffer;
3653 toklen = 1;
3654 midtoken = TRUE;
3655 }
3656 continue;
3657 } /* if (begtoken) */
3658 } /* if must look at token */
3659
3660
3661 /* Detect end of line, colon, comma, semicolon and various braces
3662 after having handled a token.*/
3663 switch (c)
3664 {
3665 case ':':
3666 if (inattribute)
3667 break;
3668 if (yacc_rules && token.offset == 0 && token.valid)
3669 {
3670 make_C_tag (FALSE); /* a yacc function */
3671 break;
3672 }
3673 if (definedef != dnone)
3674 break;
3675 switch (objdef)
3676 {
3677 case otagseen:
3678 objdef = oignore;
3679 make_C_tag (TRUE); /* an Objective C class */
3680 break;
3681 case omethodtag:
3682 case omethodparm:
3683 objdef = omethodcolon;
3684 linebuffer_setlen (&token_name, token_name.len + 1);
3685 strcat (token_name.buffer, ":");
3686 break;
3687 }
3688 if (structdef == stagseen)
3689 {
3690 structdef = scolonseen;
3691 break;
3692 }
3693 /* Should be useless, but may be work as a safety net. */
3694 if (cplpl && fvdef == flistseen)
3695 {
3696 make_C_tag (TRUE); /* a function */
3697 fvdef = fignore;
3698 break;
3699 }
3700 break;
3701 case ';':
3702 if (definedef != dnone || inattribute)
3703 break;
3704 switch (typdef)
3705 {
3706 case tend:
3707 case ttypeseen:
3708 make_C_tag (FALSE); /* a typedef */
3709 typdef = tnone;
3710 fvdef = fvnone;
3711 break;
3712 case tnone:
3713 case tinbody:
3714 case tignore:
3715 switch (fvdef)
3716 {
3717 case fignore:
3718 if (typdef == tignore || cplpl)
3719 fvdef = fvnone;
3720 break;
3721 case fvnameseen:
3722 if ((globals && bracelev == 0 && (!fvextern || declarations))
3723 || (members && instruct))
3724 make_C_tag (FALSE); /* a variable */
3725 fvextern = FALSE;
3726 fvdef = fvnone;
3727 token.valid = FALSE;
3728 break;
3729 case flistseen:
3730 if ((declarations
3731 && (cplpl || !instruct)
3732 && (typdef == tnone || (typdef != tignore && instruct)))
3733 || (members
3734 && plainc && instruct))
3735 make_C_tag (TRUE); /* a function */
3736 /* FALLTHRU */
3737 default:
3738 fvextern = FALSE;
3739 fvdef = fvnone;
3740 if (declarations
3741 && cplpl && structdef == stagseen)
3742 make_C_tag (FALSE); /* forward declaration */
3743 else
3744 token.valid = FALSE;
3745 } /* switch (fvdef) */
3746 /* FALLTHRU */
3747 default:
3748 if (!instruct)
3749 typdef = tnone;
3750 }
3751 if (structdef == stagseen)
3752 structdef = snone;
3753 break;
3754 case ',':
3755 if (definedef != dnone || inattribute)
3756 break;
3757 switch (objdef)
3758 {
3759 case omethodtag:
3760 case omethodparm:
3761 make_C_tag (TRUE); /* an Objective C method */
3762 objdef = oinbody;
3763 break;
3764 }
3765 switch (fvdef)
3766 {
3767 case fdefunkey:
3768 case foperator:
3769 case fstartlist:
3770 case finlist:
3771 case fignore:
3772 case vignore:
3773 break;
3774 case fdefunname:
3775 fvdef = fignore;
3776 break;
3777 case fvnameseen:
3778 if (parlev == 0
3779 && ((globals
3780 && bracelev == 0
3781 && templatelev == 0
3782 && (!fvextern || declarations))
3783 || (members && instruct)))
3784 make_C_tag (FALSE); /* a variable */
3785 break;
3786 case flistseen:
3787 if ((declarations && typdef == tnone && !instruct)
3788 || (members && typdef != tignore && instruct))
3789 {
3790 make_C_tag (TRUE); /* a function */
3791 fvdef = fvnameseen;
3792 }
3793 else if (!declarations)
3794 fvdef = fvnone;
3795 token.valid = FALSE;
3796 break;
3797 default:
3798 fvdef = fvnone;
3799 }
3800 if (structdef == stagseen)
3801 structdef = snone;
3802 break;
3803 case ']':
3804 if (definedef != dnone || inattribute)
3805 break;
3806 if (structdef == stagseen)
3807 structdef = snone;
3808 switch (typdef)
3809 {
3810 case ttypeseen:
3811 case tend:
3812 typdef = tignore;
3813 make_C_tag (FALSE); /* a typedef */
3814 break;
3815 case tnone:
3816 case tinbody:
3817 switch (fvdef)
3818 {
3819 case foperator:
3820 case finlist:
3821 case fignore:
3822 case vignore:
3823 break;
3824 case fvnameseen:
3825 if ((members && bracelev == 1)
3826 || (globals && bracelev == 0
3827 && (!fvextern || declarations)))
3828 make_C_tag (FALSE); /* a variable */
3829 /* FALLTHRU */
3830 default:
3831 fvdef = fvnone;
3832 }
3833 break;
3834 }
3835 break;
3836 case '(':
3837 if (inattribute)
3838 {
3839 attrparlev++;
3840 break;
3841 }
3842 if (definedef != dnone)
3843 break;
3844 if (objdef == otagseen && parlev == 0)
3845 objdef = oparenseen;
3846 switch (fvdef)
3847 {
3848 case fvnameseen:
3849 if (typdef == ttypeseen
3850 && *lp != '*'
3851 && !instruct)
3852 {
3853 /* This handles constructs like:
3854 typedef void OperatorFun (int fun); */
3855 make_C_tag (FALSE);
3856 typdef = tignore;
3857 fvdef = fignore;
3858 break;
3859 }
3860 /* FALLTHRU */
3861 case foperator:
3862 fvdef = fstartlist;
3863 break;
3864 case flistseen:
3865 fvdef = finlist;
3866 break;
3867 }
3868 parlev++;
3869 break;
3870 case ')':
3871 if (inattribute)
3872 {
3873 if (--attrparlev == 0)
3874 inattribute = FALSE;
3875 break;
3876 }
3877 if (definedef != dnone)
3878 break;
3879 if (objdef == ocatseen && parlev == 1)
3880 {
3881 make_C_tag (TRUE); /* an Objective C category */
3882 objdef = oignore;
3883 }
3884 if (--parlev == 0)
3885 {
3886 switch (fvdef)
3887 {
3888 case fstartlist:
3889 case finlist:
3890 fvdef = flistseen;
3891 break;
3892 }
3893 if (!instruct
3894 && (typdef == tend
3895 || typdef == ttypeseen))
3896 {
3897 typdef = tignore;
3898 make_C_tag (FALSE); /* a typedef */
3899 }
3900 }
3901 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3902 parlev = 0;
3903 break;
3904 case '{':
3905 if (definedef != dnone)
3906 break;
3907 if (typdef == ttypeseen)
3908 {
3909 /* Whenever typdef is set to tinbody (currently only
3910 here), typdefbracelev should be set to bracelev. */
3911 typdef = tinbody;
3912 typdefbracelev = bracelev;
3913 }
3914 switch (fvdef)
3915 {
3916 case flistseen:
3917 make_C_tag (TRUE); /* a function */
3918 /* FALLTHRU */
3919 case fignore:
3920 fvdef = fvnone;
3921 break;
3922 case fvnone:
3923 switch (objdef)
3924 {
3925 case otagseen:
3926 make_C_tag (TRUE); /* an Objective C class */
3927 objdef = oignore;
3928 break;
3929 case omethodtag:
3930 case omethodparm:
3931 make_C_tag (TRUE); /* an Objective C method */
3932 objdef = oinbody;
3933 break;
3934 default:
3935 /* Neutralize `extern "C" {' grot. */
3936 if (bracelev == 0 && structdef == snone && nestlev == 0
3937 && typdef == tnone)
3938 bracelev = -1;
3939 }
3940 break;
3941 }
3942 switch (structdef)
3943 {
3944 case skeyseen: /* unnamed struct */
3945 pushclass_above (bracelev, NULL, 0);
3946 structdef = snone;
3947 break;
3948 case stagseen: /* named struct or enum */
3949 case scolonseen: /* a class */
3950 pushclass_above (bracelev,token.line+token.offset, token.length);
3951 structdef = snone;
3952 make_C_tag (FALSE); /* a struct or enum */
3953 break;
3954 }
3955 bracelev += 1;
3956 break;
3957 case '*':
3958 if (definedef != dnone)
3959 break;
3960 if (fvdef == fstartlist)
3961 {
3962 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3963 token.valid = FALSE;
3964 }
3965 break;
3966 case '}':
3967 if (definedef != dnone)
3968 break;
3969 bracelev -= 1;
3970 if (!ignoreindent && lp == newlb.buffer + 1)
3971 {
3972 if (bracelev != 0)
3973 token.valid = FALSE; /* unexpected value, token unreliable */
3974 bracelev = 0; /* reset brace level if first column */
3975 parlev = 0; /* also reset paren level, just in case... */
3976 }
3977 else if (bracelev < 0)
3978 {
3979 token.valid = FALSE; /* something gone amiss, token unreliable */
3980 bracelev = 0;
3981 }
3982 if (bracelev == 0 && fvdef == vignore)
3983 fvdef = fvnone; /* end of function */
3984 popclass_above (bracelev);
3985 structdef = snone;
3986 /* Only if typdef == tinbody is typdefbracelev significant. */
3987 if (typdef == tinbody && bracelev <= typdefbracelev)
3988 {
3989 assert (bracelev == typdefbracelev);
3990 typdef = tend;
3991 }
3992 break;
3993 case '=':
3994 if (definedef != dnone)
3995 break;
3996 switch (fvdef)
3997 {
3998 case foperator:
3999 case finlist:
4000 case fignore:
4001 case vignore:
4002 break;
4003 case fvnameseen:
4004 if ((members && bracelev == 1)
4005 || (globals && bracelev == 0 && (!fvextern || declarations)))
4006 make_C_tag (FALSE); /* a variable */
4007 /* FALLTHRU */
4008 default:
4009 fvdef = vignore;
4010 }
4011 break;
4012 case '<':
4013 if (cplpl
4014 && (structdef == stagseen || fvdef == fvnameseen))
4015 {
4016 templatelev++;
4017 break;
4018 }
4019 goto resetfvdef;
4020 case '>':
4021 if (templatelev > 0)
4022 {
4023 templatelev--;
4024 break;
4025 }
4026 goto resetfvdef;
4027 case '+':
4028 case '-':
4029 if (objdef == oinbody && bracelev == 0)
4030 {
4031 objdef = omethodsign;
4032 break;
4033 }
4034 /* FALLTHRU */
4035 resetfvdef:
4036 case '#': case '~': case '&': case '%': case '/':
4037 case '|': case '^': case '!': case '.': case '?':
4038 if (definedef != dnone)
4039 break;
4040 /* These surely cannot follow a function tag in C. */
4041 switch (fvdef)
4042 {
4043 case foperator:
4044 case finlist:
4045 case fignore:
4046 case vignore:
4047 break;
4048 default:
4049 fvdef = fvnone;
4050 }
4051 break;
4052 case '\0':
4053 if (objdef == otagseen)
4054 {
4055 make_C_tag (TRUE); /* an Objective C class */
4056 objdef = oignore;
4057 }
4058 /* If a macro spans multiple lines don't reset its state. */
4059 if (quotednl)
4060 CNL_SAVE_DEFINEDEF ();
4061 else
4062 CNL ();
4063 break;
4064 } /* switch (c) */
4065
4066 } /* while not eof */
4067
/* Release the storage of both line buffers. */
4068 free (lbs[0].lb.buffer);
4069 free (lbs[1].lb.buffer);
4070 }
4071
4072 /*
4073 * Process either a C++ file or a C file depending on the setting
4074 * of a global flag.
4075 */
4076 static void
4077 default_C_entries (inf)
4078 FILE *inf;
4079 {
4080 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4081 }
4082
/* Scan INF as plain C: no extension bit is passed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4090
4091 /* Always do C++. */
4092 static void
4093 Cplusplus_entries (inf)
4094 FILE *inf;
4095 {
4096 C_entries (C_PLPL, inf);
4097 }
4098
4099 /* Always do Java. */
4100 static void
4101 Cjava_entries (inf)
4102 FILE *inf;
4103 {
4104 C_entries (C_JAVA, inf);
4105 }
4106
4107 /* Always do C*. */
4108 static void
4109 Cstar_entries (inf)
4110 FILE *inf;
4111 {
4112 C_entries (C_STAR, inf);
4113 }
4114
4115 /* Always do Yacc. */
4116 static void
4117 Yacc_entries (inf)
4118 FILE *inf;
4119 {
4120 C_entries (YACC, inf);
4121 }
4122
4123 \f
4124 /* Useful macros. */
/* Loop header that runs the statement following it once per input
   line: as long as FILE_POINTER is not at end of file, a line is read
   into LINE_BUFFER with readline and CHAR_POINTER is set to the start
   of its text.  The arguments are evaluated on every iteration, and a
   `continue' in the body goes on to the next line. */
4125 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4126 for (; /* loop initialization */ \
4127 !feof (file_pointer) /* loop test */ \
4128 && /* instructions at start of loop */ \
4129 (readline (&line_buffer, file_pointer), \
4130 char_pointer = line_buffer.buffer, \
4131 TRUE); \
4132 )
4133
/* Test whether CP points at the keyword KW followed by a character
   that notinname accepts.  On success CP, which must be an lvalue, is
   moved past the keyword and the blanks after it; on failure CP is
   left alone.  CP is evaluated several times. */
4134 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4135 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4136 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4137 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4138 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4139
4140 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use
   notinname to check what follows the keyword, and does not skip the
   blanks after it: on success CP is left just past the keyword. */
4141 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4142 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4143 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4144 && ((cp) += sizeof(kw)-1)) /* skip the keyword */
4145
4146 /*
4147 * Read a file, but do no processing. This is used to do regexp
4148 * matching on files that have no language defined.
4149 */
4150 static void
4151 just_read_file (inf)
4152 FILE *inf;
4153 {
4154 register char *dummy;
4155
4156 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4157 continue;
4158 }
4159
4160 \f
4161 /* Fortran parsing */
4162
4163 static void F_takeprec __P((void));
4164 static void F_getit __P((FILE *));
4165
4166 static void
4167 F_takeprec ()
4168 {
4169 dbp = skip_spaces (dbp);
4170 if (*dbp != '*')
4171 return;
4172 dbp++;
4173 dbp = skip_spaces (dbp);
4174 if (strneq (dbp, "(*)", 3))
4175 {
4176 dbp += 3;
4177 return;
4178 }
4179 if (!ISDIGIT (*dbp))
4180 {
4181 --dbp; /* force failure */
4182 return;
4183 }
4184 do
4185 dbp++;
4186 while (ISDIGIT (*dbp));
4187 }
4188
4189 static void
4190 F_getit (inf)
4191 FILE *inf;
4192 {
4193 register char *cp;
4194
4195 dbp = skip_spaces (dbp);
4196 if (*dbp == '\0')
4197 {
4198 readline (&lb, inf);
4199 dbp = lb.buffer;
4200 if (dbp[5] != '&')
4201 return;
4202 dbp += 6;
4203 dbp = skip_spaces (dbp);
4204 }
4205 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4206 return;
4207 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4208 continue;
4209 make_tag (dbp, cp-dbp, TRUE,
4210 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4211 }
4212
4213
4214 static void
4215 Fortran_functions (inf)
4216 FILE *inf;
4217 {
4218 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4219 {
4220 if (*dbp == '%')
4221 dbp++; /* Ratfor escape to fortran */
4222 dbp = skip_spaces (dbp);
4223 if (*dbp == '\0')
4224 continue;
4225 switch (lowcase (*dbp))
4226 {
4227 case 'i':
4228 if (nocase_tail ("integer"))
4229 F_takeprec ();
4230 break;
4231 case 'r':
4232 if (nocase_tail ("real"))
4233 F_takeprec ();
4234 break;
4235 case 'l':
4236 if (nocase_tail ("logical"))
4237 F_takeprec ();
4238 break;
4239 case 'c':
4240 if (nocase_tail ("complex") || nocase_tail ("character"))
4241 F_takeprec ();
4242 break;
4243 case 'd':
4244 if (nocase_tail ("double"))
4245 {
4246 dbp = skip_spaces (dbp);
4247 if (*dbp == '\0')
4248 continue;
4249 if (nocase_tail ("precision"))
4250 break;
4251 continue;
4252 }
4253 break;
4254 }
4255 dbp = skip_spaces (dbp);
4256 if (*dbp == '\0')
4257 continue;
4258 switch (lowcase (*dbp))
4259 {
4260 case 'f':
4261 if (nocase_tail ("function"))
4262 F_getit (inf);
4263 continue;
4264 case 's':
4265 if (nocase_tail ("subroutine"))
4266 F_getit (inf);
4267 continue;
4268 case 'e':
4269 if (nocase_tail ("entry"))
4270 F_getit (inf);
4271 continue;
4272 case 'b':
4273 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4274 {
4275 dbp = skip_spaces (dbp);
4276 if (*dbp == '\0') /* assume un-named */
4277 make_tag ("blockdata", 9, TRUE,
4278 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4279 else
4280 F_getit (inf); /* look for name */
4281 }
4282 continue;
4283 }
4284 }
4285 }
4286
4287 \f
4288 /*
4289 * Ada parsing
4290 * Original code by
4291 * Philippe Waroquiers (1998)
4292 */
4293
4294 static void Ada_getit __P((FILE *, char *));
4295
4296 /* Once we are positioned after an "interesting" keyword, let's get
4297 the real tag value necessary. */
4298 static void
4299 Ada_getit (inf, name_qualifier)
4300 FILE *inf;
4301 char *name_qualifier;
4302 {
4303 register char *cp;
4304 char *name;
4305 char c;
4306
4307 while (!feof (inf))
4308 {
4309 dbp = skip_spaces (dbp);
4310 if (*dbp == '\0'
4311 || (dbp[0] == '-' && dbp[1] == '-'))
4312 {
4313 readline (&lb, inf);
4314 dbp = lb.buffer;
4315 }
4316 switch (lowcase(*dbp))
4317 {
4318 case 'b':
4319 if (nocase_tail ("body"))
4320 {
4321 /* Skipping body of procedure body or package body or ....
4322 resetting qualifier to body instead of spec. */
4323 name_qualifier = "/b";
4324 continue;
4325 }
4326 break;
4327 case 't':
4328 /* Skipping type of task type or protected type ... */
4329 if (nocase_tail ("type"))
4330 continue;
4331 break;
4332 }
4333 if (*dbp == '"')
4334 {
4335 dbp += 1;
4336 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4337 continue;
4338 }
4339 else
4340 {
4341 dbp = skip_spaces (dbp);
4342 for (cp = dbp;
4343 (*cp != '\0'
4344 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4345 cp++)
4346 continue;
4347 if (cp == dbp)
4348 return;
4349 }
4350 c = *cp;
4351 *cp = '\0';
4352 name = concat (dbp, name_qualifier, "");
4353 *cp = c;
4354 make_tag (name, strlen (name), TRUE,
4355 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4356 free (name);
4357 if (c == '"')
4358 dbp = cp + 1;
4359 return;
4360 }
4361 }
4362
4363 static void
4364 Ada_funcs (inf)
4365 FILE *inf;
4366 {
4367 bool inquote = FALSE;
4368 bool skip_till_semicolumn = FALSE;
4369
4370 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4371 {
4372 while (*dbp != '\0')
4373 {
4374 /* Skip a string i.e. "abcd". */
4375 if (inquote || (*dbp == '"'))
4376 {
4377 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4378 if (dbp != NULL)
4379 {
4380 inquote = FALSE;
4381 dbp += 1;
4382 continue; /* advance char */
4383 }
4384 else
4385 {
4386 inquote = TRUE;
4387 break; /* advance line */
4388 }
4389 }
4390
4391 /* Skip comments. */
4392 if (dbp[0] == '-' && dbp[1] == '-')
4393 break; /* advance line */
4394
4395 /* Skip character enclosed in single quote i.e. 'a'
4396 and skip single quote starting an attribute i.e. 'Image. */
4397 if (*dbp == '\'')
4398 {
4399 dbp++ ;
4400 if (*dbp != '\0')
4401 dbp++;
4402 continue;
4403 }
4404
4405 if (skip_till_semicolumn)
4406 {
4407 if (*dbp == ';')
4408 skip_till_semicolumn = FALSE;
4409 dbp++;
4410 continue; /* advance char */
4411 }
4412
4413 /* Search for beginning of a token. */
4414 if (!begtoken (*dbp))
4415 {
4416 dbp++;
4417 continue; /* advance char */
4418 }
4419
4420 /* We are at the beginning of a token. */
4421 switch (lowcase(*dbp))
4422 {
4423 case 'f':
4424 if (!packages_only && nocase_tail ("function"))
4425 Ada_getit (inf, "/f");
4426 else
4427 break; /* from switch */
4428 continue; /* advance char */
4429 case 'p':
4430 if (!packages_only && nocase_tail ("procedure"))
4431 Ada_getit (inf, "/p");
4432 else if (nocase_tail ("package"))
4433 Ada_getit (inf, "/s");
4434 else if (nocase_tail ("protected")) /* protected type */
4435 Ada_getit (inf, "/t");
4436 else
4437 break; /* from switch */
4438 continue; /* advance char */
4439
4440 case 'u':
4441 if (typedefs && !packages_only && nocase_tail ("use"))
4442 {
4443 /* when tagging types, avoid tagging use type Pack.Typename;
4444 for this, we will skip everything till a ; */
4445 skip_till_semicolumn = TRUE;
4446 continue; /* advance char */
4447 }
4448
4449 case 't':
4450 if (!packages_only && nocase_tail ("task"))
4451 Ada_getit (inf, "/k");
4452 else if (typedefs && !packages_only && nocase_tail ("type"))
4453 {
4454 Ada_getit (inf, "/t");
4455 while (*dbp != '\0')
4456 dbp += 1;
4457 }
4458 else
4459 break; /* from switch */
4460 continue; /* advance char */
4461 }
4462
4463 /* Look for the end of the token. */
4464 while (!endtoken (*dbp))
4465 dbp++;
4466
4467 } /* advance char */
4468 } /* advance line */
4469 }
4470
4471 \f
4472 /*
4473 * Unix and microcontroller assembly tag handling
4474 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4475 * Idea by Bob Weiner, Motorola Inc. (1994)
4476 */
4477 static void
4478 Asm_labels (inf)
4479 FILE *inf;
4480 {
4481 register char *cp;
4482
4483 LOOP_ON_INPUT_LINES (inf, lb, cp)
4484 {
4485 /* If first char is alphabetic or one of [_.$], test for colon
4486 following identifier. */
4487 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4488 {
4489 /* Read past label. */
4490 cp++;
4491 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4492 cp++;
4493 if (*cp == ':' || iswhite (*cp))
4494 /* Found end of label, so copy it and add it to the table. */
4495 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4496 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4497 }
4498 }
4499 }
4500
4501 \f
4502 /*
4503 * Perl support
4504 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4505 * Perl variable names: /^(my|local).../
4506 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4507 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4508 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4509 */
4510 static void
4511 Perl_functions (inf)
4512 FILE *inf;
4513 {
4514 char *package = savestr ("main"); /* current package name */
4515 register char *cp;
4516
4517 LOOP_ON_INPUT_LINES (inf, lb, cp)
4518 {
4519 skip_spaces(cp);
4520
4521 if (LOOKING_AT (cp, "package"))
4522 {
4523 free (package);
4524 get_tag (cp, &package);
4525 }
4526 else if (LOOKING_AT (cp, "sub"))
4527 {
4528 char *pos;
4529 char *sp = cp;
4530
4531 while (!notinname (*cp))
4532 cp++;
4533 if (cp == sp)
4534 continue; /* nothing found */
4535 if ((pos = etags_strchr (sp, ':')) != NULL
4536 && pos < cp && pos[1] == ':')
4537 /* The name is already qualified. */
4538 make_tag (sp, cp - sp, TRUE,
4539 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4540 else
4541 /* Qualify it. */
4542 {
4543 char savechar, *name;
4544
4545 savechar = *cp;
4546 *cp = '\0';
4547 name = concat (package, "::", sp);
4548 *cp = savechar;
4549 make_tag (name, strlen(name), TRUE,
4550 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4551 free (name);
4552 }
4553 }
4554 else if (globals) /* only if we are tagging global vars */
4555 {
4556 /* Skip a qualifier, if any. */
4557 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4558 /* After "my" or "local", but before any following paren or space. */
4559 char *varstart = cp;
4560
4561 if (qual /* should this be removed? If yes, how? */
4562 && (*cp == '$' || *cp == '@' || *cp == '%'))
4563 {
4564 varstart += 1;
4565 do
4566 cp++;
4567 while (ISALNUM (*cp) || *cp == '_');
4568 }
4569 else if (qual)
4570 {
4571 /* Should be examining a variable list at this point;
4572 could insist on seeing an open parenthesis. */
4573 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4574 cp++;
4575 }
4576 else
4577 continue;
4578
4579 make_tag (varstart, cp - varstart, FALSE,
4580 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4581 }
4582 }
4583 free (package);
4584 }
4585
4586
4587 /*
4588 * Python support
4589 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4590 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4591 * More ideas by seb bacon <seb@jamkit.com> (2002)
4592 */
4593 static void
4594 Python_functions (inf)
4595 FILE *inf;
4596 {
4597 register char *cp;
4598
4599 LOOP_ON_INPUT_LINES (inf, lb, cp)
4600 {
4601 cp = skip_spaces (cp);
4602 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4603 {
4604 char *name = cp;
4605 while (!notinname (*cp) && *cp != ':')
4606 cp++;
4607 make_tag (name, cp - name, TRUE,
4608 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4609 }
4610 }
4611 }
4612
4613 \f
4614 /*
4615 * PHP support
4616 * Look for:
4617 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4618 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4619 * - /^[ \t]*define\(\"[^\"]+/
4620 * Only with --members:
4621 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4622 * Idea by Diez B. Roggisch (2001)
4623 */
4624 static void
4625 PHP_functions (inf)
4626 FILE *inf;
4627 {
4628 register char *cp, *name;
4629 bool search_identifier = FALSE;
4630
4631 LOOP_ON_INPUT_LINES (inf, lb, cp)
4632 {
4633 cp = skip_spaces (cp);
4634 name = cp;
4635 if (search_identifier
4636 && *cp != '\0')
4637 {
4638 while (!notinname (*cp))
4639 cp++;
4640 make_tag (name, cp - name, TRUE,
4641 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4642 search_identifier = FALSE;
4643 }
4644 else if (LOOKING_AT (cp, "function"))
4645 {
4646 if(*cp == '&')
4647 cp = skip_spaces (cp+1);
4648 if(*cp != '\0')
4649 {
4650 name = cp;
4651 while (!notinname (*cp))
4652 cp++;
4653 make_tag (name, cp - name, TRUE,
4654 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4655 }
4656 else
4657 search_identifier = TRUE;
4658 }
4659 else if (LOOKING_AT (cp, "class"))
4660 {
4661 if (*cp != '\0')
4662 {
4663 name = cp;
4664 while (*cp != '\0' && !iswhite (*cp))
4665 cp++;
4666 make_tag (name, cp - name, FALSE,
4667 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4668 }
4669 else
4670 search_identifier = TRUE;
4671 }
4672 else if (strneq (cp, "define", 6)
4673 && (cp = skip_spaces (cp+6))
4674 && *cp++ == '('
4675 && (*cp == '"' || *cp == '\''))
4676 {
4677 char quote = *cp++;
4678 name = cp;
4679 while (*cp != quote && *cp != '\0')
4680 cp++;
4681 make_tag (name, cp - name, FALSE,
4682 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4683 }
4684 else if (members
4685 && LOOKING_AT (cp, "var")
4686 && *cp == '$')
4687 {
4688 name = cp;
4689 while (!notinname(*cp))
4690 cp++;
4691 make_tag (name, cp - name, FALSE,
4692 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4693 }
4694 }
4695 }
4696
4697 \f
4698 /*
4699 * Cobol tag functions
4700 * We could look for anything that could be a paragraph name.
4701 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4702 * Idea by Corny de Souza (1993)
4703 */
4704 static void
4705 Cobol_paragraphs (inf)
4706 FILE *inf;
4707 {
4708 register char *bp, *ep;
4709
4710 LOOP_ON_INPUT_LINES (inf, lb, bp)
4711 {
4712 if (lb.len < 9)
4713 continue;
4714 bp += 8;
4715
4716 /* If eoln, compiler option or comment ignore whole line. */
4717 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4718 continue;
4719
4720 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4721 continue;
4722 if (*ep++ == '.')
4723 make_tag (bp, ep - bp, TRUE,
4724 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4725 }
4726 }
4727
4728 \f
4729 /*
4730 * Makefile support
4731 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4732 */
4733 static void
4734 Makefile_targets (inf)
4735 FILE *inf;
4736 {
4737 register char *bp;
4738
4739 LOOP_ON_INPUT_LINES (inf, lb, bp)
4740 {
4741 if (*bp == '\t' || *bp == '#')
4742 continue;
4743 while (*bp != '\0' && *bp != '=' && *bp != ':')
4744 bp++;
4745 if (*bp == ':' || (globals && *bp == '='))
4746 {
4747 /* We should detect if there is more than one tag, but we do not.
4748 We just skip initial and final spaces. */
4749 char * namestart = skip_spaces (lb.buffer);
4750 while (--bp > namestart)
4751 if (!notinname (*bp))
4752 break;
4753 make_tag (namestart, bp - namestart + 1, TRUE,
4754 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4755 }
4756 }
4757 }
4758
4759 \f
4760 /*
4761 * Pascal parsing
4762 * Original code by Mosur K. Mohan (1989)
4763 *
4764 * Locates tags for procedures & functions. Doesn't do any type- or
4765 * var-definitions. It does look for the keyword "extern" or
4766 * "forward" immediately following the procedure statement; if found,
4767 * the tag is skipped.
4768 */
4769 static void
4770 Pascal_functions (inf)
4771 FILE *inf;
4772 {
4773 linebuffer tline; /* mostly copied from C_entries */
4774 long save_lcno;
4775 int save_lineno, namelen, taglen;
4776 char c, *name;
4777
4778 bool /* each of these flags is TRUE if: */
4779 incomment, /* point is inside a comment */
4780 inquote, /* point is inside '..' string */
4781 get_tagname, /* point is after PROCEDURE/FUNCTION
4782 keyword, so next item = potential tag */
4783 found_tag, /* point is after a potential tag */
4784 inparms, /* point is within parameter-list */
4785 verify_tag; /* point has passed the parm-list, so the
4786 next token will determine whether this
4787 is a FORWARD/EXTERN to be ignored, or
4788 whether it is a real tag */
4789
4790 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4791 name = NULL; /* keep compiler quiet */
4792 dbp = lb.buffer;
4793 *dbp = '\0';
4794 linebuffer_init (&tline);
4795
4796 incomment = inquote = FALSE;
4797 found_tag = FALSE; /* have a proc name; check if extern */
4798 get_tagname = FALSE; /* found "procedure" keyword */
4799 inparms = FALSE; /* found '(' after "proc" */
4800 verify_tag = FALSE; /* check if "extern" is ahead */
4801
4802
4803 while (!feof (inf)) /* long main loop to get next char */
4804 {
4805 c = *dbp++;
4806 if (c == '\0') /* if end of line */
4807 {
4808 readline (&lb, inf);
4809 dbp = lb.buffer;
4810 if (*dbp == '\0')
4811 continue;
4812 if (!((found_tag && verify_tag)
4813 || get_tagname))
4814 c = *dbp++; /* only if don't need *dbp pointing
4815 to the beginning of the name of
4816 the procedure or function */
4817 }
4818 if (incomment)
4819 {
4820 if (c == '}') /* within { } comments */
4821 incomment = FALSE;
4822 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4823 {
4824 dbp++;
4825 incomment = FALSE;
4826 }
4827 continue;
4828 }
4829 else if (inquote)
4830 {
4831 if (c == '\'')
4832 inquote = FALSE;
4833 continue;
4834 }
4835 else
4836 switch (c)
4837 {
4838 case '\'':
4839 inquote = TRUE; /* found first quote */
4840 continue;
4841 case '{': /* found open { comment */
4842 incomment = TRUE;
4843 continue;
4844 case '(':
4845 if (*dbp == '*') /* found open (* comment */
4846 {
4847 incomment = TRUE;
4848 dbp++;
4849 }
4850 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4851 inparms = TRUE;
4852 continue;
4853 case ')': /* end of parms list */
4854 if (inparms)
4855 inparms = FALSE;
4856 continue;
4857 case ';':
4858 if (found_tag && !inparms) /* end of proc or fn stmt */
4859 {
4860 verify_tag = TRUE;
4861 break;
4862 }
4863 continue;
4864 }
4865 if (found_tag && verify_tag && (*dbp != ' '))
4866 {
4867 /* Check if this is an "extern" declaration. */
4868 if (*dbp == '\0')
4869 continue;
4870 if (lowcase (*dbp == 'e'))
4871 {
4872 if (nocase_tail ("extern")) /* superfluous, really! */
4873 {
4874 found_tag = FALSE;
4875 verify_tag = FALSE;
4876 }
4877 }
4878 else if (lowcase (*dbp) == 'f')
4879 {
4880 if (nocase_tail ("forward")) /* check for forward reference */
4881 {
4882 found_tag = FALSE;
4883 verify_tag = FALSE;
4884 }
4885 }
4886 if (found_tag && verify_tag) /* not external proc, so make tag */
4887 {
4888 found_tag = FALSE;
4889 verify_tag = FALSE;
4890 make_tag (name, namelen, TRUE,
4891 tline.buffer, taglen, save_lineno, save_lcno);
4892 continue;
4893 }
4894 }
4895 if (get_tagname) /* grab name of proc or fn */
4896 {
4897 char *cp;
4898
4899 if (*dbp == '\0')
4900 continue;
4901
4902 /* Find block name. */
4903 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4904 continue;
4905
4906 /* Save all values for later tagging. */
4907 linebuffer_setlen (&tline, lb.len);
4908 strcpy (tline.buffer, lb.buffer);
4909 save_lineno = lineno;
4910 save_lcno = linecharno;
4911 name = tline.buffer + (dbp - lb.buffer);
4912 namelen = cp - dbp;
4913 taglen = cp - lb.buffer + 1;
4914
4915 dbp = cp; /* set dbp to e-o-token */
4916 get_tagname = FALSE;
4917 found_tag = TRUE;
4918 continue;
4919
4920 /* And proceed to check for "extern". */
4921 }
4922 else if (!incomment && !inquote && !found_tag)
4923 {
4924 /* Check for proc/fn keywords. */
4925 switch (lowcase (c))
4926 {
4927 case 'p':
4928 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4929 get_tagname = TRUE;
4930 continue;
4931 case 'f':
4932 if (nocase_tail ("unction"))
4933 get_tagname = TRUE;
4934 continue;
4935 }
4936 }
4937 } /* while not eof */
4938
4939 free (tline.buffer);
4940 }
4941
4942 \f
4943 /*
4944 * Lisp tag functions
4945 * look for (def or (DEF, quote or QUOTE
4946 */
4947
4948 static void L_getit __P((void));
4949
4950 static void
4951 L_getit ()
4952 {
4953 if (*dbp == '\'') /* Skip prefix quote */
4954 dbp++;
4955 else if (*dbp == '(')
4956 {
4957 dbp++;
4958 /* Try to skip "(quote " */
4959 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4960 /* Ok, then skip "(" before name in (defstruct (foo)) */
4961 dbp = skip_spaces (dbp);
4962 }
4963 get_tag (dbp, NULL);
4964 }
4965
4966 static void
4967 Lisp_functions (inf)
4968 FILE *inf;
4969 {
4970 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4971 {
4972 if (dbp[0] != '(')
4973 continue;
4974
4975 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4976 {
4977 dbp = skip_non_spaces (dbp);
4978 dbp = skip_spaces (dbp);
4979 L_getit ();
4980 }
4981 else
4982 {
4983 /* Check for (foo::defmumble name-defined ... */
4984 do
4985 dbp++;
4986 while (!notinname (*dbp) && *dbp != ':');
4987 if (*dbp == ':')
4988 {
4989 do
4990 dbp++;
4991 while (*dbp == ':');
4992
4993 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4994 {
4995 dbp = skip_non_spaces (dbp);
4996 dbp = skip_spaces (dbp);
4997 L_getit ();
4998 }
4999 }
5000 }
5001 }
5002 }
5003
5004 \f
5005 /*
5006 * Lua script language parsing
5007 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5008 *
5009 * "function" and "local function" are tags if they start at column 1.
5010 */
5011 static void
5012 Lua_functions (inf)
5013 FILE *inf;
5014 {
5015 register char *bp;
5016
5017 LOOP_ON_INPUT_LINES (inf, lb, bp)
5018 {
5019 if (bp[0] != 'f' && bp[0] != 'l')
5020 continue;
5021
5022 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5023
5024 if (LOOKING_AT (bp, "function"))
5025 get_tag (bp, NULL);
5026 }
5027 }
5028
5029 \f
5030 /*
5031 * Postscript tags
5032 * Just look for lines where the first character is '/'
5033 * Also look at "defineps" for PSWrap
5034 * Ideas by:
5035 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5036 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5037 */
5038 static void
5039 PS_functions (inf)
5040 FILE *inf;
5041 {
5042 register char *bp, *ep;
5043
5044 LOOP_ON_INPUT_LINES (inf, lb, bp)
5045 {
5046 if (bp[0] == '/')
5047 {
5048 for (ep = bp+1;
5049 *ep != '\0' && *ep != ' ' && *ep != '{';
5050 ep++)
5051 continue;
5052 make_tag (bp, ep - bp, TRUE,
5053 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5054 }
5055 else if (LOOKING_AT (bp, "defineps"))
5056 get_tag (bp, NULL);
5057 }
5058 }
5059
5060 \f
5061 /*
5062 * Forth tags
5063 * Ignore anything after \ followed by space or in ( )
5064 * Look for words defined by :
5065 * Look for constant, code, create, defer, value, and variable
5066 * OBP extensions: Look for buffer:, field,
5067 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5068 */
5069 static void
5070 Forth_words (inf)
5071 FILE *inf;
5072 {
5073 register char *bp;
5074
5075 LOOP_ON_INPUT_LINES (inf, lb, bp)
5076 while ((bp = skip_spaces (bp))[0] != '\0')
5077 if (bp[0] == '\\' && iswhite(bp[1]))
5078 break; /* read next line */
5079 else if (bp[0] == '(' && iswhite(bp[1]))
5080 do /* skip to ) or eol */
5081 bp++;
5082 while (*bp != ')' && *bp != '\0');
5083 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5084 || LOOKING_AT_NOCASE (bp, "constant")
5085 || LOOKING_AT_NOCASE (bp, "code")
5086 || LOOKING_AT_NOCASE (bp, "create")
5087 || LOOKING_AT_NOCASE (bp, "defer")
5088 || LOOKING_AT_NOCASE (bp, "value")
5089 || LOOKING_AT_NOCASE (bp, "variable")
5090 || LOOKING_AT_NOCASE (bp, "buffer:")
5091 || LOOKING_AT_NOCASE (bp, "field"))
5092 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5093 else
5094 bp = skip_non_spaces (bp);
5095 }
5096
5097 \f
5098 /*
5099 * Scheme tag functions
5100 * look for (def... xyzzy
5101 * (def... (xyzzy
5102 * (def ... ((...(xyzzy ....
5103 * (set! xyzzy
5104 * Original code by Ken Haase (1985?)
5105 */
5106 static void
5107 Scheme_functions (inf)
5108 FILE *inf;
5109 {
5110 register char *bp;
5111
5112 LOOP_ON_INPUT_LINES (inf, lb, bp)
5113 {
5114 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5115 {
5116 bp = skip_non_spaces (bp+4);
5117 /* Skip over open parens and white space */
5118 while (notinname (*bp))
5119 bp++;
5120 get_tag (bp, NULL);
5121 }
5122 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5123 get_tag (bp, NULL);
5124 }
5125 }
5126
5127 \f
5128 /* Find tags in TeX and LaTeX input files. */
5129
5130 /* TEX_toktab is a table of TeX control sequences that define tags.
5131 * Each entry records one such control sequence.
5132 *
5133 * Original code from who knows whom.
5134 * Ideas by:
5135 * Stefan Monnier (2002)
5136 */
5137
5138 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5139
5140 /* Default set of control sequences to put into TEX_toktab.
5141 The value of environment var TEXTAGS is prepended to this. */
5142 static char *TEX_defenv = "\
5143 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5144 :part:appendix:entry:index:def\
5145 :newcommand:renewcommand:newenvironment:renewenvironment";
5146
5147 static void TEX_mode __P((FILE *));
5148 static void TEX_decode_env __P((char *, char *));
5149
5150 static char TEX_esc = '\\';
5151 static char TEX_opgrp = '{';
5152 static char TEX_clgrp = '}';
5153
5154 /*
5155 * TeX/LaTeX scanning loop.
5156 */
5157 static void
5158 TeX_commands (inf)
5159 FILE *inf;
5160 {
5161 char *cp;
5162 linebuffer *key;
5163
5164 /* Select either \ or ! as escape character. */
5165 TEX_mode (inf);
5166
5167 /* Initialize token table once from environment. */
5168 if (TEX_toktab == NULL)
5169 TEX_decode_env ("TEXTAGS", TEX_defenv);
5170
5171 LOOP_ON_INPUT_LINES (inf, lb, cp)
5172 {
5173 /* Look at each TEX keyword in line. */
5174 for (;;)
5175 {
5176 /* Look for a TEX escape. */
5177 while (*cp++ != TEX_esc)
5178 if (cp[-1] == '\0' || cp[-1] == '%')
5179 goto tex_next_line;
5180
5181 for (key = TEX_toktab; key->buffer != NULL; key++)
5182 if (strneq (cp, key->buffer, key->len))
5183 {
5184 register char *p;
5185 int namelen, linelen;
5186 bool opgrp = FALSE;
5187
5188 cp = skip_spaces (cp + key->len);
5189 if (*cp == TEX_opgrp)
5190 {
5191 opgrp = TRUE;
5192 cp++;
5193 }
5194 for (p = cp;
5195 (!iswhite (*p) && *p != '#' &&
5196 *p != TEX_opgrp && *p != TEX_clgrp);
5197 p++)
5198 continue;
5199 namelen = p - cp;
5200 linelen = lb.len;
5201 if (!opgrp || *p == TEX_clgrp)
5202 {
5203 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5204 p++;
5205 linelen = p - lb.buffer + 1;
5206 }
5207 make_tag (cp, namelen, TRUE,
5208 lb.buffer, linelen, lineno, linecharno);
5209 goto tex_next_line; /* We only tag a line once */
5210 }
5211 }
5212 tex_next_line:
5213 ;
5214 }
5215 }
5216
5217 #define TEX_LESC '\\'
5218 #define TEX_SESC '!'
5219
5220 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5221 chars accordingly. */
5222 static void
5223 TEX_mode (inf)
5224 FILE *inf;
5225 {
5226 int c;
5227
5228 while ((c = getc (inf)) != EOF)
5229 {
5230 /* Skip to next line if we hit the TeX comment char. */
5231 if (c == '%')
5232 while (c != '\n' && c != EOF)
5233 c = getc (inf);
5234 else if (c == TEX_LESC || c == TEX_SESC )
5235 break;
5236 }
5237
5238 if (c == TEX_LESC)
5239 {
5240 TEX_esc = TEX_LESC;
5241 TEX_opgrp = '{';
5242 TEX_clgrp = '}';
5243 }
5244 else
5245 {
5246 TEX_esc = TEX_SESC;
5247 TEX_opgrp = '<';
5248 TEX_clgrp = '>';
5249 }
5250 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5251 No attempt is made to correct the situation. */
5252 rewind (inf);
5253 }
5254
5255 /* Read environment and prepend it to the default string.
5256 Build token table. */
5257 static void
5258 TEX_decode_env (evarname, defenv)
5259 char *evarname;
5260 char *defenv;
5261 {
5262 register char *env, *p;
5263 int i, len;
5264
5265 /* Append default string to environment. */
5266 env = getenv (evarname);
5267 if (!env)
5268 env = defenv;
5269 else
5270 {
5271 char *oldenv = env;
5272 env = concat (oldenv, defenv, "");
5273 }
5274
5275 /* Allocate a token table */
5276 for (len = 1, p = env; p;)
5277 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5278 len++;
5279 TEX_toktab = xnew (len, linebuffer);
5280
5281 /* Unpack environment string into token table. Be careful about */
5282 /* zero-length strings (leading ':', "::" and trailing ':') */
5283 for (i = 0; *env != '\0';)
5284 {
5285 p = etags_strchr (env, ':');
5286 if (!p) /* End of environment string. */
5287 p = env + strlen (env);
5288 if (p - env > 0)
5289 { /* Only non-zero strings. */
5290 TEX_toktab[i].buffer = savenstr (env, p - env);
5291 TEX_toktab[i].len = p - env;
5292 i++;
5293 }
5294 if (*p)
5295 env = p + 1;
5296 else
5297 {
5298 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5299 TEX_toktab[i].len = 0;
5300 break;
5301 }
5302 }
5303 }
5304
5305 \f
5306 /* Texinfo support. Dave Love, Mar. 2000. */
5307 static void
5308 Texinfo_nodes (inf)
5309 FILE * inf;
5310 {
5311 char *cp, *start;
5312 LOOP_ON_INPUT_LINES (inf, lb, cp)
5313 if (LOOKING_AT (cp, "@node"))
5314 {
5315 start = cp;
5316 while (*cp != '\0' && *cp != ',')
5317 cp++;
5318 make_tag (start, cp - start, TRUE,
5319 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5320 }
5321 }
5322
5323 \f
5324 /*
5325 * HTML support.
5326 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5327 * Contents of <a name=xxx> are tags with name xxx.
5328 *
5329 * Francesco Potortì, 2002.
5330 */
5331 static void
5332 HTML_labels (inf)
5333 FILE * inf;
5334 {
5335 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5336 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5337 bool intag = FALSE; /* inside an html tag, looking for ID= */
5338 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5339 char *end;
5340
5341
5342 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5343
5344 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5345 for (;;) /* loop on the same line */
5346 {
5347 if (skiptag) /* skip HTML tag */
5348 {
5349 while (*dbp != '\0' && *dbp != '>')
5350 dbp++;
5351 if (*dbp == '>')
5352 {
5353 dbp += 1;
5354 skiptag = FALSE;
5355 continue; /* look on the same line */
5356 }
5357 break; /* go to next line */
5358 }
5359
5360 else if (intag) /* look for "name=" or "id=" */
5361 {
5362 while (*dbp != '\0' && *dbp != '>'
5363 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5364 dbp++;
5365 if (*dbp == '\0')
5366 break; /* go to next line */
5367 if (*dbp == '>')
5368 {
5369 dbp += 1;
5370 intag = FALSE;
5371 continue; /* look on the same line */
5372 }
5373 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5374 || LOOKING_AT_NOCASE (dbp, "id="))
5375 {
5376 bool quoted = (dbp[0] == '"');
5377
5378 if (quoted)
5379 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5380 continue;
5381 else
5382 for (end = dbp; *end != '\0' && intoken (*end); end++)
5383 continue;
5384 linebuffer_setlen (&token_name, end - dbp);
5385 strncpy (token_name.buffer, dbp, end - dbp);
5386 token_name.buffer[end - dbp] = '\0';
5387
5388 dbp = end;
5389 intag = FALSE; /* we found what we looked for */
5390 skiptag = TRUE; /* skip to the end of the tag */
5391 getnext = TRUE; /* then grab the text */
5392 continue; /* look on the same line */
5393 }
5394 dbp += 1;
5395 }
5396
5397 else if (getnext) /* grab next tokens and tag them */
5398 {
5399 dbp = skip_spaces (dbp);
5400 if (*dbp == '\0')
5401 break; /* go to next line */
5402 if (*dbp == '<')
5403 {
5404 intag = TRUE;
5405 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5406 continue; /* look on the same line */
5407 }
5408
5409 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5410 continue;
5411 make_tag (token_name.buffer, token_name.len, TRUE,
5412 dbp, end - dbp, lineno, linecharno);
5413 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5414 getnext = FALSE;
5415 break; /* go to next line */
5416 }
5417
5418 else /* look for an interesting HTML tag */
5419 {
5420 while (*dbp != '\0' && *dbp != '<')
5421 dbp++;
5422 if (*dbp == '\0')
5423 break; /* go to next line */
5424 intag = TRUE;
5425 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5426 {
5427 inanchor = TRUE;
5428 continue; /* look on the same line */
5429 }
5430 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5431 || LOOKING_AT_NOCASE (dbp, "<h1>")
5432 || LOOKING_AT_NOCASE (dbp, "<h2>")
5433 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5434 {
5435 intag = FALSE;
5436 getnext = TRUE;
5437 continue; /* look on the same line */
5438 }
5439 dbp += 1;
5440 }
5441 }
5442 }
5443
5444 \f
5445 /*
5446 * Prolog support
5447 *
5448 * Assumes that the predicate or rule starts at column 0.
5449 * Only the first clause of a predicate or rule is added.
5450 * Original code by Sunichirou Sugou (1989)
5451 * Rewritten by Anders Lindgren (1996)
5452 */
5453 static int prolog_pr __P((char *, char *));
5454 static void prolog_skip_comment __P((linebuffer *, FILE *));
5455 static int prolog_atom __P((char *, int));
5456
5457 static void
5458 Prolog_functions (inf)
5459 FILE *inf;
5460 {
5461 char *cp, *last;
5462 int len;
5463 int allocated;
5464
5465 allocated = 0;
5466 len = 0;
5467 last = NULL;
5468
5469 LOOP_ON_INPUT_LINES (inf, lb, cp)
5470 {
5471 if (cp[0] == '\0') /* Empty line */
5472 continue;
5473 else if (iswhite (cp[0])) /* Not a predicate */
5474 continue;
5475 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5476 prolog_skip_comment (&lb, inf);
5477 else if ((len = prolog_pr (cp, last)) > 0)
5478 {
5479 /* Predicate or rule. Store the function name so that we
5480 only generate a tag for the first clause. */
5481 if (last == NULL)
5482 last = xnew(len + 1, char);
5483 else if (len + 1 > allocated)
5484 xrnew (last, len + 1, char);
5485 allocated = len + 1;
5486 strncpy (last, cp, len);
5487 last[len] = '\0';
5488 }
5489 }
5490 if (last != NULL)
5491 free (last);
5492 }
5493
5494
5495 static void
5496 prolog_skip_comment (plb, inf)
5497 linebuffer *plb;
5498 FILE *inf;
5499 {
5500 char *cp;
5501
5502 do
5503 {
5504 for (cp = plb->buffer; *cp != '\0'; cp++)
5505 if (cp[0] == '*' && cp[1] == '/')
5506 return;
5507 readline (plb, inf);
5508 }
5509 while (!feof(inf));
5510 }
5511
5512 /*
5513 * A predicate or rule definition is added if it matches:
5514 * <beginning of line><Prolog Atom><whitespace>(
5515 * or <beginning of line><Prolog Atom><whitespace>:-
5516 *
5517 * It is added to the tags database if it doesn't match the
5518 * name of the previous clause header.
5519 *
5520 * Return the size of the name of the predicate or rule, or 0 if no
5521 * header was found.
5522 */
5523 static int
5524 prolog_pr (s, last)
5525 char *s;
5526 char *last; /* Name of last clause. */
5527 {
5528 int pos;
5529 int len;
5530
5531 pos = prolog_atom (s, 0);
5532 if (pos < 1)
5533 return 0;
5534
5535 len = pos;
5536 pos = skip_spaces (s + pos) - s;
5537
5538 if ((s[pos] == '.'
5539 || (s[pos] == '(' && (pos += 1))
5540 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5541 && (last == NULL /* save only the first clause */
5542 || len != (int)strlen (last)
5543 || !strneq (s, last, len)))
5544 {
5545 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5546 return len;
5547 }
5548 else
5549 return 0;
5550 }
5551
/*
 * Consume a Prolog atom.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence, starting with a lower case letter
 *   (an underscore is accepted as well, also as first character).
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 *
 * S is the text and POS the offset in S at which the atom starts.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* The backslash and the character it quotes.  */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        if (p[1] != '\'')
          return (int) (p + 1 - start); /* closing quote included */
        p += 2;                 /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
5610
5611 \f
5612 /*
5613 * Support for Erlang
5614 *
5615 * Generates tags for functions, defines, and records.
5616 * Assumes that Erlang functions start at column 0.
5617 * Original code by Anders Lindgren (1996)
5618 */
5619 static int erlang_func __P((char *, char *));
5620 static void erlang_attribute __P((char *));
5621 static int erlang_atom __P((char *));
5622
5623 static void
5624 Erlang_functions (inf)
5625 FILE *inf;
5626 {
5627 char *cp, *last;
5628 int len;
5629 int allocated;
5630
5631 allocated = 0;
5632 len = 0;
5633 last = NULL;
5634
5635 LOOP_ON_INPUT_LINES (inf, lb, cp)
5636 {
5637 if (cp[0] == '\0') /* Empty line */
5638 continue;
5639 else if (iswhite (cp[0])) /* Not function nor attribute */
5640 continue;
5641 else if (cp[0] == '%') /* comment */
5642 continue;
5643 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5644 continue;
5645 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5646 {
5647 erlang_attribute (cp);
5648 if (last != NULL)
5649 {
5650 free (last);
5651 last = NULL;
5652 }
5653 }
5654 else if ((len = erlang_func (cp, last)) > 0)
5655 {
5656 /*
5657 * Function. Store the function name so that we only
5658 * generates a tag for the first clause.
5659 */
5660 if (last == NULL)
5661 last = xnew (len + 1, char);
5662 else if (len + 1 > allocated)
5663 xrnew (last, len + 1, char);
5664 allocated = len + 1;
5665 strncpy (last, cp, len);
5666 last[len] = '\0';
5667 }
5668 }
5669 if (last != NULL)
5670 free (last);
5671 }
5672
5673
5674 /*
5675 * A function definition is added if it matches:
5676 * <beginning of line><Erlang Atom><whitespace>(
5677 *
5678 * It is added to the tags database if it doesn't match the
5679 * name of the previous clause header.
5680 *
5681 * Return the size of the name of the function, or 0 if no function
5682 * was found.
5683 */
5684 static int
5685 erlang_func (s, last)
5686 char *s;
5687 char *last; /* Name of last clause. */
5688 {
5689 int pos;
5690 int len;
5691
5692 pos = erlang_atom (s);
5693 if (pos < 1)
5694 return 0;
5695
5696 len = pos;
5697 pos = skip_spaces (s + pos) - s;
5698
5699 /* Save only the first clause. */
5700 if (s[pos++] == '('
5701 && (last == NULL
5702 || len != (int)strlen (last)
5703 || !strneq (s, last, len)))
5704 {
5705 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5706 return len;
5707 }
5708
5709 return 0;
5710 }
5711
5712
5713 /*
5714 * Handle attributes. Currently, tags are generated for defines
5715 * and records.
5716 *
5717 * They are on the form:
5718 * -define(foo, bar).
5719 * -define(Foo(M, N), M+N).
5720 * -record(graph, {vtab = notable, cyclic = true}).
5721 */
5722 static void
5723 erlang_attribute (s)
5724 char *s;
5725 {
5726 char *cp = s;
5727
5728 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5729 && *cp++ == '(')
5730 {
5731 int len = erlang_atom (skip_spaces (cp));
5732 if (len > 0)
5733 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5734 }
5735 return;
5736 }
5737
5738
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted atom starts with a letter or `_' and continues with
 * letters, digits and `_'.  A quoted atom is delimited by single
 * quotes; a backslash inside it protects the following character.
 *
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom, and also when a quoted atom is not closed
 * on this line (multiline quoted atoms are ignored).
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (*p == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')
            return 0;
          if (*p == '\\')
            {
              /* Step over the backslash; the escaped character is
                 skipped by the increment below.  */
              p++;
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      return p + 1 - s;         /* include the closing quote */
    }

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }

  return p - s;
}
5767
5768 \f
5769 static char *scan_separators __P((char *));
5770 static void add_regex __P((char *, language *));
5771 static char *substitute __P((char *, char *, struct re_registers *));
5772
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * unquoted occurrence of that separator.  A backslash quotes the next
 * character: \a \b \d \e \f \n \r \t \v become the corresponding
 * control characters, a quoted separator becomes the separator
 * itself, and for any other quoted character the backslash is kept.
 * Works in place and null terminates the resulting string, which
 * starts at NAME.
 * Returns a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* input character being examined */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character; a backslash at the very
             end of the string is dropped.  */
          src++;
          if (*src == '\0')
            break;

          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell) */
            case 'b': *dst++ = '\b'; break;   /* BS (back space) */
            case 'd': *dst++ = 0177; break;   /* DEL (delete) */
            case 'e': *dst++ = 033; break;    /* ESC (escape) */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed) */
            case 'n': *dst++ = '\n'; break;   /* NL (new line) */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab) */
            default:
              /* A quoted separator loses its quote; anything else
                 keeps it.  */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5832
5833 /* Look at the argument of --regex or --no-regex and do the right
5834 thing. Same for each line of a regexp file. */
5835 static void
5836 analyse_regex (regex_arg)
5837 char *regex_arg;
5838 {
5839 if (regex_arg == NULL)
5840 {
5841 free_regexps (); /* --no-regex: remove existing regexps */
5842 return;
5843 }
5844
5845 /* A real --regexp option or a line in a regexp file. */
5846 switch (regex_arg[0])
5847 {
5848 /* Comments in regexp file or null arg to --regex. */
5849 case '\0':
5850 case ' ':
5851 case '\t':
5852 break;
5853
5854 /* Read a regex file. This is recursive and may result in a
5855 loop, which will stop when the file descriptors are exhausted. */
5856 case '@':
5857 {
5858 FILE *regexfp;
5859 linebuffer regexbuf;
5860 char *regexfile = regex_arg + 1;
5861
5862 /* regexfile is a file containing regexps, one per line. */
5863 regexfp = fopen (regexfile, "r");
5864 if (regexfp == NULL)
5865 {
5866 pfatal (regexfile);
5867 return;
5868 }
5869 linebuffer_init (&regexbuf);
5870 while (readline_internal (&regexbuf, regexfp) > 0)
5871 analyse_regex (regexbuf.buffer);
5872 free (regexbuf.buffer);
5873 fclose (regexfp);
5874 }
5875 break;
5876
5877 /* Regexp to be used for a specific language only. */
5878 case '{':
5879 {
5880 language *lang;
5881 char *lang_name = regex_arg + 1;
5882 char *cp;
5883
5884 for (cp = lang_name; *cp != '}'; cp++)
5885 if (*cp == '\0')
5886 {
5887 error ("unterminated language name in regex: %s", regex_arg);
5888 return;
5889 }
5890 *cp++ = '\0';
5891 lang = get_language_from_langname (lang_name);
5892 if (lang == NULL)
5893 return;
5894 add_regex (cp, lang);
5895 }
5896 break;
5897
5898 /* Regexp to be used for any language. */
5899 default:
5900 add_regex (regex_arg, NULL);
5901 break;
5902 }
5903 }
5904
5905 /* Separate the regexp pattern, compile it,
5906 and care for optional name and modifiers. */
5907 static void
5908 add_regex (regexp_pattern, lang)
5909 char *regexp_pattern;
5910 language *lang;
5911 {
5912 static struct re_pattern_buffer zeropattern;
5913 char sep, *pat, *name, *modifiers;
5914 const char *err;
5915 struct re_pattern_buffer *patbuf;
5916 regexp *rp;
5917 bool
5918 force_explicit_name = TRUE, /* do not use implicit tag names */
5919 ignore_case = FALSE, /* case is significant */
5920 multi_line = FALSE, /* matches are done one line at a time */
5921 single_line = FALSE; /* dot does not match newline */
5922
5923
5924 if (strlen(regexp_pattern) < 3)
5925 {
5926 error ("null regexp", (char *)NULL);
5927 return;
5928 }
5929 sep = regexp_pattern[0];
5930 name = scan_separators (regexp_pattern);
5931 if (name == NULL)
5932 {
5933 error ("%s: unterminated regexp", regexp_pattern);
5934 return;
5935 }
5936 if (name[1] == sep)
5937 {
5938 error ("null name for regexp \"%s\"", regexp_pattern);
5939 return;
5940 }
5941 modifiers = scan_separators (name);
5942 if (modifiers == NULL) /* no terminating separator --> no name */
5943 {
5944 modifiers = name;
5945 name = "";
5946 }
5947 else
5948 modifiers += 1; /* skip separator */
5949
5950 /* Parse regex modifiers. */
5951 for (; modifiers[0] != '\0'; modifiers++)
5952 switch (modifiers[0])
5953 {
5954 case 'N':
5955 if (modifiers == name)
5956 error ("forcing explicit tag name but no name, ignoring", NULL);
5957 force_explicit_name = TRUE;
5958 break;
5959 case 'i':
5960 ignore_case = TRUE;
5961 break;
5962 case 's':
5963 single_line = TRUE;
5964 /* FALLTHRU */
5965 case 'm':
5966 multi_line = TRUE;
5967 need_filebuf = TRUE;
5968 break;
5969 default:
5970 {
5971 char wrongmod [2];
5972 wrongmod[0] = modifiers[0];
5973 wrongmod[1] = '\0';
5974 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5975 }
5976 break;
5977 }
5978
5979 patbuf = xnew (1, struct re_pattern_buffer);
5980 *patbuf = zeropattern;
5981 if (ignore_case)
5982 {
5983 static char lc_trans[CHARS];
5984 int i;
5985 for (i = 0; i < CHARS; i++)
5986 lc_trans[i] = lowcase (i);
5987 patbuf->translate = lc_trans; /* translation table to fold case */
5988 }
5989
5990 if (multi_line)
5991 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5992 else
5993 pat = regexp_pattern;
5994
5995 if (single_line)
5996 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5997 else
5998 re_set_syntax (RE_SYNTAX_EMACS);
5999
6000 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6001 if (multi_line)
6002 free (pat);
6003 if (err != NULL)
6004 {
6005 error ("%s while compiling pattern", err);
6006 return;
6007 }
6008
6009 rp = p_head;
6010 p_head = xnew (1, regexp);
6011 p_head->pattern = savestr (regexp_pattern);
6012 p_head->p_next = rp;
6013 p_head->lang = lang;
6014 p_head->pat = patbuf;
6015 p_head->name = savestr (name);
6016 p_head->error_signaled = FALSE;
6017 p_head->force_explicit_name = force_explicit_name;
6018 p_head->ignore_case = ignore_case;
6019 p_head->multi_line = multi_line;
6020 }
6021
6022 /*
6023 * Do the substitutions indicated by the regular expression and
6024 * arguments.
6025 */
6026 static char *
6027 substitute (in, out, regs)
6028 char *in, *out;
6029 struct re_registers *regs;
6030 {
6031 char *result, *t;
6032 int size, dig, diglen;
6033
6034 result = NULL;
6035 size = strlen (out);
6036
6037 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6038 if (out[size - 1] == '\\')
6039 fatal ("pattern error in \"%s\"", out);
6040 for (t = etags_strchr (out, '\\');
6041 t != NULL;
6042 t = etags_strchr (t + 2, '\\'))
6043 if (ISDIGIT (t[1]))
6044 {
6045 dig = t[1] - '0';
6046 diglen = regs->end[dig] - regs->start[dig];
6047 size += diglen - 2;
6048 }
6049 else
6050 size -= 1;
6051
6052 /* Allocate space and do the substitutions. */
6053 assert (size >= 0);
6054 result = xnew (size + 1, char);
6055
6056 for (t = result; *out != '\0'; out++)
6057 if (*out == '\\' && ISDIGIT (*++out))
6058 {
6059 dig = *out - '0';
6060 diglen = regs->end[dig] - regs->start[dig];
6061 strncpy (t, in + regs->start[dig], diglen);
6062 t += diglen;
6063 }
6064 else
6065 *t++ = *out;
6066 *t = '\0';
6067
6068 assert (t <= result + size);
6069 assert (t - result == (int)strlen (result));
6070
6071 return result;
6072 }
6073
6074 /* Deallocate all regexps. */
6075 static void
6076 free_regexps ()
6077 {
6078 regexp *rp;
6079 while (p_head != NULL)
6080 {
6081 rp = p_head->p_next;
6082 free (p_head->pattern);
6083 free (p_head->name);
6084 free (p_head);
6085 p_head = rp;
6086 }
6087 return;
6088 }
6089
6090 /*
6091 * Reads the whole file as a single string from `filebuf' and looks for
6092 * multi-line regular expressions, creating tags on matches.
6093 * readline already dealt with normal regexps.
6094 *
6095 * Idea by Ben Wing <ben@666.com> (2002).
6096 */
6097 static void
6098 regex_tag_multiline ()
6099 {
6100 char *buffer = filebuf.buffer;
6101 regexp *rp;
6102 char *name;
6103
6104 for (rp = p_head; rp != NULL; rp = rp->p_next)
6105 {
6106 int match = 0;
6107
6108 if (!rp->multi_line)
6109 continue; /* skip normal regexps */
6110
6111 /* Generic initialisations before parsing file from memory. */
6112 lineno = 1; /* reset global line number */
6113 charno = 0; /* reset global char number */
6114 linecharno = 0; /* reset global char number of line start */
6115
6116 /* Only use generic regexps or those for the current language. */
6117 if (rp->lang != NULL && rp->lang != curfdp->lang)
6118 continue;
6119
6120 while (match >= 0 && match < filebuf.len)
6121 {
6122 match = re_search (rp->pat, buffer, filebuf.len, charno,
6123 filebuf.len - match, &rp->regs);
6124 switch (match)
6125 {
6126 case -2:
6127 /* Some error. */
6128 if (!rp->error_signaled)
6129 {
6130 error ("regexp stack overflow while matching \"%s\"",
6131 rp->pattern);
6132 rp->error_signaled = TRUE;
6133 }
6134 break;
6135 case -1:
6136 /* No match. */
6137 break;
6138 default:
6139 if (match == rp->regs.end[0])
6140 {
6141 if (!rp->error_signaled)
6142 {
6143 error ("regexp matches the empty string: \"%s\"",
6144 rp->pattern);
6145 rp->error_signaled = TRUE;
6146 }
6147 match = -3; /* exit from while loop */
6148 break;
6149 }
6150
6151 /* Match occurred. Construct a tag. */
6152 while (charno < rp->regs.end[0])
6153 if (buffer[charno++] == '\n')
6154 lineno++, linecharno = charno;
6155 name = rp->name;
6156 if (name[0] == '\0')
6157 name = NULL;
6158 else /* make a named tag */
6159 name = substitute (buffer, rp->name, &rp->regs);
6160 if (rp->force_explicit_name)
6161 /* Force explicit tag name, if a name is there. */
6162 pfnote (name, TRUE, buffer + linecharno,
6163 charno - linecharno + 1, lineno, linecharno);
6164 else
6165 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6166 charno - linecharno + 1, lineno, linecharno);
6167 break;
6168 }
6169 }
6170 }
6171 }
6172
6173 \f
6174 static bool
6175 nocase_tail (cp)
6176 char *cp;
6177 {
6178 register int len = 0;
6179
6180 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6181 cp++, len++;
6182 if (*cp == '\0' && !intoken (dbp[len]))
6183 {
6184 dbp += len;
6185 return TRUE;
6186 }
6187 return FALSE;
6188 }
6189
6190 static void
6191 get_tag (bp, namepp)
6192 register char *bp;
6193 char **namepp;
6194 {
6195 register char *cp = bp;
6196
6197 if (*bp != '\0')
6198 {
6199 /* Go till you get to white space or a syntactic break */
6200 for (cp = bp + 1; !notinname (*cp); cp++)
6201 continue;
6202 make_tag (bp, cp - bp, TRUE,
6203 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6204 }
6205
6206 if (namepp != NULL)
6207 *namepp = savenstr (bp, cp - bp);
6208 }
6209
6210 /*
6211 * Read a line of text from `stream' into `lbp', excluding the
6212 * newline or CR-NL, if any. Return the number of characters read from
6213 * `stream', which is the length of the line including the newline.
6214 *
6215 * On DOS or Windows we do not count the CR character, if any before the
6216 * NL, in the returned length; this mirrors the behavior of Emacs on those
6217 * platforms (for text files, it translates CR-NL to NL as it reads in the
6218 * file).
6219 *
6220 * If multi-line regular expressions are requested, each line read is
6221 * appended to `filebuf'.
6222 */
6223 static long
6224 readline_internal (lbp, stream)
6225 linebuffer *lbp;
6226 register FILE *stream;
6227 {
6228 char *buffer = lbp->buffer;
6229 register char *p = lbp->buffer;
6230 register char *pend;
6231 int chars_deleted;
6232
6233 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6234
6235 for (;;)
6236 {
6237 register int c = getc (stream);
6238 if (p == pend)
6239 {
6240 /* We're at the end of linebuffer: expand it. */
6241 lbp->size *= 2;
6242 xrnew (buffer, lbp->size, char);
6243 p += buffer - lbp->buffer;
6244 pend = buffer + lbp->size;
6245 lbp->buffer = buffer;
6246 }
6247 if (c == EOF)
6248 {
6249 *p = '\0';
6250 chars_deleted = 0;
6251 break;
6252 }
6253 if (c == '\n')
6254 {
6255 if (p > buffer && p[-1] == '\r')
6256 {
6257 p -= 1;
6258 #ifdef DOS_NT
6259 /* Assume CRLF->LF translation will be performed by Emacs
6260 when loading this file, so CRs won't appear in the buffer.
6261 It would be cleaner to compensate within Emacs;
6262 however, Emacs does not know how many CRs were deleted
6263 before any given point in the file. */
6264 chars_deleted = 1;
6265 #else
6266 chars_deleted = 2;
6267 #endif
6268 }
6269 else
6270 {
6271 chars_deleted = 1;
6272 }
6273 *p = '\0';
6274 break;
6275 }
6276 *p++ = c;
6277 }
6278 lbp->len = p - buffer;
6279
6280 if (need_filebuf /* we need filebuf for multi-line regexps */
6281 && chars_deleted > 0) /* not at EOF */
6282 {
6283 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6284 {
6285 /* Expand filebuf. */
6286 filebuf.size *= 2;
6287 xrnew (filebuf.buffer, filebuf.size, char);
6288 }
6289 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6290 filebuf.len += lbp->len;
6291 filebuf.buffer[filebuf.len++] = '\n';
6292 filebuf.buffer[filebuf.len] = '\0';
6293 }
6294
6295 return lbp->len + chars_deleted;
6296 }
6297
6298 /*
6299 * Like readline_internal, above, but in addition try to match the
6300 * input line against relevant regular expressions and manage #line
6301 * directives.
6302 */
6303 static void
6304 readline (lbp, stream)
6305 linebuffer *lbp;
6306 FILE *stream;
6307 {
6308 long result;
6309
6310 linecharno = charno; /* update global char number of line start */
6311 result = readline_internal (lbp, stream); /* read line */
6312 lineno += 1; /* increment global line number */
6313 charno += result; /* increment global char number */
6314
6315 /* Honour #line directives. */
6316 if (!no_line_directive)
6317 {
6318 static bool discard_until_line_directive;
6319
6320 /* Check whether this is a #line directive. */
6321 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6322 {
6323 unsigned int lno;
6324 int start = 0;
6325
6326 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6327 && start > 0) /* double quote character found */
6328 {
6329 char *endp = lbp->buffer + start;
6330
6331 while ((endp = etags_strchr (endp, '"')) != NULL
6332 && endp[-1] == '\\')
6333 endp++;
6334 if (endp != NULL)
6335 /* Ok, this is a real #line directive. Let's deal with it. */
6336 {
6337 char *taggedabsname; /* absolute name of original file */
6338 char *taggedfname; /* name of original file as given */
6339 char *name; /* temp var */
6340
6341 discard_until_line_directive = FALSE; /* found it */
6342 name = lbp->buffer + start;
6343 *endp = '\0';
6344 canonicalize_filename (name); /* for DOS */
6345 taggedabsname = absolute_filename (name, tagfiledir);
6346 if (filename_is_absolute (name)
6347 || filename_is_absolute (curfdp->infname))
6348 taggedfname = savestr (taggedabsname);
6349 else
6350 taggedfname = relative_filename (taggedabsname,tagfiledir);
6351
6352 if (streq (curfdp->taggedfname, taggedfname))
6353 /* The #line directive is only a line number change. We
6354 deal with this afterwards. */
6355 free (taggedfname);
6356 else
6357 /* The tags following this #line directive should be
6358 attributed to taggedfname. In order to do this, set
6359 curfdp accordingly. */
6360 {
6361 fdesc *fdp; /* file description pointer */
6362
6363 /* Go look for a file description already set up for the
6364 file indicated in the #line directive. If there is
6365 one, use it from now until the next #line
6366 directive. */
6367 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6368 if (streq (fdp->infname, curfdp->infname)
6369 && streq (fdp->taggedfname, taggedfname))
6370 /* If we remove the second test above (after the &&)
6371 then all entries pertaining to the same file are
6372 coalesced in the tags file. If we use it, then
6373 entries pertaining to the same file but generated
6374 from different files (via #line directives) will
6375 go into separate sections in the tags file. These
6376 alternatives look equivalent. The first one
6377 destroys some apparently useless information. */
6378 {
6379 curfdp = fdp;
6380 free (taggedfname);
6381 break;
6382 }
6383 /* Else, if we already tagged the real file, skip all
6384 input lines until the next #line directive. */
6385 if (fdp == NULL) /* not found */
6386 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6387 if (streq (fdp->infabsname, taggedabsname))
6388 {
6389 discard_until_line_directive = TRUE;
6390 free (taggedfname);
6391 break;
6392 }
6393 /* Else create a new file description and use that from
6394 now on, until the next #line directive. */
6395 if (fdp == NULL) /* not found */
6396 {
6397 fdp = fdhead;
6398 fdhead = xnew (1, fdesc);
6399 *fdhead = *curfdp; /* copy curr. file description */
6400 fdhead->next = fdp;
6401 fdhead->infname = savestr (curfdp->infname);
6402 fdhead->infabsname = savestr (curfdp->infabsname);
6403 fdhead->infabsdir = savestr (curfdp->infabsdir);
6404 fdhead->taggedfname = taggedfname;
6405 fdhead->usecharno = FALSE;
6406 fdhead->prop = NULL;
6407 fdhead->written = FALSE;
6408 curfdp = fdhead;
6409 }
6410 }
6411 free (taggedabsname);
6412 lineno = lno - 1;
6413 readline (lbp, stream);
6414 return;
6415 } /* if a real #line directive */
6416 } /* if #line is followed by a a number */
6417 } /* if line begins with "#line " */
6418
6419 /* If we are here, no #line directive was found. */
6420 if (discard_until_line_directive)
6421 {
6422 if (result > 0)
6423 {
6424 /* Do a tail recursion on ourselves, thus discarding the contents
6425 of the line buffer. */
6426 readline (lbp, stream);
6427 return;
6428 }
6429 /* End of file. */
6430 discard_until_line_directive = FALSE;
6431 return;
6432 }
6433 } /* if #line directives should be considered */
6434
6435 {
6436 int match;
6437 regexp *rp;
6438 char *name;
6439
6440 /* Match against relevant regexps. */
6441 if (lbp->len > 0)
6442 for (rp = p_head; rp != NULL; rp = rp->p_next)
6443 {
6444 /* Only use generic regexps or those for the current language.
6445 Also do not use multiline regexps, which is the job of
6446 regex_tag_multiline. */
6447 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6448 || rp->multi_line)
6449 continue;
6450
6451 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6452 switch (match)
6453 {
6454 case -2:
6455 /* Some error. */
6456 if (!rp->error_signaled)
6457 {
6458 error ("regexp stack overflow while matching \"%s\"",
6459 rp->pattern);
6460 rp->error_signaled = TRUE;
6461 }
6462 break;
6463 case -1:
6464 /* No match. */
6465 break;
6466 case 0:
6467 /* Empty string matched. */
6468 if (!rp->error_signaled)
6469 {
6470 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6471 rp->error_signaled = TRUE;
6472 }
6473 break;
6474 default:
6475 /* Match occurred. Construct a tag. */
6476 name = rp->name;
6477 if (name[0] == '\0')
6478 name = NULL;
6479 else /* make a named tag */
6480 name = substitute (lbp->buffer, rp->name, &rp->regs);
6481 if (rp->force_explicit_name)
6482 /* Force explicit tag name, if a name is there. */
6483 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6484 else
6485 make_tag (name, strlen (name), TRUE,
6486 lbp->buffer, match, lineno, linecharno);
6487 break;
6488 }
6489 }
6490 }
6491 }
6492
6493 \f
/*
 * Return a newly allocated copy of the whole string CP (strlen(cp)+1
 * bytes, obtained through savenstr).
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6504
6505 /*
6506 * Return a pointer to a space of size LEN+1 allocated with xnew where
6507 * the string CP has been copied for at most the first LEN characters.
6508 */
6509 static char *
6510 savenstr (cp, len)
6511 char *cp;
6512 int len;
6513 {
6514 register char *dp;
6515
6516 dp = xnew (len + 1, char);
6517 strncpy (dp, cp, len);
6518 dp[len] = '\0';
6519 return dp;
6520 }
6521
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating null counts as part
 * of the string, so looking for '\0' finds the end of sp.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
6543
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating null counts as part
 * of the string, so looking for '\0' finds the end of sp.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
6562
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero when they are equal, otherwise the difference between
 * the first pair of characters that differ (after case folding when
 * both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int diff;

      /* Case is folded only when both characters are letters.  */
      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      if (*s1 == '\0' || diff != 0)
        return diff;
    }
}
6583
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: at most the first N
 * characters of S1 and S2 are looked at, and the result is zero when
 * they all match (in particular when N is zero or negative).
 * Otherwise return the difference between the first pair of
 * characters that differ (after case folding when both are
 * alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Test the count first, and decrement it only for characters that
     matched, so that N is exactly the number of characters still to
     be compared when the loop stops.  */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  /* All N characters matched: do not look at character N+1, even if
     S1 happens to end exactly here.  */
  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6609
/* Skip spaces (end of string is not space), return new pointer:
   the address of the first character at or after CP for which
   iswhite is false.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6619
/* Skip non spaces, except end of string, return new pointer: the
   address of the first character at or after CP that is either
   white space or the terminating null.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;; cp++)
    if (*cp == '\0' || iswhite (*cp))
      return cp;
}
6629
/* Print error message and exit.  S1 and S2 are passed on to error
   unchanged (S1 is the printf control string, S2 its argument); the
   program then terminates with status EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6638
/* Report the current system error with perror, using S1 as the
   prefix of the message, and terminate the program with status
   EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6646
6647 static void
6648 suggest_asking_for_help ()
6649 {
6650 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6651 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6652 exit (EXIT_FAILURE);
6653 }
6654
6655 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6656 static void
6657 error (s1, s2)
6658 const char *s1, *s2;
6659 {
6660 fprintf (stderr, "%s: ", progname);
6661 fprintf (stderr, s1, s2);
6662 fprintf (stderr, "\n");
6663 }
6664
6665 /* Return a newly-allocated string whose contents
6666 concatenate those of s1, s2, s3. */
6667 static char *
6668 concat (s1, s2, s3)
6669 char *s1, *s2, *s3;
6670 {
6671 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6672 char *result = xnew (len1 + len2 + len3 + 1, char);
6673
6674 strcpy (result, s1);
6675 strcpy (result + len1, s2);
6676 strcpy (result + len1 + len2, s3);
6677 result[len1 + len2 + len3] = '\0';
6678
6679 return result;
6680 }
6681
6682 \f
6683 /* Does the same work as the system V getcwd, but does not need to
6684 guess the buffer size in advance. */
6685 static char *
6686 etags_getcwd ()
6687 {
6688 #ifdef HAVE_GETCWD
6689 int bufsize = 200;
6690 char *path = xnew (bufsize, char);
6691
6692 while (getcwd (path, bufsize) == NULL)
6693 {
6694 if (errno != ERANGE)
6695 pfatal ("getcwd");
6696 bufsize *= 2;
6697 free (path);
6698 path = xnew (bufsize, char);
6699 }
6700
6701 canonicalize_filename (path);
6702 return path;
6703
6704 #else /* not HAVE_GETCWD */
6705 #if MSDOS
6706
6707 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6708
6709 getwd (path);
6710
6711 for (p = path; *p != '\0'; p++)
6712 if (*p == '\\')
6713 *p = '/';
6714 else
6715 *p = lowcase (*p);
6716
6717 return strdup (path);
6718 #else /* not MSDOS */
6719 linebuffer path;
6720 FILE *pipe;
6721
6722 linebuffer_init (&path);
6723 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6724 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6725 pfatal ("pwd");
6726 pclose (pipe);
6727
6728 return path.buffer;
6729 #endif /* not MSDOS */
6730 #endif /* not HAVE_GETCWD */
6731 }
6732
6733 /* Return a newly allocated string containing the file name of FILE
6734 relative to the absolute directory DIR (which should end with a slash). */
6735 static char *
6736 relative_filename (file, dir)
6737 char *file, *dir;
6738 {
6739 char *fp, *dp, *afn, *res;
6740 int i;
6741
6742 /* Find the common root of file and dir (with a trailing slash). */
6743 afn = absolute_filename (file, cwd);
6744 fp = afn;
6745 dp = dir;
6746 while (*fp++ == *dp++)
6747 continue;
6748 fp--, dp--; /* back to the first differing char */
6749 #ifdef DOS_NT
6750 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6751 return afn;
6752 #endif
6753 do /* look at the equal chars until '/' */
6754 fp--, dp--;
6755 while (*fp != '/');
6756
6757 /* Build a sequence of "../" strings for the resulting relative file name. */
6758 i = 0;
6759 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6760 i += 1;
6761 res = xnew (3*i + strlen (fp + 1) + 1, char);
6762 res[0] = '\0';
6763 while (i-- > 0)
6764 strcat (res, "../");
6765
6766 /* Add the file name relative to the common root of file and dir. */
6767 strcat (res, fp + 1);
6768 free (afn);
6769
6770 return res;
6771 }
6772
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute it is appended to DIR.  The
   "/dirname/.." and "/." components are then removed from the
   result.  Should nothing be left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component.  */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow: move the tail, terminating null included, with
                 memmove.  */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the regions overlap here as well.  */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6836
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place; when it
   contains no slash the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *cut, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily end FILE right after its last slash, so that only
     the directory part is made absolute.  */
  cut = last_slash + 1;
  saved = *cut;
  *cut = '\0';
  res = absolute_filename (file, dir);
  *cut = saved;

  return res;
}
6858
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute when it starts with a slash or, under DOS_NT,
   with a drive letter followed by ":/".  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6871
/* Canonicalize the file name FN in place.  Under DOS_NT a lower case
   drive letter is made upper case and backslashes are translated
   into slashes.  On other systems nothing is done.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6890
6891 \f
6892 /* Initialize a linebuffer for use */
6893 static void
6894 linebuffer_init (lbp)
6895 linebuffer *lbp;
6896 {
6897 lbp->size = (DEBUG) ? 3 : 200;
6898 lbp->buffer = xnew (lbp->size, char);
6899 lbp->buffer[0] = '\0';
6900 lbp->len = 0;
6901 }
6902
6903 /* Set the minimum size of a string contained in a linebuffer. */
6904 static void
6905 linebuffer_setlen (lbp, toksize)
6906 linebuffer *lbp;
6907 int toksize;
6908 {
6909 while (lbp->size <= toksize)
6910 {
6911 lbp->size *= 2;
6912 xrnew (lbp->buffer, lbp->size, char);
6913 }
6914 lbp->len = toksize;
6915 }
6916
6917 /* Like malloc but get fatal error if memory is exhausted. */
6918 static PTR
6919 xmalloc (size)
6920 unsigned int size;
6921 {
6922 PTR result = (PTR) malloc (size);
6923 if (result == NULL)
6924 fatal ("virtual memory exhausted", (char *)NULL);
6925 return result;
6926 }
6927
6928 static PTR
6929 xrealloc (ptr, size)
6930 char *ptr;
6931 unsigned int size;
6932 {
6933 PTR result = (PTR) realloc (ptr, size);
6934 if (result == NULL)
6935 fatal ("virtual memory exhausted", (char *)NULL);
6936 return result;
6937 }
6938
6939 /*
6940 * Local Variables:
6941 * indent-tabs-mode: t
6942 * tab-width: 8
6943 * fill-column: 79
6944 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6945 * c-file-style: "gnu"
6946 * End:
6947 */
6948
6949 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6950 (do not change this comment) */
6951
6952 /* etags.c ends here */