Added "GTY" as synonym for __attribute__. Updated gperf output.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2
3 Copyright (C) 1984 The Regents of the University of California
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the
13 distribution.
14 3. Neither the name of the University nor the names of its
15 contributors may be used to endorse or promote products derived
16 from this software without specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
32 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
33 Free Software Foundation, Inc.
34
35 This file is not considered part of GNU Emacs.
36
37 This program is free software; you can redistribute it and/or modify
38 it under the terms of the GNU General Public License as published by
39 the Free Software Foundation; either version 3, or (at your option)
40 any later version.
41
42 This program is distributed in the hope that it will be useful,
43 but WITHOUT ANY WARRANTY; without even the implied warranty of
44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 GNU General Public License for more details.
46
47 You should have received a copy of the GNU General Public License
48 along with this program; see the file COPYING. If not, write to the
49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
50 Boston, MA 02110-1301, USA. */
51
52
53 /* NB To comply with the above BSD license, copyright information is
54 reproduced in etc/ETAGS.README. That file should be updated when the
55 above notices are.
56
57 To the best of our knowledge, this code was originally based on the
58 ctags.c distributed with BSD4.2, which was copyrighted by the
59 University of California, as described above. */
60
61
62 /*
63 * Authors:
64 * 1983 Ctags originally by Ken Arnold.
65 * 1984 Fortran added by Jim Kleckner.
66 * 1984 Ed Pelegri-Llopart added C typedefs.
67 * 1985 Emacs TAGS format by Richard Stallman.
68 * 1989 Sam Kendall added C++.
69 * 1992 Joseph B. Wells improved C and C++ parsing.
70 * 1993 Francesco Potortì reorganised C and C++.
71 * 1994 Line-by-line regexp tags by Tom Tromey.
72 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
73 * 2002 #line directives by Francesco Potortì.
74 *
75 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
76 */
77
78 /*
79 * If you want to add support for a new language, start by looking at the LUA
80 * language, which is the simplest. Alternatively, consider shipping a
81 * configuration file containing regexp definitions for etags.
82 */
83
84 char pot_etags_version[] = "@(#) pot revision number is 17.26";
85
86 #define TRUE 1
87 #define FALSE 0
88
89 #ifdef DEBUG
90 # undef DEBUG
91 # define DEBUG TRUE
92 #else
93 # define DEBUG FALSE
94 # define NDEBUG /* disable assert */
95 #endif
96
97 #ifdef HAVE_CONFIG_H
98 # include <config.h>
99 /* On some systems, Emacs defines static as nothing for the sake
100 of unexec. We don't want that here since we don't use unexec. */
101 # undef static
102 # ifndef PTR /* for XEmacs */
103 # define PTR void *
104 # endif
105 # ifndef __P /* for XEmacs */
106 # define __P(args) args
107 # endif
108 #else /* no config.h */
109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
110 # define __P(args) args /* use prototypes */
111 # define PTR void * /* for generic pointers */
112 # else /* not standard C */
113 # define __P(args) () /* no prototypes */
114 # define const /* remove const for old compilers' sake */
115 # define PTR long * /* don't use void* */
116 # endif
117 #endif /* !HAVE_CONFIG_H */
118
119 #ifndef _GNU_SOURCE
120 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
121 #endif
122
123 /* WIN32_NATIVE is for XEmacs.
124 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
125 #ifdef WIN32_NATIVE
126 # undef MSDOS
127 # undef WINDOWSNT
128 # define WINDOWSNT
129 #endif /* WIN32_NATIVE */
130
131 #ifdef MSDOS
132 # undef MSDOS
133 # define MSDOS TRUE
134 # include <fcntl.h>
135 # include <sys/param.h>
136 # include <io.h>
137 # ifndef HAVE_CONFIG_H
138 # define DOS_NT
139 # include <sys/config.h>
140 # endif
141 #else
142 # define MSDOS FALSE
143 #endif /* MSDOS */
144
145 #ifdef WINDOWSNT
146 # include <stdlib.h>
147 # include <fcntl.h>
148 # include <string.h>
149 # include <direct.h>
150 # include <io.h>
151 # define MAXPATHLEN _MAX_PATH
152 # undef HAVE_NTGUI
153 # undef DOS_NT
154 # define DOS_NT
155 # ifndef HAVE_GETCWD
156 # define HAVE_GETCWD
157 # endif /* undef HAVE_GETCWD */
158 #else /* not WINDOWSNT */
159 # ifdef STDC_HEADERS
160 # include <stdlib.h>
161 # include <string.h>
162 # else /* no standard C headers */
163 extern char *getenv ();
164 # ifdef VMS
165 # define EXIT_SUCCESS 1
166 # define EXIT_FAILURE 0
167 # else /* no VMS */
168 # define EXIT_SUCCESS 0
169 # define EXIT_FAILURE 1
170 # endif
171 # endif
172 #endif /* !WINDOWSNT */
173
174 #ifdef HAVE_UNISTD_H
175 # include <unistd.h>
176 #else
177 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
178 extern char *getcwd (char *buf, size_t size);
179 # endif
180 #endif /* HAVE_UNISTD_H */
181
182 #include <stdio.h>
183 #include <ctype.h>
184 #include <errno.h>
185 #ifndef errno
186 extern int errno;
187 #endif
188 #include <sys/types.h>
189 #include <sys/stat.h>
190
191 #include <assert.h>
192 #ifdef NDEBUG
193 # undef assert /* some systems have a buggy assert.h */
194 # define assert(x) ((void) 0)
195 #endif
196
197 #if !defined (S_ISREG) && defined (S_IFREG)
198 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
199 #endif
200
#ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
/* Normalise the flag to TRUE or FALSE so that it can be tested with a
   plain `#if'.  The #undef must come first, as is done for DEBUG, MSDOS
   and CTAGS: after -DNO_LONG_OPTIONS the macro expands to 1, and
   redefining it with a different replacement list is not allowed. */
# undef NO_LONG_OPTIONS
# define NO_LONG_OPTIONS TRUE
/* Without getopt_long, fall back on plain getopt; the long option table
   and the index argument are simply dropped. */
# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
  extern char *optarg;
  extern int optind, opterr;
#else
# define NO_LONG_OPTIONS FALSE
# include <getopt.h>
#endif /* NO_LONG_OPTIONS */
210
211 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
212 # ifdef __CYGWIN__ /* compiling on Cygwin */
213 !!! NOTICE !!!
214 the regex.h distributed with Cygwin is not compatible with etags, alas!
215 If you want regular expression support, you should delete this notice and
216 arrange to use the GNU regex.h and regex.c.
217 # endif
218 #endif
219 #include <regex.h>
220
221 /* Define CTAGS to make the program "ctags" compatible with the usual one.
222 Leave it undefined to make the program "etags", which makes emacs-style
223 tag tables and tags typedefs, #defines and struct/union/enum by default. */
224 #ifdef CTAGS
225 # undef CTAGS
226 # define CTAGS TRUE
227 #else
228 # define CTAGS FALSE
229 #endif
230
/*
 * String equality predicates.  Each one evaluates to nonzero when the
 * two strings compare equal.  streq and strneq use strcmp and strncmp;
 * strcaseeq and strncaseeq use etags_strcasecmp and etags_strncasecmp.
 * The `n' variants pass N on as the maximum number of characters.
 *
 * Both arguments must be non-null.  This is checked by assert, so only
 * when assertions are enabled.  Arguments are evaluated more than once.
 */
#define streq(s,t) (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
235
236 #define CHARS 256 /* 2^CHAR_BIT for 8-bit chars: size of the _wht, _nin, _btk, _itk and _etk tables */
237 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) /* reduce any char value, negative ones included, to an index in 0..CHARS-1 */
238 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
239 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
240 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
241 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
242 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
243
244 #define ISALNUM(c) isalnum (CHAR(c))
245 #define ISALPHA(c) isalpha (CHAR(c))
246 #define ISDIGIT(c) isdigit (CHAR(c))
247 #define ISLOWER(c) islower (CHAR(c))
248
249 #define lowcase(c) tolower (CHAR(c))
250 #define upcase(c) toupper (CHAR(c))
251
252
253 /*
254 * xnew, xrnew -- allocate, reallocate storage
255 *
256 * SYNOPSIS: Type *xnew (int n, Type);
257 * void xrnew (OldPointer, int n, Type);
258 */
259 #if DEBUG
260 # include "chkmalloc.h"
261 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
262 (n) * sizeof (Type)))
263 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
264 (char *) (op), (n) * sizeof (Type)))
265 #else
266 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
267 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
268 (char *) (op), (n) * sizeof (Type)))
269 #endif
270
271 #define bool int /* has to be int: longopts stores the address of several bool flags in struct option, whose flag member is an int * */
272
273 typedef void Lang_function __P((FILE *));
274
275 typedef struct
276 {
277 char *suffix; /* file name suffix for this compressor */
278 char *command; /* takes one arg and decompresses to stdout */
279 } compressor;
280
281 typedef struct
282 {
283 char *name; /* language name */
284 char *help; /* detailed help for the language */
285 Lang_function *function; /* parse function */
286 char **suffixes; /* name suffixes of this language's files */
287 char **filenames; /* names of this language's files */
288 char **interpreters; /* interpreters for this language */
289 bool metasource; /* source used to generate other sources */
290 } language;
291
292 typedef struct fdesc
293 {
294 struct fdesc *next; /* for the linked list */
295 char *infname; /* uncompressed input file name */
296 char *infabsname; /* absolute uncompressed input file name */
297 char *infabsdir; /* absolute dir of input file */
298 char *taggedfname; /* file name to write in tagfile */
299 language *lang; /* language of file */
300 char *prop; /* file properties to write in tagfile */
301 bool usecharno; /* etags tags shall contain char number */
302 bool written; /* entry written in the tags file */
303 } fdesc;
304
305 typedef struct node_st
306 { /* sorting structure */
307 struct node_st *left, *right; /* left and right sons */
308 fdesc *fdp; /* description of file to whom tag belongs */
309 char *name; /* tag name */
310 char *regex; /* search regexp */
311 bool valid; /* write this tag on the tag file */
312 bool is_func; /* function tag: use regexp in CTAGS mode */
313 bool been_warned; /* warning already given for duplicated tag */
314 int lno; /* line number tag is on */
315 long cno; /* character number line starts on */
316 } node;
317
318 /*
319 * A `linebuffer' is a structure which holds a line of text.
320 * `readline_internal' reads a line from a stream into a linebuffer
321 * and works regardless of the length of the line.
322 * SIZE is the size of BUFFER, LEN is the length of the string in
323 * BUFFER after readline reads it.
324 */
325 typedef struct
326 {
327 long size;
328 int len;
329 char *buffer;
330 } linebuffer;
331
332 /* Used to support mixing of --lang and file names. */
333 typedef struct
334 {
335 enum {
336 at_language, /* a language specification */
337 at_regexp, /* a regular expression */
338 at_filename, /* a file name */
339 at_stdin, /* read from stdin here */
340 at_end /* stop parsing the list */
341 } arg_type; /* argument type */
342 language *lang; /* language associated with the argument */
343 char *what; /* the argument itself */
344 } argument;
345
346 /* Structure defining a regular expression. */
347 typedef struct regexp
348 {
349 struct regexp *p_next; /* pointer to next in list */
350 language *lang; /* if set, use only for this language */
351 char *pattern; /* the regexp pattern */
352 char *name; /* tag name */
353 struct re_pattern_buffer *pat; /* the compiled pattern */
354 struct re_registers regs; /* re registers */
355 bool error_signaled; /* already signaled for this regexp */
356 bool force_explicit_name; /* do not allow implicit tag name */
357 bool ignore_case; /* ignore case when matching */
358 bool multi_line; /* do a multi-line match on the whole file */
359 } regexp;
360
361
362 /* Many compilers barf on this:
363 Lang_function Ada_funcs;
364 so let's write it this way */
365 static void Ada_funcs __P((FILE *));
366 static void Asm_labels __P((FILE *));
367 static void C_entries __P((int c_ext, FILE *));
368 static void default_C_entries __P((FILE *));
369 static void plain_C_entries __P((FILE *));
370 static void Cjava_entries __P((FILE *));
371 static void Cobol_paragraphs __P((FILE *));
372 static void Cplusplus_entries __P((FILE *));
373 static void Cstar_entries __P((FILE *));
374 static void Erlang_functions __P((FILE *));
375 static void Forth_words __P((FILE *));
376 static void Fortran_functions __P((FILE *));
377 static void HTML_labels __P((FILE *));
378 static void Lisp_functions __P((FILE *));
379 static void Lua_functions __P((FILE *));
380 static void Makefile_targets __P((FILE *));
381 static void Pascal_functions __P((FILE *));
382 static void Perl_functions __P((FILE *));
383 static void PHP_functions __P((FILE *));
384 static void PS_functions __P((FILE *));
385 static void Prolog_functions __P((FILE *));
386 static void Python_functions __P((FILE *));
387 static void Scheme_functions __P((FILE *));
388 static void TeX_commands __P((FILE *));
389 static void Texinfo_nodes __P((FILE *));
390 static void Yacc_entries __P((FILE *));
391 static void just_read_file __P((FILE *));
392
393 static void print_language_names __P((void));
394 static void print_version __P((void));
395 static void print_help __P((argument *));
396 int main __P((int, char **));
397
398 static compressor *get_compressor_from_suffix __P((char *, char **));
399 static language *get_language_from_langname __P((const char *));
400 static language *get_language_from_interpreter __P((char *));
401 static language *get_language_from_filename __P((char *, bool));
402 static void readline __P((linebuffer *, FILE *));
403 static long readline_internal __P((linebuffer *, FILE *));
404 static bool nocase_tail __P((char *));
405 static void get_tag __P((char *, char **));
406
407 static void analyse_regex __P((char *));
408 static void free_regexps __P((void));
409 static void regex_tag_multiline __P((void));
410 static void error __P((const char *, const char *));
411 static void suggest_asking_for_help __P((void));
412 void fatal __P((char *, char *));
413 static void pfatal __P((char *));
414 static void add_node __P((node *, node **));
415
416 static void init __P((void));
417 static void process_file_name __P((char *, language *));
418 static void process_file __P((FILE *, char *, language *));
419 static void find_entries __P((FILE *));
420 static void free_tree __P((node *));
421 static void free_fdesc __P((fdesc *));
422 static void pfnote __P((char *, bool, char *, int, int, long));
423 static void make_tag __P((char *, int, bool, char *, int, int, long));
424 static void invalidate_nodes __P((fdesc *, node **));
425 static void put_entries __P((node *));
426
427 static char *concat __P((char *, char *, char *));
428 static char *skip_spaces __P((char *));
429 static char *skip_non_spaces __P((char *));
430 static char *savenstr __P((char *, int));
431 static char *savestr __P((char *));
432 static char *etags_strchr __P((const char *, int));
433 static char *etags_strrchr __P((const char *, int));
434 static int etags_strcasecmp __P((const char *, const char *));
435 static int etags_strncasecmp __P((const char *, const char *, int));
436 static char *etags_getcwd __P((void));
437 static char *relative_filename __P((char *, char *));
438 static char *absolute_filename __P((char *, char *));
439 static char *absolute_dirname __P((char *, char *));
440 static bool filename_is_absolute __P((char *f));
441 static void canonicalize_filename __P((char *));
442 static void linebuffer_init __P((linebuffer *));
443 static void linebuffer_setlen __P((linebuffer *, int));
444 static PTR xmalloc __P((unsigned int));
445 static PTR xrealloc __P((char *, unsigned int));
446
447 \f
448 static char searchar = '/'; /* use /.../ searches */
449
450 static char *tagfile; /* output file */
451 static char *progname; /* name this program was invoked with */
452 static char *cwd; /* current working directory */
453 static char *tagfiledir; /* directory of tagfile */
454 static FILE *tagf; /* ioptr for tags file */
455
456 static fdesc *fdhead; /* head of file description list */
457 static fdesc *curfdp; /* current file description */
458 static int lineno; /* line number of current line */
459 static long charno; /* current character number */
460 static long linecharno; /* charno of start of current line */
461 static char *dbp; /* pointer to start of current tag */
462
463 static const int invalidcharno = -1;
464
465 static node *nodehead; /* the head of the binary tree of tags */
466 static node *last_node; /* the last node created */
467
468 static linebuffer lb; /* the current line */
469 static linebuffer filebuf; /* a buffer containing the whole file */
470 static linebuffer token_name; /* a buffer containing a tag name */
471
472 /* boolean "functions" (see init) */
473 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
474 static char
475 /* white chars */
476 *white = " \f\t\n\r\v",
477 /* not in a name */
478 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
479 /* token ending chars */
480 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
481 /* token starting chars */
482 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
483 /* valid in-token chars */
484 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
485
486 static bool append_to_tagfile; /* -a: append to tags */
487 /* The next four default to TRUE for etags, but to FALSE for ctags. */
488 static bool typedefs; /* -t: create tags for C and Ada typedefs */
489 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
490 /* 0 struct/enum/union decls, and C++ */
491 /* member functions. */
492 static bool constantypedefs; /* -d: create tags for C #define, enum */
493 /* constants and variables. */
494 /* -D: opposite of -d. Default under ctags. */
495 static bool globals; /* create tags for global variables */
496 static bool members; /* create tags for C member variables */
497 static bool declarations; /* --declarations: tag them and extern in C&Co*/
498 static bool no_line_directive; /* ignore #line directives (undocumented) */
499 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
500 static bool update; /* -u: update tags */
501 static bool vgrind_style; /* -v: create vgrind style index output */
502 static bool no_warnings; /* -w: suppress warnings (undocumented) */
503 static bool cxref_style; /* -x: create cxref style output */
504 static bool cplusplus; /* .[hc] means C++, not C (undocumented) */
505 static bool ignoreindent; /* -I: ignore indentation in C */
506 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
507
508 /* STDIN is defined in LynxOS system headers */
509 #ifdef STDIN
510 # undef STDIN
511 #endif
512
513 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
514 static bool parsing_stdin; /* --parse-stdin used */
515
516 static regexp *p_head; /* list of all regexps */
517 static bool need_filebuf; /* some regexes are multi-line */
518
519 static struct option longopts[] =
520 {
521 { "append", no_argument, NULL, 'a' },
522 { "packages-only", no_argument, &packages_only, TRUE },
523 { "c++", no_argument, NULL, 'C' },
524 { "declarations", no_argument, &declarations, TRUE },
525 { "no-line-directive", no_argument, &no_line_directive, TRUE },
526 { "no-duplicates", no_argument, &no_duplicates, TRUE },
527 { "help", no_argument, NULL, 'h' },
528 { "help", no_argument, NULL, 'H' },
529 { "ignore-indentation", no_argument, NULL, 'I' },
530 { "language", required_argument, NULL, 'l' },
531 { "members", no_argument, &members, TRUE },
532 { "no-members", no_argument, &members, FALSE },
533 { "output", required_argument, NULL, 'o' },
534 { "regex", required_argument, NULL, 'r' },
535 { "no-regex", no_argument, NULL, 'R' },
536 { "ignore-case-regex", required_argument, NULL, 'c' },
537 { "parse-stdin", required_argument, NULL, STDIN },
538 { "version", no_argument, NULL, 'V' },
539
540 #if CTAGS /* Ctags options */
541 { "backward-search", no_argument, NULL, 'B' },
542 { "cxref", no_argument, NULL, 'x' },
543 { "defines", no_argument, NULL, 'd' },
544 { "globals", no_argument, &globals, TRUE },
545 { "typedefs", no_argument, NULL, 't' },
546 { "typedefs-and-c++", no_argument, NULL, 'T' },
547 { "update", no_argument, NULL, 'u' },
548 { "vgrind", no_argument, NULL, 'v' },
549 { "no-warn", no_argument, NULL, 'w' },
550
551 #else /* Etags options */
552 { "no-defines", no_argument, NULL, 'D' },
553 { "no-globals", no_argument, &globals, FALSE },
554 { "include", required_argument, NULL, 'i' },
555 #endif
556 { NULL }
557 };
558
559 static compressor compressors[] =
560 {
561 { "z", "gzip -d -c"},
562 { "Z", "gzip -d -c"},
563 { "gz", "gzip -d -c"},
564 { "GZ", "gzip -d -c"},
565 { "bz2", "bzip2 -d -c" },
566 { NULL }
567 };
568
569 /*
570 * Language stuff.
571 */
572
573 /* Ada code */
574 static char *Ada_suffixes [] =
575 { "ads", "adb", "ada", NULL };
576 static char Ada_help [] =
577 "In Ada code, functions, procedures, packages, tasks and types are\n\
578 tags. Use the `--packages-only' option to create tags for\n\
579 packages only.\n\
580 Ada tag names have suffixes indicating the type of entity:\n\
581 Entity type: Qualifier:\n\
582 ------------ ----------\n\
583 function /f\n\
584 procedure /p\n\
585 package spec /s\n\
586 package body /b\n\
587 type /t\n\
588 task /k\n\
589 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
590 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
591 will just search for any tag `bidule'.";
592
593 /* Assembly code */
/* File name suffixes of assembler sources; the list ends with NULL. */
static char *Asm_suffixes [] =
{
  "a",                          /* Unix assembler */
  "asm",                        /* Microcontroller assembly */
  "def",                        /* BSO/Tasking definition includes */
  "inc",                        /* Microcontroller include files */
  "ins",                        /* Microcontroller include files */
  "s",                          /* Unix assembler */
  "sa",                         /* Unix assembler */
  "S",                          /* cpp-processed Unix assembler */
  "src",                        /* BSO/Tasking C compiler output */
  NULL
};

/* Detailed help for the asm language. */
static char Asm_help [] =
"In assembler code, labels appearing at the beginning of a line,\n\
followed by a colon, are tags.";
608
609
610 /* Note that .c and .h can be considered C++, if the --c++ flag was
611 given, or if the `class' or `template' keywords are met inside the file.
612 That is why default_C_entries is called for these. */
613 static char *default_C_suffixes [] =
614 { "c", "h", NULL };
615 #if CTAGS /* C help for Ctags */
616 static char default_C_help [] =
617 "In C code, any C function is a tag. Use -t to tag typedefs.\n\
618 Use -T to tag definitions of `struct', `union' and `enum'.\n\
619 Use -d to tag `#define' macro definitions and `enum' constants.\n\
620 Use --globals to tag global variables.\n\
621 You can tag function declarations and external variables by\n\
622 using `--declarations', and struct members by using `--members'.";
623 #else /* C help for Etags */
624 static char default_C_help [] =
625 "In C code, any C function or typedef is a tag, and so are\n\
626 definitions of `struct', `union' and `enum'. `#define' macro\n\
627 definitions and `enum' constants are tags unless you specify\n\
628 `--no-defines'. Global variables are tags unless you specify\n\
629 `--no-globals'. Use of `--no-globals' and `--no-defines'\n\
630 can make the tags table file much smaller.\n\
631 You can tag function declarations and external variables by\n\
632 using `--declarations', and struct members by using `--members'.";
633 #endif /* C help for Ctags and Etags */
634
635 static char *Cplusplus_suffixes [] =
636 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
637 "M", /* Objective C++ */
638 "pdb", /* Postscript with C syntax */
639 NULL };
640 static char Cplusplus_help [] =
641 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
642 --help --lang=c --lang=c++ for full help.)\n\
643 In addition to C tags, member functions are also recognized. Member\n\
644 variables are also recognized if you use the `--members' option.\n\
645 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
646 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
647 `operator+'.";
648
649 static char *Cjava_suffixes [] =
650 { "java", NULL };
651 static char Cjava_help [] =
652 "In Java code, all the tags constructs of C and C++ code are\n\
653 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
654
655
/* File name suffixes of Cobol sources; the list ends with NULL. */
static char *Cobol_suffixes [] =
{
  "COB",
  "cob",
  NULL
};

/* Detailed help for the cobol language. */
static char Cobol_help [] =
"In Cobol code, tags are paragraph names; that is, any word\n\
starting in column 8 and followed by a period.";
661
662 static char *Cstar_suffixes [] =
663 { "cs", "hs", NULL };
664
/* File name suffixes of Erlang sources; the list ends with NULL. */
static char *Erlang_suffixes [] =
{
  "erl",
  "hrl",
  NULL
};

/* Detailed help for the erlang language. */
static char Erlang_help [] =
"In Erlang code, the tags are the functions, records and macros\n\
defined in the file.";
670
/* File name suffixes of Forth sources; the list ends with NULL.
   Like every other language table in this file it is private to the
   file, hence `static'. */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };

/* Detailed help for the forth language. */
static char Forth_help [] =
"In Forth code, tags are words defined by `:',\n\
constant, code, create, defer, value, variable, buffer:, field.";
676
/* File name suffixes of Fortran sources; the list ends with NULL. */
static char *Fortran_suffixes [] =
{
  "F",
  "f",
  "f90",
  "for",
  NULL
};

/* Detailed help for the fortran language. */
static char Fortran_help [] =
"In Fortran code, functions, subroutines and block data are tags.";
681
/* File name suffixes of HTML files; the list ends with NULL. */
static char *HTML_suffixes [] =
{
  "htm",
  "html",
  "shtml",
  NULL
};

/* Detailed help for the html language. */
static char HTML_help [] =
"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
`h3' headers. Also, tags are `name=' in anchors and all\n\
occurrences of `id='.";
688
/* File name suffixes of Lisp sources; the list ends with NULL. */
static char *Lisp_suffixes [] =
{
  "cl",
  "clisp",
  "el",
  "l",
  "lisp",
  "LSP",
  "lsp",
  "ml",
  NULL
};

/* Detailed help for the lisp language. */
static char Lisp_help [] =
"In Lisp code, any function defined with `defun', any variable\n\
defined with `defvar' or `defconst', and in general the first\n\
argument of any expression that starts with `(def' in column zero\n\
is a tag.";
696
/* File name suffixes of Lua scripts; the list ends with NULL. */
static char *Lua_suffixes [] =
{
  "lua",
  "LUA",
  NULL
};

/* Detailed help for the lua language. */
static char Lua_help [] =
"In Lua scripts, all functions are tags.";
701
/* Makefiles are recognised by whole file name rather than by suffix;
   the list ends with NULL. */
static char *Makefile_filenames [] =
{
  "Makefile",
  "makefile",
  "GNUMakefile",
  "Makefile.in",
  "Makefile.am",
  NULL
};

/* Detailed help for the makefile language. */
static char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify `--no-globals'.";
707
708 static char *Objc_suffixes [] =
709 { "lm", /* Objective lex file */
710 "m", /* Objective C file */
711 NULL };
712 static char Objc_help [] =
713 "In Objective C code, tags include Objective C definitions for classes,\n\
714 class categories, methods and protocols. Tags for variables and\n\
715 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
716 (Use --help --lang=c --lang=objc --lang=java for full help.)";
717
718 static char *Pascal_suffixes [] =
719 { "p", "pas", NULL };
720 static char Pascal_help [] =
721 "In Pascal code, the tags are the functions and procedures defined\n\
722 in the file.";
723 /* " // this is for working around an Emacs highlighting bug... */
724
/* File name suffixes of Perl sources; the list ends with NULL. */
static char *Perl_suffixes [] =
{
  "pl",
  "pm",
  NULL
};

/* Interpreter names that identify a Perl script; the list ends with
   NULL. */
static char *Perl_interpreters [] =
{
  "perl",
  "@PERL@",
  NULL
};

/* Detailed help for the perl language. */
static char Perl_help [] =
"In Perl code, the tags are the packages, subroutines and variables\n\
defined by the `package', `sub', `my' and `local' keywords. Use\n\
`--globals' if you want to tag global variables. Tags for\n\
subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
defined in the default package is `main::SUB'.";
735
736 static char *PHP_suffixes [] =
737 { "php", "php3", "php4", NULL };
738 static char PHP_help [] =
739 "In PHP code, tags are functions, classes and defines. When using\n\
740 the `--members' option, vars are tags too.";
741
742 static char *plain_C_suffixes [] =
743 { "pc", /* Pro*C file */
744 NULL };
745
746 static char *PS_suffixes [] =
747 { "ps", "psw", NULL }; /* .psw is for PSWrap */
748 static char PS_help [] =
749 "In PostScript code, the tags are the functions.";
750
751 static char *Prolog_suffixes [] =
752 { "prolog", NULL };
753 static char Prolog_help [] =
754 "In Prolog code, tags are predicates and rules at the beginning of\n\
755 line.";
756
/* File name suffixes of Python sources; the list ends with NULL. */
static char *Python_suffixes [] =
{
  "py",
  NULL
};

/* Detailed help for the python language. */
static char Python_help [] =
"In Python code, `def' or `class' at the beginning of a line\n\
generate a tag.";
762
763 /* Can't do the `SCM' or `scm' prefix with a version number. */
/* File name suffixes of Scheme sources; the list ends with NULL. */
static char *Scheme_suffixes [] =
{
  "oak",
  "sch",
  "scheme",
  "SCM",
  "scm",
  "SM",
  "sm",
  "ss",
  "t",
  NULL
};

/* Detailed help for the scheme language. */
static char Scheme_help [] =
"In Scheme code, tags include anything defined with `def' or with a\n\
construct whose name starts with `def'. They also include\n\
variables set with `set!' at top level in the file.";
770
771 static char *TeX_suffixes [] =
772 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
773 static char TeX_help [] =
774 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
775 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
776 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
777 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
778 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
779 \n\
780 Other commands can be specified by setting the environment variable\n\
781 `TEXTAGS' to a colon-separated list like, for example,\n\
782 TEXTAGS=\"mycommand:myothercommand\".";
783
784
785 static char *Texinfo_suffixes [] =
786 { "texi", "texinfo", "txi", NULL };
787 static char Texinfo_help [] =
788 "for texinfo files, lines starting with @node are tagged.";
789
/* File name suffixes of Yacc and Bison grammars; the list ends with
   NULL. */
static char *Yacc_suffixes [] =
{
  "y",
  "y++",
  "ym",                         /* Objective yacc file */
  "yxx",
  "yy",
  NULL
};

/* Detailed help for the yacc language. */
static char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs. The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";
797
/* Detailed help for the pseudo-language `auto'. */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
801
802 static char none_help [] =
803 "`none' is not a real language, it indicates to only do\n\
804 regexp processing on files.";
805
806 static char no_lang_help [] =
807 "No detailed help available for this language.";
808
809
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * Each row is a positional initializer.  Judging from the rows below and
 * from how the fields are read elsewhere in this file, the order is
 * presumably: name, help, function, suffixes, filenames, interpreters,
 * metasource -- confirm against the `language' typedef.  Trailing fields
 * that are omitted are zero-initialized.  The table is terminated by a
 * row whose name is NULL; every loop over it relies on that sentinel.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* Recognized by whole file name rather than by suffix. */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  /* Also recognized through the interpreter named on a #! line. */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  /* The only row that sets the last (metasource) field. */
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
849
850 \f
851 static void
852 print_language_names ()
853 {
854 language *lang;
855 char **name, **ext;
856
857 puts ("\nThese are the currently supported languages, along with the\n\
858 default file names and dot suffixes:");
859 for (lang = lang_names; lang->name != NULL; lang++)
860 {
861 printf (" %-*s", 10, lang->name);
862 if (lang->filenames != NULL)
863 for (name = lang->filenames; *name != NULL; name++)
864 printf (" %s", *name);
865 if (lang->suffixes != NULL)
866 for (ext = lang->suffixes; *ext != NULL; ext++)
867 printf (" .%s", *ext);
868 puts ("");
869 }
870 puts ("where `auto' means use default language for files based on file\n\
871 name suffix, and `none' means only do regexp processing on files.\n\
872 If no language is specified and no matching suffix is found,\n\
873 the first line of the file is read for a sharp-bang (#!) sequence\n\
874 followed by the name of an interpreter. If no such sequence is found,\n\
875 Fortran is tried first; if no tags are found, C is tried next.\n\
876 When parsing any C file, a \"class\" or \"template\" keyword\n\
877 switches to C++.");
878 puts ("Compressed files are supported using gzip and bzip2.\n\
879 \n\
880 For detailed help on a given language use, for example,\n\
881 etags --help --lang=ada.");
882 }
883
884 #ifndef EMACS_NAME
885 # define EMACS_NAME "standalone"
886 #endif
887 #ifndef VERSION
888 # define VERSION "17.26"
889 #endif
890 static void
891 print_version ()
892 {
893 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
894 puts ("Copyright (C) 2008 Free Software Foundation, Inc.");
895 puts ("This program is distributed under the terms in ETAGS.README");
896
897 exit (EXIT_SUCCESS);
898 }
899
/*
 * Print help and exit successfully; this function does not return.
 *
 * ARGBUFFER is the argument list built by main, terminated by an entry
 * whose arg_type is at_end.  If it contains any at_language entries,
 * only the help text of those languages is printed.  Otherwise the
 * general usage message is printed; which options are described
 * depends on CTAGS, i.e. on whether this executable is ctags or etags.
 */
static void
print_help (argbuffer)
     argument *argbuffer;
{
  bool help_for_lang = FALSE;

  /* Language-specific help, with a blank line between languages. */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* When language help was printed, the general help is skipped. */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  if (NO_LONG_OPTIONS)
    puts ("WARNING: long option names do not work with this executable,\n\
as it is not linked with GNU getopt.");
  else
    puts ("You may use unambiguous abbreviations for the long option names.");
  puts (" A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
Append tag entries to existing tags file.");

  puts ("--packages-only\n\
For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
Write the search commands for the tag entries using '?', the\n\
backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
Treat files whose name suffix defaults to C language as C++ files.");
     */

  /* The first sentence is shared; its ending differs between ctags
     and etags because their --globals defaults are opposite. */
  puts ("--declarations\n\
In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  if (CTAGS)
    puts ("-d, --defines\n\
Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
Don't create tag entries for C #define constants and enum constants.\n\
This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
Include a note in tag file indicating that, when searching for\n\
a tag, one should also consult the tags file FILE after\n\
checking the current file.");

  puts ("-l LANG, --language=LANG\n\
Force the following files to be considered as written in the\n\
named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
Do not create tag entries for global variables in some\n\
languages. This makes the tags file smaller.");
  /* Printed for both ctags and etags. */
  puts ("--members\n\
Create tag entries for members of structures in some languages.");

  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
Make a tag for each line matching a regular expression pattern\n\
in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
files only. REGEXFILE is a file containing one REGEXP per line.\n\
REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts (" If TAGNAME/ is present, the tags created are named.\n\
For example Tcl named tags can be created with:\n\
--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
MODS are optional one-letter modifiers: `i' means to ignore case,\n\
`m' means to allow multi-line matches, `s' implies `m' and\n\
causes dot to match any character, including newline.");
  puts ("-R, --no-regex\n\
Don't create tags from regexps for the following files.");
  puts ("-I, --ignore-indentation\n\
In C and C++ do not assume that a closing brace in the first\n\
column is the final brace of a function or structure definition.");
  puts ("-o FILE, --output=FILE\n\
Write the tags to FILE.");
  puts ("--parse-stdin=NAME\n\
Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions are printed for ctags only. */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
Generate tag entries for C typedefs, C struct/enum/union tags,\n\
and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
Update the tag entries for the given files, leaving tag\n\
entries for other files in place. Currently, this is\n\
implemented by deleting the existing entries for the given\n\
files and then rewriting the new entries at the end of the\n\
tags file. It is often faster to simply rebuild the entire\n\
tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
Print on the standard output an index of items intended for\n\
human consumption, similar to the output of vgrind. The index\n\
is sorted, and gives the page number of each item.");
# if PRINT_UNDOCUMENTED_OPTIONS_HELP
      /* NOTE(review): both entries below are listed under `-w';
         confirm which long option that letter really maps to. */
      puts ("-w, --no-duplicates\n\
Do not create duplicate tag entries, for compatibility with\n\
traditional ctags.");
      puts ("-w, --no-warn\n\
Suppress warning messages about duplicate tag entries.");
# endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
      puts ("-x, --cxref\n\
Like --vgrind, but in the style of cxref, rather than vgrind.\n\
The output uses line numbers instead of page numbers, but\n\
beyond that the differences are cosmetic; try both to see\n\
which you like.");
    }

  puts ("-V, --version\n\
Print the version of the program.\n\
-h, --help\n\
Print this help message.\n\
Followed by one or more `--language' options prints detailed\n\
help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1059
1060 \f
1061 #ifdef VMS /* VMS specific functions */
1062
/* String terminator. */
#define EOS     '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255
/* A counted string: CURLEN followed by the characters in BODY, which
   has room for MAX_FILE_SPEC_LEN characters plus a terminator.
   fn_exp hands it to lib$find_file through a descriptor and then
   terminates BODY at index CURLEN. */
typedef struct  {
  short   curlen;
  char    body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1072
1073 /*
1074 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1075 returning in each successive call the next file name matching the input
1076 spec. The function expects that each in_spec passed
1077 to it will be processed to completion; in particular, up to and
1078 including the call following that in which the last matching name
1079 is returned, the function ignores the value of in_spec, and will
1080 only start processing a new spec with the following call.
1081 If an error occurs, on return out_spec contains the value
1082 of in_spec when the error occurred.
1083
1084 With each successive file name returned in out_spec, the
1085 function's return value is one. When there are no more matching
1086 names the function returns zero. If on the first call no file
1087 matches in_spec, or there is any other error, -1 is returned.
1088 */
1089
1090 #include <rmsdef.h>
1091 #include <descrip.h>
1092 #define OUTSIZE MAX_FILE_SPEC_LEN
1093 static short
1094 fn_exp (out, in)
1095 vspec *out;
1096 char *in;
1097 {
1098 static long context = 0;
1099 static struct dsc$descriptor_s o;
1100 static struct dsc$descriptor_s i;
1101 static bool pass1 = TRUE;
1102 long status;
1103 short retval;
1104
1105 if (pass1)
1106 {
1107 pass1 = FALSE;
1108 o.dsc$a_pointer = (char *) out;
1109 o.dsc$w_length = (short)OUTSIZE;
1110 i.dsc$a_pointer = in;
1111 i.dsc$w_length = (short)strlen(in);
1112 i.dsc$b_dtype = DSC$K_DTYPE_T;
1113 i.dsc$b_class = DSC$K_CLASS_S;
1114 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1115 o.dsc$b_class = DSC$K_CLASS_VS;
1116 }
1117 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1118 {
1119 out->body[out->curlen] = EOS;
1120 return 1;
1121 }
1122 else if (status == RMS$_NMF)
1123 retval = 0;
1124 else
1125 {
1126 strcpy(out->body, in);
1127 retval = -1;
1128 }
1129 lib$find_file_end(&context);
1130 pass1 = TRUE;
1131 return retval;
1132 }
1133
1134 /*
1135 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1136 name of each file specified by the provided arg expanding wildcards.
1137 */
1138 static char *
1139 gfnames (arg, p_error)
1140 char *arg;
1141 bool *p_error;
1142 {
1143 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1144
1145 switch (fn_exp (&filename, arg))
1146 {
1147 case 1:
1148 *p_error = FALSE;
1149 return filename.body;
1150 case 0:
1151 *p_error = FALSE;
1152 return NULL;
1153 default:
1154 *p_error = TRUE;
1155 return filename.body;
1156 }
1157 }
1158
#ifndef OLD                     /* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for system(): reports that running shell commands is not
 * implemented and fails.  The original fell off the end without a
 * return statement although main tests the result, so the caller read
 * an indeterminate value; -1 is returned explicitly instead.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1166
1167 #define VERSION_DELIM ';'
1168 char *massage_name (s)
1169 char *s;
1170 {
1171 char *start = s;
1172
1173 for ( ; *s; s++)
1174 if (*s == VERSION_DELIM)
1175 {
1176 *s = EOS;
1177 break;
1178 }
1179 else
1180 *s = lowcase (*s);
1181 return start;
1182 }
1183 #endif /* VMS */
1184
1185 \f
1186 int
1187 main (argc, argv)
1188 int argc;
1189 char *argv[];
1190 {
1191 int i;
1192 unsigned int nincluded_files;
1193 char **included_files;
1194 argument *argbuffer;
1195 int current_arg, file_count;
1196 linebuffer filename_lb;
1197 bool help_asked = FALSE;
1198 #ifdef VMS
1199 bool got_err;
1200 #endif
1201 char *optstring;
1202 int opt;
1203
1204
1205 #ifdef DOS_NT
1206 _fmode = O_BINARY; /* all of files are treated as binary files */
1207 #endif /* DOS_NT */
1208
1209 progname = argv[0];
1210 nincluded_files = 0;
1211 included_files = xnew (argc, char *);
1212 current_arg = 0;
1213 file_count = 0;
1214
1215 /* Allocate enough no matter what happens. Overkill, but each one
1216 is small. */
1217 argbuffer = xnew (argc, argument);
1218
1219 /*
1220 * If etags, always find typedefs and structure tags. Why not?
1221 * Also default to find macro constants, enum constants and
1222 * global variables.
1223 */
1224 if (!CTAGS)
1225 {
1226 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1227 globals = TRUE;
1228 }
1229
1230 /* When the optstring begins with a '-' getopt_long does not rearrange the
1231 non-options arguments to be at the end, but leaves them alone. */
1232 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1233 "ac:Cf:Il:o:r:RSVhH",
1234 (CTAGS) ? "BxdtTuvw" : "Di:");
1235
1236 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1237 switch (opt)
1238 {
1239 case 0:
1240 /* If getopt returns 0, then it has already processed a
1241 long-named option. We should do nothing. */
1242 break;
1243
1244 case 1:
1245 /* This means that a file name has been seen. Record it. */
1246 argbuffer[current_arg].arg_type = at_filename;
1247 argbuffer[current_arg].what = optarg;
1248 ++current_arg;
1249 ++file_count;
1250 break;
1251
1252 case STDIN:
1253 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1254 argbuffer[current_arg].arg_type = at_stdin;
1255 argbuffer[current_arg].what = optarg;
1256 ++current_arg;
1257 ++file_count;
1258 if (parsing_stdin)
1259 fatal ("cannot parse standard input more than once", (char *)NULL);
1260 parsing_stdin = TRUE;
1261 break;
1262
1263 /* Common options. */
1264 case 'a': append_to_tagfile = TRUE; break;
1265 case 'C': cplusplus = TRUE; break;
1266 case 'f': /* for compatibility with old makefiles */
1267 case 'o':
1268 if (tagfile)
1269 {
1270 error ("-o option may only be given once.", (char *)NULL);
1271 suggest_asking_for_help ();
1272 /* NOTREACHED */
1273 }
1274 tagfile = optarg;
1275 break;
1276 case 'I':
1277 case 'S': /* for backward compatibility */
1278 ignoreindent = TRUE;
1279 break;
1280 case 'l':
1281 {
1282 language *lang = get_language_from_langname (optarg);
1283 if (lang != NULL)
1284 {
1285 argbuffer[current_arg].lang = lang;
1286 argbuffer[current_arg].arg_type = at_language;
1287 ++current_arg;
1288 }
1289 }
1290 break;
1291 case 'c':
1292 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1293 optarg = concat (optarg, "i", ""); /* memory leak here */
1294 /* FALLTHRU */
1295 case 'r':
1296 argbuffer[current_arg].arg_type = at_regexp;
1297 argbuffer[current_arg].what = optarg;
1298 ++current_arg;
1299 break;
1300 case 'R':
1301 argbuffer[current_arg].arg_type = at_regexp;
1302 argbuffer[current_arg].what = NULL;
1303 ++current_arg;
1304 break;
1305 case 'V':
1306 print_version ();
1307 break;
1308 case 'h':
1309 case 'H':
1310 help_asked = TRUE;
1311 break;
1312
1313 /* Etags options */
1314 case 'D': constantypedefs = FALSE; break;
1315 case 'i': included_files[nincluded_files++] = optarg; break;
1316
1317 /* Ctags options. */
1318 case 'B': searchar = '?'; break;
1319 case 'd': constantypedefs = TRUE; break;
1320 case 't': typedefs = TRUE; break;
1321 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1322 case 'u': update = TRUE; break;
1323 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1324 case 'x': cxref_style = TRUE; break;
1325 case 'w': no_warnings = TRUE; break;
1326 default:
1327 suggest_asking_for_help ();
1328 /* NOTREACHED */
1329 }
1330
1331 /* No more options. Store the rest of arguments. */
1332 for (; optind < argc; optind++)
1333 {
1334 argbuffer[current_arg].arg_type = at_filename;
1335 argbuffer[current_arg].what = argv[optind];
1336 ++current_arg;
1337 ++file_count;
1338 }
1339
1340 argbuffer[current_arg].arg_type = at_end;
1341
1342 if (help_asked)
1343 print_help (argbuffer);
1344 /* NOTREACHED */
1345
1346 if (nincluded_files == 0 && file_count == 0)
1347 {
1348 error ("no input files specified.", (char *)NULL);
1349 suggest_asking_for_help ();
1350 /* NOTREACHED */
1351 }
1352
1353 if (tagfile == NULL)
1354 tagfile = CTAGS ? "tags" : "TAGS";
1355 cwd = etags_getcwd (); /* the current working directory */
1356 if (cwd[strlen (cwd) - 1] != '/')
1357 {
1358 char *oldcwd = cwd;
1359 cwd = concat (oldcwd, "/", "");
1360 free (oldcwd);
1361 }
1362 /* Relative file names are made relative to the current directory. */
1363 if (streq (tagfile, "-")
1364 || strneq (tagfile, "/dev/", 5))
1365 tagfiledir = cwd;
1366 else
1367 tagfiledir = absolute_dirname (tagfile, cwd);
1368
1369 init (); /* set up boolean "functions" */
1370
1371 linebuffer_init (&lb);
1372 linebuffer_init (&filename_lb);
1373 linebuffer_init (&filebuf);
1374 linebuffer_init (&token_name);
1375
1376 if (!CTAGS)
1377 {
1378 if (streq (tagfile, "-"))
1379 {
1380 tagf = stdout;
1381 #ifdef DOS_NT
1382 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1383 doesn't take effect until after `stdout' is already open). */
1384 if (!isatty (fileno (stdout)))
1385 setmode (fileno (stdout), O_BINARY);
1386 #endif /* DOS_NT */
1387 }
1388 else
1389 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1390 if (tagf == NULL)
1391 pfatal (tagfile);
1392 }
1393
1394 /*
1395 * Loop through files finding functions.
1396 */
1397 for (i = 0; i < current_arg; i++)
1398 {
1399 static language *lang; /* non-NULL if language is forced */
1400 char *this_file;
1401
1402 switch (argbuffer[i].arg_type)
1403 {
1404 case at_language:
1405 lang = argbuffer[i].lang;
1406 break;
1407 case at_regexp:
1408 analyse_regex (argbuffer[i].what);
1409 break;
1410 case at_filename:
1411 #ifdef VMS
1412 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1413 {
1414 if (got_err)
1415 {
1416 error ("can't find file %s\n", this_file);
1417 argc--, argv++;
1418 }
1419 else
1420 {
1421 this_file = massage_name (this_file);
1422 }
1423 #else
1424 this_file = argbuffer[i].what;
1425 #endif
1426 /* Input file named "-" means read file names from stdin
1427 (one per line) and use them. */
1428 if (streq (this_file, "-"))
1429 {
1430 if (parsing_stdin)
1431 fatal ("cannot parse standard input AND read file names from it",
1432 (char *)NULL);
1433 while (readline_internal (&filename_lb, stdin) > 0)
1434 process_file_name (filename_lb.buffer, lang);
1435 }
1436 else
1437 process_file_name (this_file, lang);
1438 #ifdef VMS
1439 }
1440 #endif
1441 break;
1442 case at_stdin:
1443 this_file = argbuffer[i].what;
1444 process_file (stdin, this_file, lang);
1445 break;
1446 }
1447 }
1448
1449 free_regexps ();
1450 free (lb.buffer);
1451 free (filebuf.buffer);
1452 free (token_name.buffer);
1453
1454 if (!CTAGS || cxref_style)
1455 {
1456 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1457 put_entries (nodehead);
1458 free_tree (nodehead);
1459 nodehead = NULL;
1460 if (!CTAGS)
1461 {
1462 fdesc *fdp;
1463
1464 /* Output file entries that have no tags. */
1465 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1466 if (!fdp->written)
1467 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1468
1469 while (nincluded_files-- > 0)
1470 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1471
1472 if (fclose (tagf) == EOF)
1473 pfatal (tagfile);
1474 }
1475
1476 exit (EXIT_SUCCESS);
1477 }
1478
1479 /* From here on, we are in (CTAGS && !cxref_style) */
1480 if (update)
1481 {
1482 char cmd[BUFSIZ];
1483 for (i = 0; i < current_arg; ++i)
1484 {
1485 switch (argbuffer[i].arg_type)
1486 {
1487 case at_filename:
1488 case at_stdin:
1489 break;
1490 default:
1491 continue; /* the for loop */
1492 }
1493 sprintf (cmd,
1494 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1495 tagfile, argbuffer[i].what, tagfile);
1496 if (system (cmd) != EXIT_SUCCESS)
1497 fatal ("failed to execute shell command", (char *)NULL);
1498 }
1499 append_to_tagfile = TRUE;
1500 }
1501
1502 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1503 if (tagf == NULL)
1504 pfatal (tagfile);
1505 put_entries (nodehead); /* write all the tags (CTAGS) */
1506 free_tree (nodehead);
1507 nodehead = NULL;
1508 if (fclose (tagf) == EOF)
1509 pfatal (tagfile);
1510
1511 if (CTAGS)
1512 if (append_to_tagfile || update)
1513 {
1514 char cmd[2*BUFSIZ+20];
1515 /* Maybe these should be used:
1516 setenv ("LC_COLLATE", "C", 1);
1517 setenv ("LC_ALL", "C", 1); */
1518 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1519 exit (system (cmd));
1520 }
1521 return EXIT_SUCCESS;
1522 }
1523
1524
1525 /*
1526 * Return a compressor given the file name. If EXTPTR is non-zero,
1527 * return a pointer into FILE where the compressor-specific
1528 * extension begins. If no compressor is found, NULL is returned
1529 * and EXTPTR is not significant.
1530 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1531 */
1532 static compressor *
1533 get_compressor_from_suffix (file, extptr)
1534 char *file;
1535 char **extptr;
1536 {
1537 compressor *compr;
1538 char *slash, *suffix;
1539
1540 /* This relies on FN to be after canonicalize_filename,
1541 so we don't need to consider backslashes on DOS_NT. */
1542 slash = etags_strrchr (file, '/');
1543 suffix = etags_strrchr (file, '.');
1544 if (suffix == NULL || suffix < slash)
1545 return NULL;
1546 if (extptr != NULL)
1547 *extptr = suffix;
1548 suffix += 1;
1549 /* Let those poor souls who live with DOS 8+3 file name limits get
1550 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1551 Only the first do loop is run if not MSDOS */
1552 do
1553 {
1554 for (compr = compressors; compr->suffix != NULL; compr++)
1555 if (streq (compr->suffix, suffix))
1556 return compr;
1557 if (!MSDOS)
1558 break; /* do it only once: not really a loop */
1559 if (extptr != NULL)
1560 *extptr = ++suffix;
1561 } while (*suffix != '\0');
1562 return NULL;
1563 }
1564
1565
1566
1567 /*
1568 * Return a language given the name.
1569 */
1570 static language *
1571 get_language_from_langname (name)
1572 const char *name;
1573 {
1574 language *lang;
1575
1576 if (name == NULL)
1577 error ("empty language name", (char *)NULL);
1578 else
1579 {
1580 for (lang = lang_names; lang->name != NULL; lang++)
1581 if (streq (name, lang->name))
1582 return lang;
1583 error ("unknown language \"%s\"", name);
1584 }
1585
1586 return NULL;
1587 }
1588
1589
1590 /*
1591 * Return a language given the interpreter name.
1592 */
1593 static language *
1594 get_language_from_interpreter (interpreter)
1595 char *interpreter;
1596 {
1597 language *lang;
1598 char **iname;
1599
1600 if (interpreter == NULL)
1601 return NULL;
1602 for (lang = lang_names; lang->name != NULL; lang++)
1603 if (lang->interpreters != NULL)
1604 for (iname = lang->interpreters; *iname != NULL; iname++)
1605 if (streq (*iname, interpreter))
1606 return lang;
1607
1608 return NULL;
1609 }
1610
1611
1612
1613 /*
1614 * Return a language given the file name.
1615 */
1616 static language *
1617 get_language_from_filename (file, case_sensitive)
1618 char *file;
1619 bool case_sensitive;
1620 {
1621 language *lang;
1622 char **name, **ext, *suffix;
1623
1624 /* Try whole file name first. */
1625 for (lang = lang_names; lang->name != NULL; lang++)
1626 if (lang->filenames != NULL)
1627 for (name = lang->filenames; *name != NULL; name++)
1628 if ((case_sensitive)
1629 ? streq (*name, file)
1630 : strcaseeq (*name, file))
1631 return lang;
1632
1633 /* If not found, try suffix after last dot. */
1634 suffix = etags_strrchr (file, '.');
1635 if (suffix == NULL)
1636 return NULL;
1637 suffix += 1;
1638 for (lang = lang_names; lang->name != NULL; lang++)
1639 if (lang->suffixes != NULL)
1640 for (ext = lang->suffixes; *ext != NULL; ext++)
1641 if ((case_sensitive)
1642 ? streq (*ext, suffix)
1643 : strcaseeq (*ext, suffix))
1644 return lang;
1645 return NULL;
1646 }
1647
1648 \f
/*
 * This routine is called on each file argument.
 *
 * FILE is canonicalized in place.  The routine then works out which
 * file to actually read (FILE itself, FILE without its compression
 * suffix, or FILE with a compression suffix added), opens it, through
 * the compressor's command when it is compressed, and hands the stream
 * to process_file under the uncompressed name.  LANG is passed through
 * unchanged.  A file whose uncompressed name is already on the fdhead
 * list is skipped silently.
 */
static void
process_file_name (file, lang)
     char *file;
     language *lang;
{
  struct stat stat_buf;
  FILE *inf;
  fdesc *fdp;
  compressor *compr;
  char *compressed_name, *uncompressed_name;
  char *ext, *real_name;
  int retval;

  canonicalize_filename (file);
  /* Do not tag the tags file itself. */
  if (streq (file, tagfile) && !streq (tagfile, "-"))
    {
      error ("skipping inclusion of %s in self.", file);
      return;
    }
  /* REAL_NAME always aliases COMPRESSED_NAME or UNCOMPRESSED_NAME (or
     is NULL); the code below tells the two cases apart by comparing
     the pointers. */
  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
    {
      compressed_name = NULL;
      real_name = uncompressed_name = savestr (file);
    }
  else
    {
      real_name = compressed_name = savestr (file);
      /* Everything before the compression suffix. */
      uncompressed_name = savenstr (file, ext - file);
    }

  /* If the canonicalized uncompressed name
     has already been dealt with, skip it silently. */
  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
    {
      assert (fdp->infname != NULL);
      if (streq (uncompressed_name, fdp->infname))
        goto cleanup;
    }

  if (stat (real_name, &stat_buf) != 0)
    {
      /* Reset real_name and try with a different name. */
      real_name = NULL;
      if (compressed_name != NULL) /* try with the given suffix */
        {
          if (stat (uncompressed_name, &stat_buf) == 0)
            real_name = uncompressed_name;
        }
      else                      /* try all possible suffixes */
        {
          /* On success the loop is left with COMPR at the compressor
             whose suffix matched; the popen below relies on that. */
          for (compr = compressors; compr->suffix != NULL; compr++)
            {
              compressed_name = concat (file, ".", compr->suffix);
              if (stat (compressed_name, &stat_buf) != 0)
                {
                  if (MSDOS)
                    {
                      /* Try shorter variants of the added suffix by
                         deleting one character at a time, starting
                         with the dot. */
                      char *suf = compressed_name + strlen (file);
                      size_t suflen = strlen (compr->suffix) + 1;
                      for ( ; suf[1]; suf++, suflen--)
                        {
                          memmove (suf, suf + 1, suflen);
                          if (stat (compressed_name, &stat_buf) == 0)
                            {
                              real_name = compressed_name;
                              break;
                            }
                        }
                      if (real_name != NULL)
                        break;
                    } /* MSDOS */
                  free (compressed_name);
                  compressed_name = NULL;
                }
              else
                {
                  real_name = compressed_name;
                  break;
                }
            }
        }
      if (real_name == NULL)
        {
          perror (file);
          goto cleanup;
        }
    } /* try with a different name */

  if (!S_ISREG (stat_buf.st_mode))
    {
      error ("skipping %s: it is not a regular file.", real_name);
      goto cleanup;
    }
  if (real_name == compressed_name)
    {
      /* Read the file through the compressor's command.
         NOTE(review): REAL_NAME is appended to the command line as
         is; if popen hands this to a shell, names with blanks or
         metacharacters would break it -- confirm. */
      char *cmd = concat (compr->command, " ", real_name);
      inf = (FILE *) popen (cmd, "r");
      free (cmd);
    }
  else
    inf = fopen (real_name, "r");
  if (inf == NULL)
    {
      perror (real_name);
      goto cleanup;
    }

  process_file (inf, uncompressed_name, lang);

  /* Close the stream the same way it was opened. */
  if (real_name == compressed_name)
    retval = pclose (inf);
  else
    retval = fclose (inf);
  if (retval < 0)
    pfatal (file);

 cleanup:
  /* REAL_NAME is only an alias, so it is not freed separately. */
  if (compressed_name) free (compressed_name);
  if (uncompressed_name) free (uncompressed_name);
  last_node = NULL;
  curfdp = NULL;
  return;
}
1775
1776 static void
1777 process_file (fh, fn, lang)
1778 FILE *fh;
1779 char *fn;
1780 language *lang;
1781 {
1782 static const fdesc emptyfdesc;
1783 fdesc *fdp;
1784
1785 /* Create a new input file description entry. */
1786 fdp = xnew (1, fdesc);
1787 *fdp = emptyfdesc;
1788 fdp->next = fdhead;
1789 fdp->infname = savestr (fn);
1790 fdp->lang = lang;
1791 fdp->infabsname = absolute_filename (fn, cwd);
1792 fdp->infabsdir = absolute_dirname (fn, cwd);
1793 if (filename_is_absolute (fn))
1794 {
1795 /* An absolute file name. Canonicalize it. */
1796 fdp->taggedfname = absolute_filename (fn, NULL);
1797 }
1798 else
1799 {
1800 /* A file name relative to cwd. Make it relative
1801 to the directory of the tags file. */
1802 fdp->taggedfname = relative_filename (fn, tagfiledir);
1803 }
1804 fdp->usecharno = TRUE; /* use char position when making tags */
1805 fdp->prop = NULL;
1806 fdp->written = FALSE; /* not written on tags file yet */
1807
1808 fdhead = fdp;
1809 curfdp = fdhead; /* the current file description */
1810
1811 find_entries (fh);
1812
1813 /* If not Ctags, and if this is not metasource and if it contained no #line
1814 directives, we can write the tags and free all nodes pointing to
1815 curfdp. */
1816 if (!CTAGS
1817 && curfdp->usecharno /* no #line directives in this file */
1818 && !curfdp->lang->metasource)
1819 {
1820 node *np, *prev;
1821
1822 /* Look for the head of the sublist relative to this file. See add_node
1823 for the structure of the node tree. */
1824 prev = NULL;
1825 for (np = nodehead; np != NULL; prev = np, np = np->left)
1826 if (np->fdp == curfdp)
1827 break;
1828
1829 /* If we generated tags for this file, write and delete them. */
1830 if (np != NULL)
1831 {
1832 /* This is the head of the last sublist, if any. The following
1833 instructions depend on this being true. */
1834 assert (np->left == NULL);
1835
1836 assert (fdhead == curfdp);
1837 assert (last_node->fdp == curfdp);
1838 put_entries (np); /* write tags for file curfdp->taggedfname */
1839 free_tree (np); /* remove the written nodes */
1840 if (prev == NULL)
1841 nodehead = NULL; /* no nodes left */
1842 else
1843 prev->left = NULL; /* delete the pointer to the sublist */
1844 }
1845 }
1846 }
1847
1848 /*
1849 * This routine sets up the boolean pseudo-functions which work
1850 * by setting boolean flags dependent upon the corresponding character.
1851 * Every char which is NOT in that string is not a white char. Therefore,
1852 * all of the array "_wht" is set to FALSE, and then the elements
1853 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1854 * of a char is TRUE if it is the string "white", else FALSE.
1855 */
1856 static void
1857 init ()
1858 {
1859 register char *sp;
1860 register int i;
1861
1862 for (i = 0; i < CHARS; i++)
1863 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1864 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1865 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1866 notinname('\0') = notinname('\n');
1867 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1868 begtoken('\0') = begtoken('\n');
1869 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1870 intoken('\0') = intoken('\n');
1871 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1872 endtoken('\0') = endtoken('\n');
1873 }
1874
1875 /*
1876 * This routine opens the specified file and calls the function
1877 * which finds the function and type definitions.
1878 */
1879 static void
1880 find_entries (inf)
1881 FILE *inf;
1882 {
1883 char *cp;
1884 language *lang = curfdp->lang;
1885 Lang_function *parser = NULL;
1886
1887 /* If user specified a language, use it. */
1888 if (lang != NULL && lang->function != NULL)
1889 {
1890 parser = lang->function;
1891 }
1892
1893 /* Else try to guess the language given the file name. */
1894 if (parser == NULL)
1895 {
1896 lang = get_language_from_filename (curfdp->infname, TRUE);
1897 if (lang != NULL && lang->function != NULL)
1898 {
1899 curfdp->lang = lang;
1900 parser = lang->function;
1901 }
1902 }
1903
1904 /* Else look for sharp-bang as the first two characters. */
1905 if (parser == NULL
1906 && readline_internal (&lb, inf) > 0
1907 && lb.len >= 2
1908 && lb.buffer[0] == '#'
1909 && lb.buffer[1] == '!')
1910 {
1911 char *lp;
1912
1913 /* Set lp to point at the first char after the last slash in the
1914 line or, if no slashes, at the first nonblank. Then set cp to
1915 the first successive blank and terminate the string. */
1916 lp = etags_strrchr (lb.buffer+2, '/');
1917 if (lp != NULL)
1918 lp += 1;
1919 else
1920 lp = skip_spaces (lb.buffer + 2);
1921 cp = skip_non_spaces (lp);
1922 *cp = '\0';
1923
1924 if (strlen (lp) > 0)
1925 {
1926 lang = get_language_from_interpreter (lp);
1927 if (lang != NULL && lang->function != NULL)
1928 {
1929 curfdp->lang = lang;
1930 parser = lang->function;
1931 }
1932 }
1933 }
1934
1935 /* We rewind here, even if inf may be a pipe. We fail if the
1936 length of the first line is longer than the pipe block size,
1937 which is unlikely. */
1938 rewind (inf);
1939
1940 /* Else try to guess the language given the case insensitive file name. */
1941 if (parser == NULL)
1942 {
1943 lang = get_language_from_filename (curfdp->infname, FALSE);
1944 if (lang != NULL && lang->function != NULL)
1945 {
1946 curfdp->lang = lang;
1947 parser = lang->function;
1948 }
1949 }
1950
1951 /* Else try Fortran or C. */
1952 if (parser == NULL)
1953 {
1954 node *old_last_node = last_node;
1955
1956 curfdp->lang = get_language_from_langname ("fortran");
1957 find_entries (inf);
1958
1959 if (old_last_node == last_node)
1960 /* No Fortran entries found. Try C. */
1961 {
1962 /* We do not tag if rewind fails.
1963 Only the file name will be recorded in the tags file. */
1964 rewind (inf);
1965 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1966 find_entries (inf);
1967 }
1968 return;
1969 }
1970
1971 if (!no_line_directive
1972 && curfdp->lang != NULL && curfdp->lang->metasource)
1973 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1974 file, or anyway we parsed a file that is automatically generated from
1975 this one. If this is the case, the bingo.c file contained #line
1976 directives that generated tags pointing to this file. Let's delete
1977 them all before parsing this file, which is the real source. */
1978 {
1979 fdesc **fdpp = &fdhead;
1980 while (*fdpp != NULL)
1981 if (*fdpp != curfdp
1982 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1983 /* We found one of those! We must delete both the file description
1984 and all tags referring to it. */
1985 {
1986 fdesc *badfdp = *fdpp;
1987
1988 /* Delete the tags referring to badfdp->taggedfname
1989 that were obtained from badfdp->infname. */
1990 invalidate_nodes (badfdp, &nodehead);
1991
1992 *fdpp = badfdp->next; /* remove the bad description from the list */
1993 free_fdesc (badfdp);
1994 }
1995 else
1996 fdpp = &(*fdpp)->next; /* advance the list pointer */
1997 }
1998
1999 assert (parser != NULL);
2000
2001 /* Generic initialisations before reading from file. */
2002 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2003
2004 /* Generic initialisations before parsing file with readline. */
2005 lineno = 0; /* reset global line number */
2006 charno = 0; /* reset global char number */
2007 linecharno = 0; /* reset global char number of line start */
2008
2009 parser (inf);
2010
2011 regex_tag_multiline ();
2012 }
2013
2014 \f
2015 /*
2016 * Check whether an implicitly named tag should be created,
2017 * then call `pfnote'.
2018 * NAME is a string that is internally copied by this function.
2019 *
2020 * TAGS format specification
2021 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2022 * The following is explained in some more detail in etc/ETAGS.EBNF.
2023 *
2024 * make_tag creates tags with "implicit tag names" (unnamed tags)
2025 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2026 * 1. NAME does not contain any of the characters in NONAM;
2027 * 2. LINESTART contains name as either a rightmost, or rightmost but
2028 * one character, substring;
2029 * 3. the character, if any, immediately before NAME in LINESTART must
2030 * be a character in NONAM;
2031 * 4. the character, if any, immediately after NAME in LINESTART must
2032 * also be a character in NONAM.
2033 *
2034 * The implementation uses the notinname() macro, which recognises the
2035 * characters stored in the string `nonam'.
2036 * etags.el needs to use the same characters that are in NONAM.
2037 */
2038 static void
2039 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2040 char *name; /* tag name, or NULL if unnamed */
2041 int namelen; /* tag length */
2042 bool is_func; /* tag is a function */
2043 char *linestart; /* start of the line where tag is */
2044 int linelen; /* length of the line where tag is */
2045 int lno; /* line number */
2046 long cno; /* character number */
2047 {
2048 bool named = (name != NULL && namelen > 0);
2049
2050 if (!CTAGS && named) /* maybe set named to false */
2051 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2052 such that etags.el can guess a name from it. */
2053 {
2054 int i;
2055 register char *cp = name;
2056
2057 for (i = 0; i < namelen; i++)
2058 if (notinname (*cp++))
2059 break;
2060 if (i == namelen) /* rule #1 */
2061 {
2062 cp = linestart + linelen - namelen;
2063 if (notinname (linestart[linelen-1]))
2064 cp -= 1; /* rule #4 */
2065 if (cp >= linestart /* rule #2 */
2066 && (cp == linestart
2067 || notinname (cp[-1])) /* rule #3 */
2068 && strneq (name, cp, namelen)) /* rule #2 */
2069 named = FALSE; /* use implicit tag name */
2070 }
2071 }
2072
2073 if (named)
2074 name = savenstr (name, namelen);
2075 else
2076 name = NULL;
2077 pfnote (name, is_func, linestart, linelen, lno, cno);
2078 }
2079
2080 /* Record a tag. */
2081 static void
2082 pfnote (name, is_func, linestart, linelen, lno, cno)
2083 char *name; /* tag name, or NULL if unnamed */
2084 bool is_func; /* tag is a function */
2085 char *linestart; /* start of the line where tag is */
2086 int linelen; /* length of the line where tag is */
2087 int lno; /* line number */
2088 long cno; /* character number */
2089 {
2090 register node *np;
2091
2092 assert (name == NULL || name[0] != '\0');
2093 if (CTAGS && name == NULL)
2094 return;
2095
2096 np = xnew (1, node);
2097
2098 /* If ctags mode, change name "main" to M<thisfilename>. */
2099 if (CTAGS && !cxref_style && streq (name, "main"))
2100 {
2101 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2102 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2103 fp = etags_strrchr (np->name, '.');
2104 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2105 fp[0] = '\0';
2106 }
2107 else
2108 np->name = name;
2109 np->valid = TRUE;
2110 np->been_warned = FALSE;
2111 np->fdp = curfdp;
2112 np->is_func = is_func;
2113 np->lno = lno;
2114 if (np->fdp->usecharno)
2115 /* Our char numbers are 0-base, because of C language tradition?
2116 ctags compatibility? old versions compatibility? I don't know.
2117 Anyway, since emacs's are 1-base we expect etags.el to take care
2118 of the difference. If we wanted to have 1-based numbers, we would
2119 uncomment the +1 below. */
2120 np->cno = cno /* + 1 */ ;
2121 else
2122 np->cno = invalidcharno;
2123 np->left = np->right = NULL;
2124 if (CTAGS && !cxref_style)
2125 {
2126 if (strlen (linestart) < 50)
2127 np->regex = concat (linestart, "$", "");
2128 else
2129 np->regex = savenstr (linestart, 50);
2130 }
2131 else
2132 np->regex = savenstr (linestart, linelen);
2133
2134 add_node (np, &nodehead);
2135 }
2136
2137 /*
2138 * free_tree ()
2139 * recurse on left children, iterate on right children.
2140 */
2141 static void
2142 free_tree (np)
2143 register node *np;
2144 {
2145 while (np)
2146 {
2147 register node *node_right = np->right;
2148 free_tree (np->left);
2149 if (np->name != NULL)
2150 free (np->name);
2151 free (np->regex);
2152 free (np);
2153 np = node_right;
2154 }
2155 }
2156
2157 /*
2158 * free_fdesc ()
2159 * delete a file description
2160 */
2161 static void
2162 free_fdesc (fdp)
2163 register fdesc *fdp;
2164 {
2165 if (fdp->infname != NULL) free (fdp->infname);
2166 if (fdp->infabsname != NULL) free (fdp->infabsname);
2167 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2168 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2169 if (fdp->prop != NULL) free (fdp->prop);
2170 free (fdp);
2171 }
2172
2173 /*
2174 * add_node ()
2175 * Adds a node to the tree of nodes. In etags mode, sort by file
2176 * name. In ctags mode, sort by tag name. Make no attempt at
2177 * balancing.
2178 *
2179 * add_node is the only function allowed to add nodes, so it can
2180 * maintain state.
2181 */
2182 static void
2183 add_node (np, cur_node_p)
2184 node *np, **cur_node_p;
2185 {
2186 register int dif;
2187 register node *cur_node = *cur_node_p;
2188
2189 if (cur_node == NULL)
2190 {
2191 *cur_node_p = np;
2192 last_node = np;
2193 return;
2194 }
2195
2196 if (!CTAGS)
2197 /* Etags Mode */
2198 {
2199 /* For each file name, tags are in a linked sublist on the right
2200 pointer. The first tags of different files are a linked list
2201 on the left pointer. last_node points to the end of the last
2202 used sublist. */
2203 if (last_node != NULL && last_node->fdp == np->fdp)
2204 {
2205 /* Let's use the same sublist as the last added node. */
2206 assert (last_node->right == NULL);
2207 last_node->right = np;
2208 last_node = np;
2209 }
2210 else if (cur_node->fdp == np->fdp)
2211 {
2212 /* Scanning the list we found the head of a sublist which is
2213 good for us. Let's scan this sublist. */
2214 add_node (np, &cur_node->right);
2215 }
2216 else
2217 /* The head of this sublist is not good for us. Let's try the
2218 next one. */
2219 add_node (np, &cur_node->left);
2220 } /* if ETAGS mode */
2221
2222 else
2223 {
2224 /* Ctags Mode */
2225 dif = strcmp (np->name, cur_node->name);
2226
2227 /*
2228 * If this tag name matches an existing one, then
2229 * do not add the node, but maybe print a warning.
2230 */
2231 if (no_duplicates && !dif)
2232 {
2233 if (np->fdp == cur_node->fdp)
2234 {
2235 if (!no_warnings)
2236 {
2237 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2238 np->fdp->infname, lineno, np->name);
2239 fprintf (stderr, "Second entry ignored\n");
2240 }
2241 }
2242 else if (!cur_node->been_warned && !no_warnings)
2243 {
2244 fprintf
2245 (stderr,
2246 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2247 np->fdp->infname, cur_node->fdp->infname, np->name);
2248 cur_node->been_warned = TRUE;
2249 }
2250 return;
2251 }
2252
2253 /* Actually add the node */
2254 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2255 } /* if CTAGS mode */
2256 }
2257
2258 /*
2259 * invalidate_nodes ()
2260 * Scan the node tree and invalidate all nodes pointing to the
2261 * given file description (CTAGS case) or free them (ETAGS case).
2262 */
2263 static void
2264 invalidate_nodes (badfdp, npp)
2265 fdesc *badfdp;
2266 node **npp;
2267 {
2268 node *np = *npp;
2269
2270 if (np == NULL)
2271 return;
2272
2273 if (CTAGS)
2274 {
2275 if (np->left != NULL)
2276 invalidate_nodes (badfdp, &np->left);
2277 if (np->fdp == badfdp)
2278 np->valid = FALSE;
2279 if (np->right != NULL)
2280 invalidate_nodes (badfdp, &np->right);
2281 }
2282 else
2283 {
2284 assert (np->fdp != NULL);
2285 if (np->fdp == badfdp)
2286 {
2287 *npp = np->left; /* detach the sublist from the list */
2288 np->left = NULL; /* isolate it */
2289 free_tree (np); /* free it */
2290 invalidate_nodes (badfdp, npp);
2291 }
2292 else
2293 invalidate_nodes (badfdp, &np->left);
2294 }
2295 }
2296
2297 \f
2298 static int total_size_of_entries __P((node *));
2299 static int number_len __P((long));
2300
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1.  */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to begin with, one more for every division by ten that
     still leaves something to print.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;

  return digits;
}
2311
2312 /*
2313 * Return total number of characters that put_entries will output for
2314 * the nodes in the linked list at the right of the specified node.
2315 * This count is irrelevant with etags.el since emacs 19.34 at least,
2316 * but is still supplied for backward compatibility.
2317 */
2318 static int
2319 total_size_of_entries (np)
2320 register node *np;
2321 {
2322 register int total = 0;
2323
2324 for (; np != NULL; np = np->right)
2325 if (np->valid)
2326 {
2327 total += strlen (np->regex) + 1; /* pat\177 */
2328 if (np->name != NULL)
2329 total += strlen (np->name) + 1; /* name\001 */
2330 total += number_len ((long) np->lno) + 1; /* lno, */
2331 if (np->cno != invalidcharno) /* cno */
2332 total += number_len (np->cno);
2333 total += 1; /* newline */
2334 }
2335
2336 return total;
2337 }
2338
2339 static void
2340 put_entries (np)
2341 register node *np;
2342 {
2343 register char *sp;
2344 static fdesc *fdp = NULL;
2345
2346 if (np == NULL)
2347 return;
2348
2349 /* Output subentries that precede this one */
2350 if (CTAGS)
2351 put_entries (np->left);
2352
2353 /* Output this entry */
2354 if (np->valid)
2355 {
2356 if (!CTAGS)
2357 {
2358 /* Etags mode */
2359 if (fdp != np->fdp)
2360 {
2361 fdp = np->fdp;
2362 fprintf (tagf, "\f\n%s,%d\n",
2363 fdp->taggedfname, total_size_of_entries (np));
2364 fdp->written = TRUE;
2365 }
2366 fputs (np->regex, tagf);
2367 fputc ('\177', tagf);
2368 if (np->name != NULL)
2369 {
2370 fputs (np->name, tagf);
2371 fputc ('\001', tagf);
2372 }
2373 fprintf (tagf, "%d,", np->lno);
2374 if (np->cno != invalidcharno)
2375 fprintf (tagf, "%ld", np->cno);
2376 fputs ("\n", tagf);
2377 }
2378 else
2379 {
2380 /* Ctags mode */
2381 if (np->name == NULL)
2382 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2383
2384 if (cxref_style)
2385 {
2386 if (vgrind_style)
2387 fprintf (stdout, "%s %s %d\n",
2388 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2389 else
2390 fprintf (stdout, "%-16s %3d %-16s %s\n",
2391 np->name, np->lno, np->fdp->taggedfname, np->regex);
2392 }
2393 else
2394 {
2395 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2396
2397 if (np->is_func)
2398 { /* function or #define macro with args */
2399 putc (searchar, tagf);
2400 putc ('^', tagf);
2401
2402 for (sp = np->regex; *sp; sp++)
2403 {
2404 if (*sp == '\\' || *sp == searchar)
2405 putc ('\\', tagf);
2406 putc (*sp, tagf);
2407 }
2408 putc (searchar, tagf);
2409 }
2410 else
2411 { /* anything else; text pattern inadequate */
2412 fprintf (tagf, "%d", np->lno);
2413 }
2414 putc ('\n', tagf);
2415 }
2416 }
2417 } /* if this node contains a valid tag */
2418
2419 /* Output subentries that follow this one */
2420 put_entries (np->right);
2421 if (!CTAGS)
2422 put_entries (np->left);
2423 }
2424
2425 \f
/* Bit masks describing which C dialect is being parsed.  */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */

/*
 * The C symbol tables.
 * Each keyword known to the C-like parsers is classified as one of
 * these types; st_none is the value used for a token that is not a
 * recognised keyword.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2449
2450 static unsigned int hash __P((const char *, unsigned int));
2451 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2452 static enum sym_type C_symtype __P((char *, int, int));
2453
2454 /* Feed stuff between (but not including) %[ and %] lines to:
2455 gperf -m 5
2456 %[
2457 %compare-strncmp
2458 %enum
2459 %struct-type
2460 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2461 %%
2462 if, 0, st_C_ignore
2463 for, 0, st_C_ignore
2464 while, 0, st_C_ignore
2465 switch, 0, st_C_ignore
2466 return, 0, st_C_ignore
2467 __attribute__, 0, st_C_attribute
2468 GTY, 0, st_C_attribute
2469 @interface, 0, st_C_objprot
2470 @protocol, 0, st_C_objprot
2471 @implementation,0, st_C_objimpl
2472 @end, 0, st_C_objend
2473 import, (C_JAVA & ~C_PLPL), st_C_ignore
2474 package, (C_JAVA & ~C_PLPL), st_C_ignore
2475 friend, C_PLPL, st_C_ignore
2476 extends, (C_JAVA & ~C_PLPL), st_C_javastruct
2477 implements, (C_JAVA & ~C_PLPL), st_C_javastruct
2478 interface, (C_JAVA & ~C_PLPL), st_C_struct
2479 class, 0, st_C_class
2480 namespace, C_PLPL, st_C_struct
2481 domain, C_STAR, st_C_struct
2482 union, 0, st_C_struct
2483 struct, 0, st_C_struct
2484 extern, 0, st_C_extern
2485 enum, 0, st_C_enum
2486 typedef, 0, st_C_typedef
2487 define, 0, st_C_define
2488 undef, 0, st_C_define
2489 operator, C_PLPL, st_C_operator
2490 template, 0, st_C_template
2491 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2492 DEFUN, 0, st_C_gnumacro
2493 SYSCALL, 0, st_C_gnumacro
2494 ENTRY, 0, st_C_gnumacro
2495 PSEUDO, 0, st_C_gnumacro
2496 # These are defined inside C functions, so currently they are not met.
2497 # EXFUN used in glibc, DEFVAR_* in emacs.
2498 #EXFUN, 0, st_C_gnumacro
2499 #DEFVAR_, 0, st_C_gnumacro
2500 %]
2501 and replace lines between %< and %> with its output, then:
2502 - remove the #if characterset check
2503 - make in_word_set static and not inline. */
2504 /*%<*/
2505 /* C code produced by gperf version 3.0.1 */
2506 /* Command-line: gperf -m 5 */
2507 /* Computed positions: -k'2-3' */
2508
2509 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2510 /* maximum key range = 33, duplicates = 0 */
2511
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function for the keyword table (part of the gperf output).
   The value is LEN plus the association value of the second
   character of STR and, unless LEN is exactly 2, that of the third
   character as well; STR must therefore hold at least that many
   characters.  */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int key = len;

  /* The third character takes part for every length but 2.  */
  if (key != 2)
    key += asso_values[(unsigned char) str[2]];
  key += asso_values[(unsigned char) str[1]];

  return key;
}
2566
2567 static struct C_stab_entry *
2568 in_word_set (str, len)
2569 register const char *str;
2570 register unsigned int len;
2571 {
2572 enum
2573 {
2574 TOTAL_KEYWORDS = 32,
2575 MIN_WORD_LENGTH = 2,
2576 MAX_WORD_LENGTH = 15,
2577 MIN_HASH_VALUE = 2,
2578 MAX_HASH_VALUE = 34
2579 };
2580
2581 static struct C_stab_entry wordlist[] =
2582 {
2583 {""}, {""},
2584 {"if", 0, st_C_ignore},
2585 {"GTY", 0, st_C_attribute},
2586 {"@end", 0, st_C_objend},
2587 {"union", 0, st_C_struct},
2588 {"define", 0, st_C_define},
2589 {"import", (C_JAVA & ~C_PLPL), st_C_ignore},
2590 {"template", 0, st_C_template},
2591 {"operator", C_PLPL, st_C_operator},
2592 {"@interface", 0, st_C_objprot},
2593 {"implements", (C_JAVA & ~C_PLPL), st_C_javastruct},
2594 {"friend", C_PLPL, st_C_ignore},
2595 {"typedef", 0, st_C_typedef},
2596 {"return", 0, st_C_ignore},
2597 {"@implementation",0, st_C_objimpl},
2598 {"@protocol", 0, st_C_objprot},
2599 {"interface", (C_JAVA & ~C_PLPL), st_C_struct},
2600 {"extern", 0, st_C_extern},
2601 {"extends", (C_JAVA & ~C_PLPL), st_C_javastruct},
2602 {"struct", 0, st_C_struct},
2603 {"domain", C_STAR, st_C_struct},
2604 {"switch", 0, st_C_ignore},
2605 {"enum", 0, st_C_enum},
2606 {"for", 0, st_C_ignore},
2607 {"namespace", C_PLPL, st_C_struct},
2608 {"class", 0, st_C_class},
2609 {"while", 0, st_C_ignore},
2610 {"undef", 0, st_C_define},
2611 {"package", (C_JAVA & ~C_PLPL), st_C_ignore},
2612 {"__attribute__", 0, st_C_attribute},
2613 {"SYSCALL", 0, st_C_gnumacro},
2614 {"ENTRY", 0, st_C_gnumacro},
2615 {"PSEUDO", 0, st_C_gnumacro},
2616 {"DEFUN", 0, st_C_gnumacro}
2617 };
2618
2619 if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2620 {
2621 register int key = hash (str, len);
2622
2623 if (key <= MAX_HASH_VALUE && key >= 0)
2624 {
2625 register const char *s = wordlist[key].name;
2626
2627 if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2628 return &wordlist[key];
2629 }
2630 }
2631 return 0;
2632 }
2633 /*%>*/
2634
2635 static enum sym_type
2636 C_symtype (str, len, c_ext)
2637 char *str;
2638 int len;
2639 int c_ext;
2640 {
2641 register struct C_stab_entry *se = in_word_set (str, len);
2642
2643 if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2644 return st_none;
2645 return se->type;
2646 }
2647
2648 \f
2649 /*
2650 * Ignoring __attribute__ ((list))
2651 */
2652 static bool inattribute; /* looking at an __attribute__ construct */
2653
2654 /*
2655 * C functions and variables are recognized using a simple
2656 * finite automaton. fvdef is its state variable.
2657 */
2658 static enum
2659 {
2660 fvnone, /* nothing seen */
2661 fdefunkey, /* Emacs DEFUN keyword seen */
2662 fdefunname, /* Emacs DEFUN name seen */
2663 foperator, /* func: operator keyword seen (cplpl) */
2664 fvnameseen, /* function or variable name seen */
2665 fstartlist, /* func: just after open parenthesis */
2666 finlist, /* func: in parameter list */
2667 flistseen, /* func: after parameter list */
2668 fignore, /* func: before open brace */
2669 vignore /* var-like: ignore until ';' */
2670 } fvdef;
2671
2672 static bool fvextern; /* func or var: extern keyword seen; */
2673
2674 /*
2675 * typedefs are recognized using a simple finite automaton.
2676 * typdef is its state variable.
2677 */
2678 static enum
2679 {
2680 tnone, /* nothing seen */
2681 tkeyseen, /* typedef keyword seen */
2682 ttypeseen, /* defined type seen */
2683 tinbody, /* inside typedef body */
2684 tend, /* just before typedef tag */
2685 tignore /* junk after typedef tag */
2686 } typdef;
2687
2688 /*
2689 * struct-like structures (enum, struct and union) are recognized
2690 * using another simple finite automaton. `structdef' is its state
2691 * variable.
2692 */
2693 static enum
2694 {
2695 snone, /* nothing seen yet,
2696 or in struct body if bracelev > 0 */
2697 skeyseen, /* struct-like keyword seen */
2698 stagseen, /* struct-like tag seen */
2699 scolonseen /* colon seen after struct-like tag */
2700 } structdef;
2701
2702 /*
2703 * When objdef is different from onone, objtag is the name of the class.
2704 */
2705 static char *objtag = "<uninited>";
2706
2707 /*
2708 * Yet another little state machine to deal with preprocessor lines.
2709 */
2710 static enum
2711 {
2712 dnone, /* nothing seen */
2713 dsharpseen, /* '#' seen as first char on line */
2714 ddefineseen, /* '#' and 'define' seen */
2715 dignorerest /* ignore rest of line */
2716 } definedef;
2717
2718 /*
2719 * State machine for Objective C protocols and implementations.
2720 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2721 */
2722 static enum
2723 {
2724 onone, /* nothing seen */
2725 oprotocol, /* @interface or @protocol seen */
2726 oimplementation, /* @implementations seen */
2727 otagseen, /* class name seen */
2728 oparenseen, /* parenthesis before category seen */
2729 ocatseen, /* category name seen */
2730 oinbody, /* in @implementation body */
2731 omethodsign, /* in @implementation body, after +/- */
2732 omethodtag, /* after method name */
2733 omethodcolon, /* after method colon */
2734 omethodparm, /* after method parameter */
2735 oignore /* wait for @end */
2736 } objdef;
2737
2738
2739 /*
2740 * Use this structure to keep info about the token read, and how it
2741 * should be tagged. Used by the make_C_tag function to build a tag.
2742 */
2743 static struct tok
2744 {
2745 char *line; /* string containing the token */
2746 int offset; /* where the token starts in LINE */
2747 int length; /* token length */
2748 /*
2749 The previous members can be used to pass strings around for generic
2750 purposes. The following ones specifically refer to creating tags. In this
2751 case the token contained here is the pattern that will be used to create a
2752 tag.
2753 */
2754 bool valid; /* do not create a tag; the token should be
2755 invalidated whenever a state machine is
2756 reset prematurely */
2757 bool named; /* create a named tag */
2758 int lineno; /* source line number of tag */
2759 long linepos; /* source char number of tag */
2760 } token; /* latest token read */
2761
2762 /*
2763 * Variables and functions for dealing with nested structures.
2764 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2765 */
2766 static void pushclass_above __P((int, char *, int));
2767 static void popclass_above __P((int));
2768 static void write_classname __P((linebuffer *, char *qualifier));
2769
/* Stack for nested declaration tags.  The two arrays are parallel:
   entry I holds the name of the Ith enclosing class and the brace
   level recorded for it.  */
static struct
{
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)

/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that reads a variable called `bracelev'
   from the scope where the macro is used.  */
#define instruct	(structdef == snone && nestlev > 0		\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2781
2782 static void
2783 pushclass_above (bracelev, str, len)
2784 int bracelev;
2785 char *str;
2786 int len;
2787 {
2788 int nl;
2789
2790 popclass_above (bracelev);
2791 nl = cstack.nl;
2792 if (nl >= cstack.size)
2793 {
2794 int size = cstack.size *= 2;
2795 xrnew (cstack.cname, size, char *);
2796 xrnew (cstack.bracelev, size, int);
2797 }
2798 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2799 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2800 cstack.bracelev[nl] = bracelev;
2801 cstack.nl = nl + 1;
2802 }
2803
2804 static void
2805 popclass_above (bracelev)
2806 int bracelev;
2807 {
2808 int nl;
2809
2810 for (nl = cstack.nl - 1;
2811 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2812 nl--)
2813 {
2814 if (cstack.cname[nl] != NULL)
2815 free (cstack.cname[nl]);
2816 cstack.nl = nl;
2817 }
2818 }
2819
2820 static void
2821 write_classname (cn, qualifier)
2822 linebuffer *cn;
2823 char *qualifier;
2824 {
2825 int i, len;
2826 int qlen = strlen (qualifier);
2827
2828 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2829 {
2830 len = 0;
2831 cn->len = 0;
2832 cn->buffer[0] = '\0';
2833 }
2834 else
2835 {
2836 len = strlen (cstack.cname[0]);
2837 linebuffer_setlen (cn, len);
2838 strcpy (cn->buffer, cstack.cname[0]);
2839 }
2840 for (i = 1; i < cstack.nl; i++)
2841 {
2842 char *s;
2843 int slen;
2844
2845 s = cstack.cname[i];
2846 if (s == NULL)
2847 continue;
2848 slen = strlen (s);
2849 len += slen + qlen;
2850 linebuffer_setlen (cn, len);
2851 strncat (cn->buffer, qualifier, qlen);
2852 strncat (cn->buffer, s, slen);
2853 }
2854 }
2855
2856 \f
2857 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2858 static void make_C_tag __P((bool));
2859
2860 /*
2861 * consider_token ()
2862 * checks to see if the current token is at the start of a
2863 * function or variable, or corresponds to a typedef, or
2864 * is a struct/union/enum tag, or #define, or an enum constant.
2865 *
2866 * *IS_FUNC gets TRUE if the token is a function or #define macro
2867 * with args. C_EXTP points to which language we are looking at.
2868 *
2869 * Globals
2870 * fvdef IN OUT
2871 * structdef IN OUT
2872 * definedef IN OUT
2873 * typdef IN OUT
2874 * objdef IN OUT
2875 */
2876
2877 static bool
2878 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2879 register char *str; /* IN: token pointer */
2880 register int len; /* IN: token length */
2881 register int c; /* IN: first char after the token */
2882 int *c_extp; /* IN, OUT: C extensions mask */
2883 int bracelev; /* IN: brace level */
2884 int parlev; /* IN: parenthesis level */
2885 bool *is_func_or_var; /* OUT: function or variable found */
2886 {
2887 /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2888 structtype is the type of the preceding struct-like keyword, and
2889 structbracelev is the brace level where it has been seen. */
2890 static enum sym_type structtype;
2891 static int structbracelev;
2892 static enum sym_type toktype;
2893
2894
2895 toktype = C_symtype (str, len, *c_extp);
2896
2897 /*
2898 * Skip __attribute__
2899 */
2900 if (toktype == st_C_attribute)
2901 {
2902 inattribute = TRUE;
2903 return FALSE;
2904 }
2905
2906 /*
2907 * Advance the definedef state machine.
2908 */
2909 switch (definedef)
2910 {
2911 case dnone:
2912 /* We're not on a preprocessor line. */
2913 if (toktype == st_C_gnumacro)
2914 {
2915 fvdef = fdefunkey;
2916 return FALSE;
2917 }
2918 break;
2919 case dsharpseen:
2920 if (toktype == st_C_define)
2921 {
2922 definedef = ddefineseen;
2923 }
2924 else
2925 {
2926 definedef = dignorerest;
2927 }
2928 return FALSE;
2929 case ddefineseen:
2930 /*
2931 * Make a tag for any macro, unless it is a constant
2932 * and constantypedefs is FALSE.
2933 */
2934 definedef = dignorerest;
2935 *is_func_or_var = (c == '(');
2936 if (!*is_func_or_var && !constantypedefs)
2937 return FALSE;
2938 else
2939 return TRUE;
2940 case dignorerest:
2941 return FALSE;
2942 default:
2943 error ("internal error: definedef value.", (char *)NULL);
2944 }
2945
2946 /*
2947 * Now typedefs
2948 */
2949 switch (typdef)
2950 {
2951 case tnone:
2952 if (toktype == st_C_typedef)
2953 {
2954 if (typedefs)
2955 typdef = tkeyseen;
2956 fvextern = FALSE;
2957 fvdef = fvnone;
2958 return FALSE;
2959 }
2960 break;
2961 case tkeyseen:
2962 switch (toktype)
2963 {
2964 case st_none:
2965 case st_C_class:
2966 case st_C_struct:
2967 case st_C_enum:
2968 typdef = ttypeseen;
2969 }
2970 break;
2971 case ttypeseen:
2972 if (structdef == snone && fvdef == fvnone)
2973 {
2974 fvdef = fvnameseen;
2975 return TRUE;
2976 }
2977 break;
2978 case tend:
2979 switch (toktype)
2980 {
2981 case st_C_class:
2982 case st_C_struct:
2983 case st_C_enum:
2984 return FALSE;
2985 }
2986 return TRUE;
2987 }
2988
2989 switch (toktype)
2990 {
2991 case st_C_javastruct:
2992 if (structdef == stagseen)
2993 structdef = scolonseen;
2994 return FALSE;
2995 case st_C_template:
2996 case st_C_class:
2997 if ((*c_extp & C_AUTO) /* automatic detection of C++ language */
2998 && bracelev == 0
2999 && definedef == dnone && structdef == snone
3000 && typdef == tnone && fvdef == fvnone)
3001 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3002 if (toktype == st_C_template)
3003 break;
3004 /* FALLTHRU */
3005 case st_C_struct:
3006 case st_C_enum:
3007 if (parlev == 0
3008 && fvdef != vignore
3009 && (typdef == tkeyseen
3010 || (typedefs_or_cplusplus && structdef == snone)))
3011 {
3012 structdef = skeyseen;
3013 structtype = toktype;
3014 structbracelev = bracelev;
3015 if (fvdef == fvnameseen)
3016 fvdef = fvnone;
3017 }
3018 return FALSE;
3019 }
3020
3021 if (structdef == skeyseen)
3022 {
3023 structdef = stagseen;
3024 return TRUE;
3025 }
3026
3027 if (typdef != tnone)
3028 definedef = dnone;
3029
3030 /* Detect Objective C constructs. */
3031 switch (objdef)
3032 {
3033 case onone:
3034 switch (toktype)
3035 {
3036 case st_C_objprot:
3037 objdef = oprotocol;
3038 return FALSE;
3039 case st_C_objimpl:
3040 objdef = oimplementation;
3041 return FALSE;
3042 }
3043 break;
3044 case oimplementation:
3045 /* Save the class tag for functions or variables defined inside. */
3046 objtag = savenstr (str, len);
3047 objdef = oinbody;
3048 return FALSE;
3049 case oprotocol:
3050 /* Save the class tag for categories. */
3051 objtag = savenstr (str, len);
3052 objdef = otagseen;
3053 *is_func_or_var = TRUE;
3054 return TRUE;
3055 case oparenseen:
3056 objdef = ocatseen;
3057 *is_func_or_var = TRUE;
3058 return TRUE;
3059 case oinbody:
3060 break;
3061 case omethodsign:
3062 if (parlev == 0)
3063 {
3064 fvdef = fvnone;
3065 objdef = omethodtag;
3066 linebuffer_setlen (&token_name, len);
3067 strncpy (token_name.buffer, str, len);
3068 token_name.buffer[len] = '\0';
3069 return TRUE;
3070 }
3071 return FALSE;
3072 case omethodcolon:
3073 if (parlev == 0)
3074 objdef = omethodparm;
3075 return FALSE;
3076 case omethodparm:
3077 if (parlev == 0)
3078 {
3079 fvdef = fvnone;
3080 objdef = omethodtag;
3081 linebuffer_setlen (&token_name, token_name.len + len);
3082 strncat (token_name.buffer, str, len);
3083 return TRUE;
3084 }
3085 return FALSE;
3086 case oignore:
3087 if (toktype == st_C_objend)
3088 {
3089 /* Memory leakage here: the string pointed by objtag is
3090 never released, because many tests would be needed to
3091 avoid breaking on incorrect input code. The amount of
3092 memory leaked here is the sum of the lengths of the
3093 class tags.
3094 free (objtag); */
3095 objdef = onone;
3096 }
3097 return FALSE;
3098 }
3099
3100 /* A function, variable or enum constant? */
3101 switch (toktype)
3102 {
3103 case st_C_extern:
3104 fvextern = TRUE;
3105 switch (fvdef)
3106 {
3107 case finlist:
3108 case flistseen:
3109 case fignore:
3110 case vignore:
3111 break;
3112 default:
3113 fvdef = fvnone;
3114 }
3115 return FALSE;
3116 case st_C_ignore:
3117 fvextern = FALSE;
3118 fvdef = vignore;
3119 return FALSE;
3120 case st_C_operator:
3121 fvdef = foperator;
3122 *is_func_or_var = TRUE;
3123 return TRUE;
3124 case st_none:
3125 if (constantypedefs
3126 && structdef == snone
3127 && structtype == st_C_enum && bracelev > structbracelev)
3128 return TRUE; /* enum constant */
3129 switch (fvdef)
3130 {
3131 case fdefunkey:
3132 if (bracelev > 0)
3133 break;
3134 fvdef = fdefunname; /* GNU macro */
3135 *is_func_or_var = TRUE;
3136 return TRUE;
3137 case fvnone:
3138 switch (typdef)
3139 {
3140 case ttypeseen:
3141 return FALSE;
3142 case tnone:
3143 if ((strneq (str, "asm", 3) && endtoken (str[3]))
3144 || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3145 {
3146 fvdef = vignore;
3147 return FALSE;
3148 }
3149 break;
3150 }
3151 /* FALLTHRU */
3152 case fvnameseen:
3153 if (len >= 10 && strneq (str+len-10, "::operator", 10))
3154 {
3155 if (*c_extp & C_AUTO) /* automatic detection of C++ */
3156 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3157 fvdef = foperator;
3158 *is_func_or_var = TRUE;
3159 return TRUE;
3160 }
3161 if (bracelev > 0 && !instruct)
3162 break;
3163 fvdef = fvnameseen; /* function or variable */
3164 *is_func_or_var = TRUE;
3165 return TRUE;
3166 }
3167 break;
3168 }
3169
3170 return FALSE;
3171 }
3172
3173 \f
3174 /*
3175 * C_entries often keeps pointers to tokens or lines which are older than
3176 * the line currently read. By keeping two line buffers, and switching
3177 * them at end of line, it is possible to use those pointers.
3178 */
/* Each slot pairs a line buffer with a position value for that line
   (CNL_SAVE_DEFINEDEF stores `charno' there before reading).  */
3179 static struct
3180 {
3181 long linepos;
3182 linebuffer lb;
3183 } lbs[2];
3184
/* The macros below expand to references to locals of C_entries (curndx,
   newndx, c_ext, lp, quotednl, savetoken, inf) and to file-level state
   (charno, token, definedef); they are only usable inside that function.  */

/* True when the most recently read line lives in the current slot.  */
3185 #define current_lb_is_new (newndx == curndx)
/* Flip curndx between 0 and 1 so that the next read goes to the other slot.  */
3186 #define switch_line_buffers() (curndx = 1 - curndx)
3187
/* Accessors for the current slot and for the slot holding the newest line.  */
3188 #define curlb (lbs[curndx].lb)
3189 #define newlb (lbs[newndx].lb)
3190 #define curlinepos (lbs[curndx].linepos)
3191 #define newlinepos (lbs[newndx].linepos)
3192
/* Language predicates computed from the c_ext bit mask.  */
3193 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3194 #define cplpl (c_ext & C_PLPL)
3195 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3196
/* Read the next input line into the current slot: record charno as its
   position, point lp at the start of the new text, clear quotednl and mark
   the current slot as the newest one.  definedef is left untouched.  */
3197 #define CNL_SAVE_DEFINEDEF() \
3198 do { \
3199 curlinepos = charno; \
3200 readline (&curlb, inf); \
3201 lp = curlb.buffer; \
3202 quotednl = FALSE; \
3203 newndx = curndx; \
3204 } while (0)
3205
/* Like CNL_SAVE_DEFINEDEF, but also ends any preprocessor line: a token
   saved in savetoken is restored into `token' (and savetoken invalidated),
   and definedef is reset to dnone.  */
3206 #define CNL() \
3207 do { \
3208 CNL_SAVE_DEFINEDEF(); \
3209 if (savetoken.valid) \
3210 { \
3211 token = savetoken; \
3212 savetoken.valid = FALSE; \
3213 } \
3214 definedef = dnone; \
3215 } while (0)
3216
3217
3218 static void
3219 make_C_tag (isfun)
3220 bool isfun;
3221 {
3222 /* This function is never called when token.valid is FALSE, but
3223 we must protect against invalid input or internal errors. */
3224 if (!DEBUG && !token.valid)
3225 return;
3226
3227 if (token.valid)
3228 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3229 token.offset+token.length+1, token.lineno, token.linepos);
3230 else /* this case is optimised away if !DEBUG */
3231 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3232 token_name.len + 17, isfun, token.line,
3233 token.offset+token.length+1, token.lineno, token.linepos);
3234
3235 token.valid = FALSE;
3236 }
3237
3238
3239 /*
3240 * C_entries ()
3241 * This routine finds functions, variables, typedefs,
3242 * #define's, enum constants and struct/union/enum definitions in
3243 * C syntax and adds them to the list.
3244 */
/*
 * c_ext carries the language-extension bits (tested below against C_AUTO,
 * C_PLPL, C_JAVA and YACC); inf is the stream the source lines are read
 * from.  The input is scanned one character per loop iteration, with '\0'
 * standing for end of line.  Identifiers are handed to consider_token,
 * and tags are emitted through make_C_tag when a later punctuation
 * character settles what the identifier was.
 */
3245 static void
3246 C_entries (c_ext, inf)
3247 int c_ext; /* extension of C */
3248 FILE *inf; /* input file */
3249 {
3250 register char c; /* latest char read; '\0' for end of line */
3251 register char *lp; /* pointer one beyond the character `c' */
3252 int curndx, newndx; /* indices for current and new lb */
3253 register int tokoff; /* offset in line of start of current token */
3254 register int toklen; /* length of current token */
3255 char *qualifier; /* string used to qualify names */
3256 int qlen; /* length of qualifier */
3257 int bracelev; /* current brace level */
3258 int bracketlev; /* current bracket level */
3259 int parlev; /* current parenthesis level */
3260 int attrparlev; /* __attribute__ parenthesis level */
3261 int templatelev; /* current template level */
3262 int typdefbracelev; /* bracelev where a typedef struct body begun */
3263 bool incomm, inquote, inchar, quotednl, midtoken;
3264 bool yacc_rules; /* in the rules part of a yacc file */
3265 struct tok savetoken = {0}; /* token saved during preprocessor handling */
3266
3267
/* Set up both line buffers and, the first time through (cstack.size is
   still 0), allocate the class-name stack.  */
3268 linebuffer_init (&lbs[0].lb);
3269 linebuffer_init (&lbs[1].lb);
3270 if (cstack.size == 0)
3271 {
3272 cstack.size = (DEBUG) ? 1 : 4;
3273 cstack.nl = 0;
3274 cstack.cname = xnew (cstack.size, char *);
3275 cstack.bracelev = xnew (cstack.size, int);
3276 }
3277
3278 tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3279 curndx = newndx = 0;
/* Start with an empty current line so that the first character seen is
   '\0', which makes the loop read the first real line.  */
3280 lp = curlb.buffer;
3281 *lp = 0;
3282
/* Reset every piece of parser state before scanning this file.  */
3283 fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3284 structdef = snone; definedef = dnone; objdef = onone;
3285 yacc_rules = FALSE;
3286 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3287 token.valid = savetoken.valid = FALSE;
3288 bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
/* Nested names are joined with "." for Java and with "::" otherwise.  */
3289 if (cjava)
3290 { qualifier = "."; qlen = 1; }
3291 else
3292 { qualifier = "::"; qlen = 2; }
3293
3294
/* Main loop: fetch one character, first dealing with backslashes,
   comments, string and character literals and square brackets.  */
3295 while (!feof (inf))
3296 {
3297 c = *lp++;
3298 if (c == '\\')
3299 {
3300 /* If we are at the end of the line, the next character is a
3301 '\0'; do not skip it, because it is what tells us
3302 to read the next line. */
3303 if (*lp == '\0')
3304 {
3305 quotednl = TRUE;
3306 continue;
3307 }
3308 lp++;
3309 c = ' ';
3310 }
3311 else if (incomm)
3312 {
3313 switch (c)
3314 {
3315 case '*':
3316 if (*lp == '/')
3317 {
3318 c = *lp++;
3319 incomm = FALSE;
3320 }
3321 break;
3322 case '\0':
3323 /* Newlines inside comments do not end macro definitions in
3324 traditional cpp. */
3325 CNL_SAVE_DEFINEDEF ();
3326 break;
3327 }
3328 continue;
3329 }
3330 else if (inquote)
3331 {
3332 switch (c)
3333 {
3334 case '"':
3335 inquote = FALSE;
3336 break;
3337 case '\0':
3338 /* Newlines inside strings do not end macro definitions
3339 in traditional cpp, even though compilers don't
3340 usually accept them. */
3341 CNL_SAVE_DEFINEDEF ();
3342 break;
3343 }
3344 continue;
3345 }
3346 else if (inchar)
3347 {
3348 switch (c)
3349 {
3350 case '\0':
3351 /* Hmmm, something went wrong. */
3352 CNL ();
3353 /* FALLTHRU */
3354 case '\'':
3355 inchar = FALSE;
3356 break;
3357 }
3358 continue;
3359 }
3360 else if (bracketlev > 0)
3361 {
3362 switch (c)
3363 {
3364 case ']':
3365 if (--bracketlev > 0)
3366 continue;
3367 break;
3368 case '\0':
3369 CNL_SAVE_DEFINEDEF ();
3370 break;
3371 }
3372 continue;
3373 }
3374 else switch (c)
3375 {
3376 case '"':
3377 inquote = TRUE;
3378 if (inattribute)
3379 break;
3380 switch (fvdef)
3381 {
3382 case fdefunkey:
3383 case fstartlist:
3384 case finlist:
3385 case fignore:
3386 case vignore:
3387 break;
3388 default:
3389 fvextern = FALSE;
3390 fvdef = fvnone;
3391 }
3392 continue;
3393 case '\'':
3394 inchar = TRUE;
3395 if (inattribute)
3396 break;
3397 if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3398 {
3399 fvextern = FALSE;
3400 fvdef = fvnone;
3401 }
3402 continue;
3403 case '/':
3404 if (*lp == '*')
3405 {
3406 incomm = TRUE;
3407 lp++;
3408 c = ' ';
3409 }
3410 else if (/* cplpl && */ *lp == '/')
3411 {
/* A `//' comment: treat the rest of the line as absent.  */
3412 c = '\0';
3413 }
3414 break;
3415 case '%':
3416 if ((c_ext & YACC) && *lp == '%')
3417 {
3418 /* Entering or exiting rules section in yacc file. */
3419 lp++;
3420 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3421 typdef = tnone; structdef = snone;
3422 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3423 bracelev = 0;
3424 yacc_rules = !yacc_rules;
3425 continue;
3426 }
3427 else
3428 break;
3429 case '#':
3430 if (definedef == dnone)
3431 {
3432 char *cp;
3433 bool cpptoken = TRUE;
3434
3435 /* Look back on this line. If all blanks, or nonblanks
3436 followed by an end of comment, this is a preprocessor
3437 token. */
3438 for (cp = newlb.buffer; cp < lp-1; cp++)
3439 if (!iswhite (*cp))
3440 {
3441 if (*cp == '*' && *(cp+1) == '/')
3442 {
3443 cp++;
3444 cpptoken = TRUE;
3445 }
3446 else
3447 cpptoken = FALSE;
3448 }
3449 if (cpptoken)
3450 definedef = dsharpseen;
3451 } /* if (definedef == dnone) */
3452 continue;
3453 case '[':
3454 bracketlev++;
3455 continue;
3456 } /* switch (c) */
3457
3458
3459 /* Consider token only if some involved conditions are satisfied. */
3460 if (typdef != tignore
3461 && definedef != dignorerest
3462 && fvdef != finlist
3463 && templatelev == 0
3464 && (definedef != dnone
3465 || structdef != scolonseen)
3466 && !inattribute)
3467 {
3468 if (midtoken)
3469 {
3470 if (endtoken (c))
3471 {
3472 if (c == ':' && *lp == ':' && begtoken (lp[1]))
3473 /* This handles :: in the middle,
3474 but not at the beginning of an identifier.
3475 Also, space-separated :: is not recognised. */
3476 {
3477 if (c_ext & C_AUTO) /* automatic detection of C++ */
3478 c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3479 lp += 2;
3480 toklen += 2;
3481 c = lp[-1];
3482 goto still_in_token;
3483 }
3484 else
3485 {
3486 bool funorvar = FALSE;
3487
3488 if (yacc_rules
3489 || consider_token (newlb.buffer + tokoff, toklen, c,
3490 &c_ext, bracelev, parlev,
3491 &funorvar))
3492 {
3493 if (fvdef == foperator)
3494 {
/* Extend the token over the operator symbol that follows the
   `operator' keyword, up to whitespace or `('.  */
3495 char *oldlp = lp;
3496 lp = skip_spaces (lp-1);
3497 if (*lp != '\0')
3498 lp += 1;
3499 while (*lp != '\0'
3500 && !iswhite (*lp) && *lp != '(')
3501 lp += 1;
3502 c = *lp++;
3503 toklen += lp - oldlp;
3504 }
/* Build the tag name in token_name; the form depends on context.  */
3505 token.named = FALSE;
3506 if (!plainc
3507 && nestlev > 0 && definedef == dnone)
3508 /* in struct body */
3509 {
3510 write_classname (&token_name, qualifier);
3511 linebuffer_setlen (&token_name,
3512 token_name.len+qlen+toklen);
3513 strcat (token_name.buffer, qualifier);
3514 strncat (token_name.buffer,
3515 newlb.buffer + tokoff, toklen);
3516 token.named = TRUE;
3517 }
3518 else if (objdef == ocatseen)
3519 /* Objective C category */
3520 {
3521 int len = strlen (objtag) + 2 + toklen;
3522 linebuffer_setlen (&token_name, len);
3523 strcpy (token_name.buffer, objtag);
3524 strcat (token_name.buffer, "(");
3525 strncat (token_name.buffer,
3526 newlb.buffer + tokoff, toklen);
3527 strcat (token_name.buffer, ")");
3528 token.named = TRUE;
3529 }
3530 else if (objdef == omethodtag
3531 || objdef == omethodparm)
3532 /* Objective C method */
3533 {
3534 token.named = TRUE;
3535 }
3536 else if (fvdef == fdefunname)
3537 /* GNU DEFUN and similar macros */
3538 {
3539 bool defun = (newlb.buffer[tokoff] == 'F');
3540 int off = tokoff;
3541 int len = toklen;
3542
3543 /* Rewrite the tag so that emacs lisp DEFUNs
3544 can be found by their elisp name */
3545 if (defun)
3546 {
3547 off += 1;
3548 len -= 1;
3549 }
3550 linebuffer_setlen (&token_name, len);
3551 strncpy (token_name.buffer,
3552 newlb.buffer + off, len);
3553 token_name.buffer[len] = '\0';
3554 if (defun)
3555 while (--len >= 0)
3556 if (token_name.buffer[len] == '_')
3557 token_name.buffer[len] = '-';
3558 token.named = defun;
3559 }
3560 else
3561 {
3562 linebuffer_setlen (&token_name, toklen);
3563 strncpy (token_name.buffer,
3564 newlb.buffer + tokoff, toklen);
3565 token_name.buffer[toklen] = '\0';
3566 /* Name macros and members. */
3567 token.named = (structdef == stagseen
3568 || typdef == ttypeseen
3569 || typdef == tend
3570 || (funorvar
3571 && definedef == dignorerest)
3572 || (funorvar
3573 && definedef == dnone
3574 && structdef == snone
3575 && bracelev > 0));
3576 }
/* Record where the token was found so that a tag can be made later.  */
3577 token.lineno = lineno;
3578 token.offset = tokoff;
3579 token.length = toklen;
3580 token.line = newlb.buffer;
3581 token.linepos = newlinepos;
3582 token.valid = TRUE;
3583
/* Either keep the token pending (switching line buffers so that
   token.line stays intact when the next line is read) or tag it now.  */
3584 if (definedef == dnone
3585 && (fvdef == fvnameseen
3586 || fvdef == foperator
3587 || structdef == stagseen
3588 || typdef == tend
3589 || typdef == ttypeseen
3590 || objdef != onone))
3591 {
3592 if (current_lb_is_new)
3593 switch_line_buffers ();
3594 }
3595 else if (definedef != dnone
3596 || fvdef == fdefunname
3597 || instruct)
3598 make_C_tag (funorvar);
3599 }
3600 else /* not yacc and consider_token failed */
3601 {
3602 if (inattribute && fvdef == fignore)
3603 {
3604 /* We have just met __attribute__ after a
3605 function parameter list: do not tag the
3606 function again. */
3607 fvdef = fvnone;
3608 }
3609 }
3610 midtoken = FALSE;
3611 }
3612 } /* if (endtoken (c)) */
3613 else if (intoken (c))
3614 still_in_token:
3615 {
3616 toklen++;
3617 continue;
3618 }
3619 } /* if (midtoken) */
3620 else if (begtoken (c))
3621 {
3622 switch (definedef)
3623 {
3624 case dnone:
3625 switch (fvdef)
3626 {
3627 case fstartlist:
3628 /* This prevents tagging fb in
3629 void (__attribute__((noreturn)) *fb) (void);
3630 Fixing this is not easy and not very important. */
3631 fvdef = finlist;
3632 continue;
3633 case flistseen:
3634 if (plainc || declarations)
3635 {
3636 make_C_tag (TRUE); /* a function */
3637 fvdef = fignore;
3638 }
3639 break;
3640 }
3641 if (structdef == stagseen && !cjava)
3642 {
3643 popclass_above (bracelev);
3644 structdef = snone;
3645 }
3646 break;
3647 case dsharpseen:
3648 savetoken = token;
3649 break;
3650 }
3651 if (!yacc_rules || lp == newlb.buffer + 1)
3652 {
3653 tokoff = lp - 1 - newlb.buffer;
3654 toklen = 1;
3655 midtoken = TRUE;
3656 }
3657 continue;
3658 } /* if (begtoken) */
3659 } /* if must look at token */
3660
3661
3662 /* Detect end of line, colon, comma, semicolon and various braces
3663 after having handled a token.*/
3664 switch (c)
3665 {
3666 case ':':
3667 if (inattribute)
3668 break;
3669 if (yacc_rules && token.offset == 0 && token.valid)
3670 {
3671 make_C_tag (FALSE); /* a yacc function */
3672 break;
3673 }
3674 if (definedef != dnone)
3675 break;
3676 switch (objdef)
3677 {
3678 case otagseen:
3679 objdef = oignore;
3680 make_C_tag (TRUE); /* an Objective C class */
3681 break;
3682 case omethodtag:
3683 case omethodparm:
3684 objdef = omethodcolon;
3685 linebuffer_setlen (&token_name, token_name.len + 1);
3686 strcat (token_name.buffer, ":");
3687 break;
3688 }
3689 if (structdef == stagseen)
3690 {
3691 structdef = scolonseen;
3692 break;
3693 }
3694 /* Should be useless, but may work as a safety net. */
3695 if (cplpl && fvdef == flistseen)
3696 {
3697 make_C_tag (TRUE); /* a function */
3698 fvdef = fignore;
3699 break;
3700 }
3701 break;
3702 case ';':
3703 if (definedef != dnone || inattribute)
3704 break;
3705 switch (typdef)
3706 {
3707 case tend:
3708 case ttypeseen:
3709 make_C_tag (FALSE); /* a typedef */
3710 typdef = tnone;
3711 fvdef = fvnone;
3712 break;
3713 case tnone:
3714 case tinbody:
3715 case tignore:
3716 switch (fvdef)
3717 {
3718 case fignore:
3719 if (typdef == tignore || cplpl)
3720 fvdef = fvnone;
3721 break;
3722 case fvnameseen:
3723 if ((globals && bracelev == 0 && (!fvextern || declarations))
3724 || (members && instruct))
3725 make_C_tag (FALSE); /* a variable */
3726 fvextern = FALSE;
3727 fvdef = fvnone;
3728 token.valid = FALSE;
3729 break;
3730 case flistseen:
3731 if ((declarations
3732 && (cplpl || !instruct)
3733 && (typdef == tnone || (typdef != tignore && instruct)))
3734 || (members
3735 && plainc && instruct))
3736 make_C_tag (TRUE); /* a function */
3737 /* FALLTHRU */
3738 default:
3739 fvextern = FALSE;
3740 fvdef = fvnone;
3741 if (declarations
3742 && cplpl && structdef == stagseen)
3743 make_C_tag (FALSE); /* forward declaration */
3744 else
3745 token.valid = FALSE;
3746 } /* switch (fvdef) */
3747 /* FALLTHRU */
3748 default:
3749 if (!instruct)
3750 typdef = tnone;
3751 }
3752 if (structdef == stagseen)
3753 structdef = snone;
3754 break;
3755 case ',':
3756 if (definedef != dnone || inattribute)
3757 break;
3758 switch (objdef)
3759 {
3760 case omethodtag:
3761 case omethodparm:
3762 make_C_tag (TRUE); /* an Objective C method */
3763 objdef = oinbody;
3764 break;
3765 }
3766 switch (fvdef)
3767 {
3768 case fdefunkey:
3769 case foperator:
3770 case fstartlist:
3771 case finlist:
3772 case fignore:
3773 case vignore:
3774 break;
3775 case fdefunname:
3776 fvdef = fignore;
3777 break;
3778 case fvnameseen:
3779 if (parlev == 0
3780 && ((globals
3781 && bracelev == 0
3782 && templatelev == 0
3783 && (!fvextern || declarations))
3784 || (members && instruct)))
3785 make_C_tag (FALSE); /* a variable */
3786 break;
3787 case flistseen:
3788 if ((declarations && typdef == tnone && !instruct)
3789 || (members && typdef != tignore && instruct))
3790 {
3791 make_C_tag (TRUE); /* a function */
3792 fvdef = fvnameseen;
3793 }
3794 else if (!declarations)
3795 fvdef = fvnone;
3796 token.valid = FALSE;
3797 break;
3798 default:
3799 fvdef = fvnone;
3800 }
3801 if (structdef == stagseen)
3802 structdef = snone;
3803 break;
3804 case ']':
3805 if (definedef != dnone || inattribute)
3806 break;
3807 if (structdef == stagseen)
3808 structdef = snone;
3809 switch (typdef)
3810 {
3811 case ttypeseen:
3812 case tend:
3813 typdef = tignore;
3814 make_C_tag (FALSE); /* a typedef */
3815 break;
3816 case tnone:
3817 case tinbody:
3818 switch (fvdef)
3819 {
3820 case foperator:
3821 case finlist:
3822 case fignore:
3823 case vignore:
3824 break;
3825 case fvnameseen:
3826 if ((members && bracelev == 1)
3827 || (globals && bracelev == 0
3828 && (!fvextern || declarations)))
3829 make_C_tag (FALSE); /* a variable */
3830 /* FALLTHRU */
3831 default:
3832 fvdef = fvnone;
3833 }
3834 break;
3835 }
3836 break;
3837 case '(':
3838 if (inattribute)
3839 {
3840 attrparlev++;
3841 break;
3842 }
3843 if (definedef != dnone)
3844 break;
3845 if (objdef == otagseen && parlev == 0)
3846 objdef = oparenseen;
3847 switch (fvdef)
3848 {
3849 case fvnameseen:
3850 if (typdef == ttypeseen
3851 && *lp != '*'
3852 && !instruct)
3853 {
3854 /* This handles constructs like:
3855 typedef void OperatorFun (int fun); */
3856 make_C_tag (FALSE);
3857 typdef = tignore;
3858 fvdef = fignore;
3859 break;
3860 }
3861 /* FALLTHRU */
3862 case foperator:
3863 fvdef = fstartlist;
3864 break;
3865 case flistseen:
3866 fvdef = finlist;
3867 break;
3868 }
3869 parlev++;
3870 break;
3871 case ')':
3872 if (inattribute)
3873 {
3874 if (--attrparlev == 0)
3875 inattribute = FALSE;
3876 break;
3877 }
3878 if (definedef != dnone)
3879 break;
3880 if (objdef == ocatseen && parlev == 1)
3881 {
3882 make_C_tag (TRUE); /* an Objective C category */
3883 objdef = oignore;
3884 }
3885 if (--parlev == 0)
3886 {
3887 switch (fvdef)
3888 {
3889 case fstartlist:
3890 case finlist:
3891 fvdef = flistseen;
3892 break;
3893 }
3894 if (!instruct
3895 && (typdef == tend
3896 || typdef == ttypeseen))
3897 {
3898 typdef = tignore;
3899 make_C_tag (FALSE); /* a typedef */
3900 }
3901 }
3902 else if (parlev < 0) /* can happen due to ill-conceived #if's. */
3903 parlev = 0;
3904 break;
3905 case '{':
3906 if (definedef != dnone)
3907 break;
3908 if (typdef == ttypeseen)
3909 {
3910 /* Whenever typdef is set to tinbody (currently only
3911 here), typdefbracelev should be set to bracelev. */
3912 typdef = tinbody;
3913 typdefbracelev = bracelev;
3914 }
3915 switch (fvdef)
3916 {
3917 case flistseen:
3918 make_C_tag (TRUE); /* a function */
3919 /* FALLTHRU */
3920 case fignore:
3921 fvdef = fvnone;
3922 break;
3923 case fvnone:
3924 switch (objdef)
3925 {
3926 case otagseen:
3927 make_C_tag (TRUE); /* an Objective C class */
3928 objdef = oignore;
3929 break;
3930 case omethodtag:
3931 case omethodparm:
3932 make_C_tag (TRUE); /* an Objective C method */
3933 objdef = oinbody;
3934 break;
3935 default:
3936 /* Neutralize `extern "C" {' grot. */
3937 if (bracelev == 0 && structdef == snone && nestlev == 0
3938 && typdef == tnone)
3939 bracelev = -1;
3940 }
3941 break;
3942 }
3943 switch (structdef)
3944 {
3945 case skeyseen: /* unnamed struct */
3946 pushclass_above (bracelev, NULL, 0);
3947 structdef = snone;
3948 break;
3949 case stagseen: /* named struct or enum */
3950 case scolonseen: /* a class */
3951 pushclass_above (bracelev,token.line+token.offset, token.length);
3952 structdef = snone;
3953 make_C_tag (FALSE); /* a struct or enum */
3954 break;
3955 }
3956 bracelev += 1;
3957 break;
3958 case '*':
3959 if (definedef != dnone)
3960 break;
3961 if (fvdef == fstartlist)
3962 {
3963 fvdef = fvnone; /* avoid tagging `foo' in `foo (*bar()) ()' */
3964 token.valid = FALSE;
3965 }
3966 break;
3967 case '}':
3968 if (definedef != dnone)
3969 break;
3970 bracelev -= 1;
3971 if (!ignoreindent && lp == newlb.buffer + 1)
3972 {
3973 if (bracelev != 0)
3974 token.valid = FALSE; /* unexpected value, token unreliable */
3975 bracelev = 0; /* reset brace level if first column */
3976 parlev = 0; /* also reset paren level, just in case... */
3977 }
3978 else if (bracelev < 0)
3979 {
3980 token.valid = FALSE; /* something gone amiss, token unreliable */
3981 bracelev = 0;
3982 }
3983 if (bracelev == 0 && fvdef == vignore)
3984 fvdef = fvnone; /* end of function */
3985 popclass_above (bracelev);
3986 structdef = snone;
3987 /* Only if typdef == tinbody is typdefbracelev significant. */
3988 if (typdef == tinbody && bracelev <= typdefbracelev)
3989 {
3990 assert (bracelev == typdefbracelev);
3991 typdef = tend;
3992 }
3993 break;
3994 case '=':
3995 if (definedef != dnone)
3996 break;
3997 switch (fvdef)
3998 {
3999 case foperator:
4000 case finlist:
4001 case fignore:
4002 case vignore:
4003 break;
4004 case fvnameseen:
4005 if ((members && bracelev == 1)
4006 || (globals && bracelev == 0 && (!fvextern || declarations)))
4007 make_C_tag (FALSE); /* a variable */
4008 /* FALLTHRU */
4009 default:
4010 fvdef = vignore;
4011 }
4012 break;
4013 case '<':
4014 if (cplpl
4015 && (structdef == stagseen || fvdef == fvnameseen))
4016 {
4017 templatelev++;
4018 break;
4019 }
4020 goto resetfvdef;
4021 case '>':
4022 if (templatelev > 0)
4023 {
4024 templatelev--;
4025 break;
4026 }
4027 goto resetfvdef;
4028 case '+':
4029 case '-':
4030 if (objdef == oinbody && bracelev == 0)
4031 {
4032 objdef = omethodsign;
4033 break;
4034 }
4035 /* FALLTHRU */
4036 resetfvdef:
4037 case '#': case '~': case '&': case '%': case '/':
4038 case '|': case '^': case '!': case '.': case '?':
4039 if (definedef != dnone)
4040 break;
4041 /* These surely cannot follow a function tag in C. */
4042 switch (fvdef)
4043 {
4044 case foperator:
4045 case finlist:
4046 case fignore:
4047 case vignore:
4048 break;
4049 default:
4050 fvdef = fvnone;
4051 }
4052 break;
4053 case '\0':
4054 if (objdef == otagseen)
4055 {
4056 make_C_tag (TRUE); /* an Objective C class */
4057 objdef = oignore;
4058 }
4059 /* If a macro spans multiple lines don't reset its state. */
4060 if (quotednl)
4061 CNL_SAVE_DEFINEDEF ();
4062 else
4063 CNL ();
4064 break;
4065 } /* switch (c) */
4066
4067 } /* while not eof */
4068
/* Release the two line buffers set up at the top of the function.  */
4069 free (lbs[0].lb.buffer);
4070 free (lbs[1].lb.buffer);
4071 }
4072
4073 /*
4074 * Process either a C++ file or a C file depending on the setting
4075 * of a global flag.
4076 */
4077 static void
4078 default_C_entries (inf)
4079 FILE *inf;
4080 {
4081 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4082 }
4083
/* Tag INF as plain C: no extension bits are passed to C_entries.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;

  C_entries (c_ext, inf);
}
4091
4092 /* Always do C++. */
4093 static void
4094 Cplusplus_entries (inf)
4095 FILE *inf;
4096 {
4097 C_entries (C_PLPL, inf);
4098 }
4099
4100 /* Always do Java. */
4101 static void
4102 Cjava_entries (inf)
4103 FILE *inf;
4104 {
4105 C_entries (C_JAVA, inf);
4106 }
4107
4108 /* Always do C*. */
4109 static void
4110 Cstar_entries (inf)
4111 FILE *inf;
4112 {
4113 C_entries (C_STAR, inf);
4114 }
4115
4116 /* Always do Yacc. */
4117 static void
4118 Yacc_entries (inf)
4119 FILE *inf;
4120 {
4121 C_entries (YACC, inf);
4122 }
4123
4124 \f
4125 /* Useful macros. */
/* Loop header: while file_pointer is not at end of file, read the next
   line into line_buffer and point char_pointer at the start of its text.
   The statement that follows the macro use is the loop body.  */
4126 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer) \
4127 for (; /* loop initialization */ \
4128 !feof (file_pointer) /* loop test */ \
4129 && /* instructions at start of loop */ \
4130 (readline (&line_buffer, file_pointer), \
4131 char_pointer = line_buffer.buffer, \
4132 TRUE); \
4133 )
4134
/* True if cp points at keyword kw followed by a character for which
   notinname holds.  On a match, cp is advanced past the keyword and any
   spaces after it; cp must therefore be an lvalue.  On a mismatch cp is
   left unchanged.  */
4135 #define LOOKING_AT(cp, kw) /* kw is the keyword, a literal string */ \
4136 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4137 && strneq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4138 && notinname ((cp)[sizeof(kw)-1]) /* end of kw */ \
4139 && ((cp) = skip_spaces((cp)+sizeof(kw)-1))) /* skip spaces */
4140
4141 /* Similar to LOOKING_AT, but the comparison ignores case, notinname is
   not used to check what follows the keyword, and on a match cp is only
   advanced past the keyword: following spaces are not skipped. */
4142 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4143 ((assert("" kw), TRUE) /* syntax error if not a literal string */ \
4144 && strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */ \
4145 && ((cp) += sizeof(kw)-1)) /* skip past kw */
4146
4147 /*
4148 * Read a file, but do no processing. This is used to do regexp
4149 * matching on files that have no language defined.
4150 */
4151 static void
4152 just_read_file (inf)
4153 FILE *inf;
4154 {
4155 register char *dummy;
4156
4157 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4158 continue;
4159 }
4160
4161 \f
4162 /* Fortran parsing */
4163
4164 static void F_takeprec __P((void));
4165 static void F_getit __P((FILE *));
4166
4167 static void
4168 F_takeprec ()
4169 {
4170 dbp = skip_spaces (dbp);
4171 if (*dbp != '*')
4172 return;
4173 dbp++;
4174 dbp = skip_spaces (dbp);
4175 if (strneq (dbp, "(*)", 3))
4176 {
4177 dbp += 3;
4178 return;
4179 }
4180 if (!ISDIGIT (*dbp))
4181 {
4182 --dbp; /* force failure */
4183 return;
4184 }
4185 do
4186 dbp++;
4187 while (ISDIGIT (*dbp));
4188 }
4189
4190 static void
4191 F_getit (inf)
4192 FILE *inf;
4193 {
4194 register char *cp;
4195
4196 dbp = skip_spaces (dbp);
4197 if (*dbp == '\0')
4198 {
4199 readline (&lb, inf);
4200 dbp = lb.buffer;
4201 if (dbp[5] != '&')
4202 return;
4203 dbp += 6;
4204 dbp = skip_spaces (dbp);
4205 }
4206 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4207 return;
4208 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4209 continue;
4210 make_tag (dbp, cp-dbp, TRUE,
4211 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4212 }
4213
4214
4215 static void
4216 Fortran_functions (inf)
4217 FILE *inf;
4218 {
4219 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4220 {
4221 if (*dbp == '%')
4222 dbp++; /* Ratfor escape to fortran */
4223 dbp = skip_spaces (dbp);
4224 if (*dbp == '\0')
4225 continue;
4226 switch (lowcase (*dbp))
4227 {
4228 case 'i':
4229 if (nocase_tail ("integer"))
4230 F_takeprec ();
4231 break;
4232 case 'r':
4233 if (nocase_tail ("real"))
4234 F_takeprec ();
4235 break;
4236 case 'l':
4237 if (nocase_tail ("logical"))
4238 F_takeprec ();
4239 break;
4240 case 'c':
4241 if (nocase_tail ("complex") || nocase_tail ("character"))
4242 F_takeprec ();
4243 break;
4244 case 'd':
4245 if (nocase_tail ("double"))
4246 {
4247 dbp = skip_spaces (dbp);
4248 if (*dbp == '\0')
4249 continue;
4250 if (nocase_tail ("precision"))
4251 break;
4252 continue;
4253 }
4254 break;
4255 }
4256 dbp = skip_spaces (dbp);
4257 if (*dbp == '\0')
4258 continue;
4259 switch (lowcase (*dbp))
4260 {
4261 case 'f':
4262 if (nocase_tail ("function"))
4263 F_getit (inf);
4264 continue;
4265 case 's':
4266 if (nocase_tail ("subroutine"))
4267 F_getit (inf);
4268 continue;
4269 case 'e':
4270 if (nocase_tail ("entry"))
4271 F_getit (inf);
4272 continue;
4273 case 'b':
4274 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4275 {
4276 dbp = skip_spaces (dbp);
4277 if (*dbp == '\0') /* assume un-named */
4278 make_tag ("blockdata", 9, TRUE,
4279 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4280 else
4281 F_getit (inf); /* look for name */
4282 }
4283 continue;
4284 }
4285 }
4286 }
4287
4288 \f
4289 /*
4290 * Ada parsing
4291 * Original code by
4292 * Philippe Waroquiers (1998)
4293 */
4294
4295 static void Ada_getit __P((FILE *, char *));
4296
4297 /* Once we are positioned after an "interesting" keyword, let's get
4298 the real tag value necessary. */
4299 static void
4300 Ada_getit (inf, name_qualifier)
4301 FILE *inf;
4302 char *name_qualifier;
4303 {
4304 register char *cp;
4305 char *name;
4306 char c;
4307
4308 while (!feof (inf))
4309 {
4310 dbp = skip_spaces (dbp);
4311 if (*dbp == '\0'
4312 || (dbp[0] == '-' && dbp[1] == '-'))
4313 {
4314 readline (&lb, inf);
4315 dbp = lb.buffer;
4316 }
4317 switch (lowcase(*dbp))
4318 {
4319 case 'b':
4320 if (nocase_tail ("body"))
4321 {
4322 /* Skipping body of procedure body or package body or ....
4323 resetting qualifier to body instead of spec. */
4324 name_qualifier = "/b";
4325 continue;
4326 }
4327 break;
4328 case 't':
4329 /* Skipping type of task type or protected type ... */
4330 if (nocase_tail ("type"))
4331 continue;
4332 break;
4333 }
4334 if (*dbp == '"')
4335 {
4336 dbp += 1;
4337 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4338 continue;
4339 }
4340 else
4341 {
4342 dbp = skip_spaces (dbp);
4343 for (cp = dbp;
4344 (*cp != '\0'
4345 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4346 cp++)
4347 continue;
4348 if (cp == dbp)
4349 return;
4350 }
4351 c = *cp;
4352 *cp = '\0';
4353 name = concat (dbp, name_qualifier, "");
4354 *cp = c;
4355 make_tag (name, strlen (name), TRUE,
4356 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4357 free (name);
4358 if (c == '"')
4359 dbp = cp + 1;
4360 return;
4361 }
4362 }
4363
4364 static void
4365 Ada_funcs (inf)
4366 FILE *inf;
4367 {
4368 bool inquote = FALSE;
4369 bool skip_till_semicolumn = FALSE;
4370
4371 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4372 {
4373 while (*dbp != '\0')
4374 {
4375 /* Skip a string i.e. "abcd". */
4376 if (inquote || (*dbp == '"'))
4377 {
4378 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4379 if (dbp != NULL)
4380 {
4381 inquote = FALSE;
4382 dbp += 1;
4383 continue; /* advance char */
4384 }
4385 else
4386 {
4387 inquote = TRUE;
4388 break; /* advance line */
4389 }
4390 }
4391
4392 /* Skip comments. */
4393 if (dbp[0] == '-' && dbp[1] == '-')
4394 break; /* advance line */
4395
4396 /* Skip character enclosed in single quote i.e. 'a'
4397 and skip single quote starting an attribute i.e. 'Image. */
4398 if (*dbp == '\'')
4399 {
4400 dbp++ ;
4401 if (*dbp != '\0')
4402 dbp++;
4403 continue;
4404 }
4405
4406 if (skip_till_semicolumn)
4407 {
4408 if (*dbp == ';')
4409 skip_till_semicolumn = FALSE;
4410 dbp++;
4411 continue; /* advance char */
4412 }
4413
4414 /* Search for beginning of a token. */
4415 if (!begtoken (*dbp))
4416 {
4417 dbp++;
4418 continue; /* advance char */
4419 }
4420
4421 /* We are at the beginning of a token. */
4422 switch (lowcase(*dbp))
4423 {
4424 case 'f':
4425 if (!packages_only && nocase_tail ("function"))
4426 Ada_getit (inf, "/f");
4427 else
4428 break; /* from switch */
4429 continue; /* advance char */
4430 case 'p':
4431 if (!packages_only && nocase_tail ("procedure"))
4432 Ada_getit (inf, "/p");
4433 else if (nocase_tail ("package"))
4434 Ada_getit (inf, "/s");
4435 else if (nocase_tail ("protected")) /* protected type */
4436 Ada_getit (inf, "/t");
4437 else
4438 break; /* from switch */
4439 continue; /* advance char */
4440
4441 case 'u':
4442 if (typedefs && !packages_only && nocase_tail ("use"))
4443 {
4444 /* when tagging types, avoid tagging use type Pack.Typename;
4445 for this, we will skip everything till a ; */
4446 skip_till_semicolumn = TRUE;
4447 continue; /* advance char */
4448 }
4449
4450 case 't':
4451 if (!packages_only && nocase_tail ("task"))
4452 Ada_getit (inf, "/k");
4453 else if (typedefs && !packages_only && nocase_tail ("type"))
4454 {
4455 Ada_getit (inf, "/t");
4456 while (*dbp != '\0')
4457 dbp += 1;
4458 }
4459 else
4460 break; /* from switch */
4461 continue; /* advance char */
4462 }
4463
4464 /* Look for the end of the token. */
4465 while (!endtoken (*dbp))
4466 dbp++;
4467
4468 } /* advance char */
4469 } /* advance line */
4470 }
4471
4472 \f
4473 /*
4474 * Unix and microcontroller assembly tag handling
4475 * Labels: /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4476 * Idea by Bob Weiner, Motorola Inc. (1994)
4477 */
4478 static void
4479 Asm_labels (inf)
4480 FILE *inf;
4481 {
4482 register char *cp;
4483
4484 LOOP_ON_INPUT_LINES (inf, lb, cp)
4485 {
4486 /* If first char is alphabetic or one of [_.$], test for colon
4487 following identifier. */
4488 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4489 {
4490 /* Read past label. */
4491 cp++;
4492 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4493 cp++;
4494 if (*cp == ':' || iswhite (*cp))
4495 /* Found end of label, so copy it and add it to the table. */
4496 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4497 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4498 }
4499 }
4500 }
4501
4502 \f
4503 /*
4504 * Perl support
4505 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4506 * Perl variable names: /^(my|local).../
4507 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4508 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4509 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4510 */
4511 static void
4512 Perl_functions (inf)
4513 FILE *inf;
4514 {
4515 char *package = savestr ("main"); /* current package name */
4516 register char *cp;
4517
4518 LOOP_ON_INPUT_LINES (inf, lb, cp)
4519 {
4520 skip_spaces(cp);
4521
4522 if (LOOKING_AT (cp, "package"))
4523 {
4524 free (package);
4525 get_tag (cp, &package);
4526 }
4527 else if (LOOKING_AT (cp, "sub"))
4528 {
4529 char *pos;
4530 char *sp = cp;
4531
4532 while (!notinname (*cp))
4533 cp++;
4534 if (cp == sp)
4535 continue; /* nothing found */
4536 if ((pos = etags_strchr (sp, ':')) != NULL
4537 && pos < cp && pos[1] == ':')
4538 /* The name is already qualified. */
4539 make_tag (sp, cp - sp, TRUE,
4540 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4541 else
4542 /* Qualify it. */
4543 {
4544 char savechar, *name;
4545
4546 savechar = *cp;
4547 *cp = '\0';
4548 name = concat (package, "::", sp);
4549 *cp = savechar;
4550 make_tag (name, strlen(name), TRUE,
4551 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4552 free (name);
4553 }
4554 }
4555 else if (globals) /* only if we are tagging global vars */
4556 {
4557 /* Skip a qualifier, if any. */
4558 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4559 /* After "my" or "local", but before any following paren or space. */
4560 char *varstart = cp;
4561
4562 if (qual /* should this be removed? If yes, how? */
4563 && (*cp == '$' || *cp == '@' || *cp == '%'))
4564 {
4565 varstart += 1;
4566 do
4567 cp++;
4568 while (ISALNUM (*cp) || *cp == '_');
4569 }
4570 else if (qual)
4571 {
4572 /* Should be examining a variable list at this point;
4573 could insist on seeing an open parenthesis. */
4574 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4575 cp++;
4576 }
4577 else
4578 continue;
4579
4580 make_tag (varstart, cp - varstart, FALSE,
4581 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4582 }
4583 }
4584 free (package);
4585 }
4586
4587
4588 /*
4589 * Python support
4590 * Look for /^[ \t]*def[ \t\n]+[^ \t\n(:]+/ or /^[ \t]*class[ \t\n]+[^ \t\n(:]+/
4591 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4592 * More ideas by seb bacon <seb@jamkit.com> (2002)
4593 */
4594 static void
4595 Python_functions (inf)
4596 FILE *inf;
4597 {
4598 register char *cp;
4599
4600 LOOP_ON_INPUT_LINES (inf, lb, cp)
4601 {
4602 cp = skip_spaces (cp);
4603 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4604 {
4605 char *name = cp;
4606 while (!notinname (*cp) && *cp != ':')
4607 cp++;
4608 make_tag (name, cp - name, TRUE,
4609 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4610 }
4611 }
4612 }
4613
4614 \f
4615 /*
4616 * PHP support
4617 * Look for:
4618 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4619 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4620 * - /^[ \t]*define\(\"[^\"]+/
4621 * Only with --members:
4622 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4623 * Idea by Diez B. Roggisch (2001)
4624 */
4625 static void
4626 PHP_functions (inf)
4627 FILE *inf;
4628 {
4629 register char *cp, *name;
4630 bool search_identifier = FALSE;
4631
4632 LOOP_ON_INPUT_LINES (inf, lb, cp)
4633 {
4634 cp = skip_spaces (cp);
4635 name = cp;
4636 if (search_identifier
4637 && *cp != '\0')
4638 {
4639 while (!notinname (*cp))
4640 cp++;
4641 make_tag (name, cp - name, TRUE,
4642 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4643 search_identifier = FALSE;
4644 }
4645 else if (LOOKING_AT (cp, "function"))
4646 {
4647 if(*cp == '&')
4648 cp = skip_spaces (cp+1);
4649 if(*cp != '\0')
4650 {
4651 name = cp;
4652 while (!notinname (*cp))
4653 cp++;
4654 make_tag (name, cp - name, TRUE,
4655 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4656 }
4657 else
4658 search_identifier = TRUE;
4659 }
4660 else if (LOOKING_AT (cp, "class"))
4661 {
4662 if (*cp != '\0')
4663 {
4664 name = cp;
4665 while (*cp != '\0' && !iswhite (*cp))
4666 cp++;
4667 make_tag (name, cp - name, FALSE,
4668 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4669 }
4670 else
4671 search_identifier = TRUE;
4672 }
4673 else if (strneq (cp, "define", 6)
4674 && (cp = skip_spaces (cp+6))
4675 && *cp++ == '('
4676 && (*cp == '"' || *cp == '\''))
4677 {
4678 char quote = *cp++;
4679 name = cp;
4680 while (*cp != quote && *cp != '\0')
4681 cp++;
4682 make_tag (name, cp - name, FALSE,
4683 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4684 }
4685 else if (members
4686 && LOOKING_AT (cp, "var")
4687 && *cp == '$')
4688 {
4689 name = cp;
4690 while (!notinname(*cp))
4691 cp++;
4692 make_tag (name, cp - name, FALSE,
4693 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4694 }
4695 }
4696 }
4697
4698 \f
4699 /*
4700 * Cobol tag functions
4701 * We look for anything that could be a paragraph name,
4702 * i.e. anything that starts in column 8, is one word, and ends in a full stop.
4703 * Idea by Corny de Souza (1993)
4704 */
4705 static void
4706 Cobol_paragraphs (inf)
4707 FILE *inf;
4708 {
4709 register char *bp, *ep;
4710
4711 LOOP_ON_INPUT_LINES (inf, lb, bp)
4712 {
4713 if (lb.len < 9)
4714 continue;
4715 bp += 8;
4716
4717 /* If eoln, compiler option or comment ignore whole line. */
4718 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4719 continue;
4720
4721 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4722 continue;
4723 if (*ep++ == '.')
4724 make_tag (bp, ep - bp, TRUE,
4725 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4726 }
4727 }
4728
4729 \f
4730 /*
4731 * Makefile support
4732 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4733 */
4734 static void
4735 Makefile_targets (inf)
4736 FILE *inf;
4737 {
4738 register char *bp;
4739
4740 LOOP_ON_INPUT_LINES (inf, lb, bp)
4741 {
4742 if (*bp == '\t' || *bp == '#')
4743 continue;
4744 while (*bp != '\0' && *bp != '=' && *bp != ':')
4745 bp++;
4746 if (*bp == ':' || (globals && *bp == '='))
4747 {
4748 /* We should detect if there is more than one tag, but we do not.
4749 We just skip initial and final spaces. */
4750 char * namestart = skip_spaces (lb.buffer);
4751 while (--bp > namestart)
4752 if (!notinname (*bp))
4753 break;
4754 make_tag (namestart, bp - namestart + 1, TRUE,
4755 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4756 }
4757 }
4758 }
4759
4760 \f
4761 /*
4762 * Pascal parsing
4763 * Original code by Mosur K. Mohan (1989)
4764 *
4765 * Locates tags for procedures & functions. Doesn't do any type- or
4766 * var-definitions. It does look for the keyword "extern" or
4767 * "forward" immediately following the procedure statement; if found,
4768 * the tag is skipped.
4769 */
4770 static void
4771 Pascal_functions (inf)
4772 FILE *inf;
4773 {
4774 linebuffer tline; /* mostly copied from C_entries */
4775 long save_lcno;
4776 int save_lineno, namelen, taglen;
4777 char c, *name;
4778
4779 bool /* each of these flags is TRUE if: */
4780 incomment, /* point is inside a comment */
4781 inquote, /* point is inside '..' string */
4782 get_tagname, /* point is after PROCEDURE/FUNCTION
4783 keyword, so next item = potential tag */
4784 found_tag, /* point is after a potential tag */
4785 inparms, /* point is within parameter-list */
4786 verify_tag; /* point has passed the parm-list, so the
4787 next token will determine whether this
4788 is a FORWARD/EXTERN to be ignored, or
4789 whether it is a real tag */
4790
4791 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4792 name = NULL; /* keep compiler quiet */
4793 dbp = lb.buffer;
4794 *dbp = '\0';
4795 linebuffer_init (&tline);
4796
4797 incomment = inquote = FALSE;
4798 found_tag = FALSE; /* have a proc name; check if extern */
4799 get_tagname = FALSE; /* found "procedure" keyword */
4800 inparms = FALSE; /* found '(' after "proc" */
4801 verify_tag = FALSE; /* check if "extern" is ahead */
4802
4803
4804 while (!feof (inf)) /* long main loop to get next char */
4805 {
4806 c = *dbp++;
4807 if (c == '\0') /* if end of line */
4808 {
4809 readline (&lb, inf);
4810 dbp = lb.buffer;
4811 if (*dbp == '\0')
4812 continue;
4813 if (!((found_tag && verify_tag)
4814 || get_tagname))
4815 c = *dbp++; /* only if don't need *dbp pointing
4816 to the beginning of the name of
4817 the procedure or function */
4818 }
4819 if (incomment)
4820 {
4821 if (c == '}') /* within { } comments */
4822 incomment = FALSE;
4823 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4824 {
4825 dbp++;
4826 incomment = FALSE;
4827 }
4828 continue;
4829 }
4830 else if (inquote)
4831 {
4832 if (c == '\'')
4833 inquote = FALSE;
4834 continue;
4835 }
4836 else
4837 switch (c)
4838 {
4839 case '\'':
4840 inquote = TRUE; /* found first quote */
4841 continue;
4842 case '{': /* found open { comment */
4843 incomment = TRUE;
4844 continue;
4845 case '(':
4846 if (*dbp == '*') /* found open (* comment */
4847 {
4848 incomment = TRUE;
4849 dbp++;
4850 }
4851 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4852 inparms = TRUE;
4853 continue;
4854 case ')': /* end of parms list */
4855 if (inparms)
4856 inparms = FALSE;
4857 continue;
4858 case ';':
4859 if (found_tag && !inparms) /* end of proc or fn stmt */
4860 {
4861 verify_tag = TRUE;
4862 break;
4863 }
4864 continue;
4865 }
4866 if (found_tag && verify_tag && (*dbp != ' '))
4867 {
4868 /* Check if this is an "extern" declaration. */
4869 if (*dbp == '\0')
4870 continue;
4871 if (lowcase (*dbp == 'e'))
4872 {
4873 if (nocase_tail ("extern")) /* superfluous, really! */
4874 {
4875 found_tag = FALSE;
4876 verify_tag = FALSE;
4877 }
4878 }
4879 else if (lowcase (*dbp) == 'f')
4880 {
4881 if (nocase_tail ("forward")) /* check for forward reference */
4882 {
4883 found_tag = FALSE;
4884 verify_tag = FALSE;
4885 }
4886 }
4887 if (found_tag && verify_tag) /* not external proc, so make tag */
4888 {
4889 found_tag = FALSE;
4890 verify_tag = FALSE;
4891 make_tag (name, namelen, TRUE,
4892 tline.buffer, taglen, save_lineno, save_lcno);
4893 continue;
4894 }
4895 }
4896 if (get_tagname) /* grab name of proc or fn */
4897 {
4898 char *cp;
4899
4900 if (*dbp == '\0')
4901 continue;
4902
4903 /* Find block name. */
4904 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4905 continue;
4906
4907 /* Save all values for later tagging. */
4908 linebuffer_setlen (&tline, lb.len);
4909 strcpy (tline.buffer, lb.buffer);
4910 save_lineno = lineno;
4911 save_lcno = linecharno;
4912 name = tline.buffer + (dbp - lb.buffer);
4913 namelen = cp - dbp;
4914 taglen = cp - lb.buffer + 1;
4915
4916 dbp = cp; /* set dbp to e-o-token */
4917 get_tagname = FALSE;
4918 found_tag = TRUE;
4919 continue;
4920
4921 /* And proceed to check for "extern". */
4922 }
4923 else if (!incomment && !inquote && !found_tag)
4924 {
4925 /* Check for proc/fn keywords. */
4926 switch (lowcase (c))
4927 {
4928 case 'p':
4929 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4930 get_tagname = TRUE;
4931 continue;
4932 case 'f':
4933 if (nocase_tail ("unction"))
4934 get_tagname = TRUE;
4935 continue;
4936 }
4937 }
4938 } /* while not eof */
4939
4940 free (tline.buffer);
4941 }
4942
4943 \f
4944 /*
4945 * Lisp tag functions
4946 * look for (def or (DEF, quote or QUOTE
4947 */
4948
4949 static void L_getit __P((void));
4950
4951 static void
4952 L_getit ()
4953 {
4954 if (*dbp == '\'') /* Skip prefix quote */
4955 dbp++;
4956 else if (*dbp == '(')
4957 {
4958 dbp++;
4959 /* Try to skip "(quote " */
4960 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4961 /* Ok, then skip "(" before name in (defstruct (foo)) */
4962 dbp = skip_spaces (dbp);
4963 }
4964 get_tag (dbp, NULL);
4965 }
4966
4967 static void
4968 Lisp_functions (inf)
4969 FILE *inf;
4970 {
4971 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4972 {
4973 if (dbp[0] != '(')
4974 continue;
4975
4976 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4977 {
4978 dbp = skip_non_spaces (dbp);
4979 dbp = skip_spaces (dbp);
4980 L_getit ();
4981 }
4982 else
4983 {
4984 /* Check for (foo::defmumble name-defined ... */
4985 do
4986 dbp++;
4987 while (!notinname (*dbp) && *dbp != ':');
4988 if (*dbp == ':')
4989 {
4990 do
4991 dbp++;
4992 while (*dbp == ':');
4993
4994 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4995 {
4996 dbp = skip_non_spaces (dbp);
4997 dbp = skip_spaces (dbp);
4998 L_getit ();
4999 }
5000 }
5001 }
5002 }
5003 }
5004
5005 \f
5006 /*
5007 * Lua script language parsing
5008 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5009 *
5010 * "function" and "local function" are tags if they start at column 1.
5011 */
5012 static void
5013 Lua_functions (inf)
5014 FILE *inf;
5015 {
5016 register char *bp;
5017
5018 LOOP_ON_INPUT_LINES (inf, lb, bp)
5019 {
5020 if (bp[0] != 'f' && bp[0] != 'l')
5021 continue;
5022
5023 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5024
5025 if (LOOKING_AT (bp, "function"))
5026 get_tag (bp, NULL);
5027 }
5028 }
5029
5030 \f
5031 /*
5032 * Postscript tags
5033 * Just look for lines where the first character is '/'
5034 * Also look at "defineps" for PSWrap
5035 * Ideas by:
5036 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
5037 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5038 */
5039 static void
5040 PS_functions (inf)
5041 FILE *inf;
5042 {
5043 register char *bp, *ep;
5044
5045 LOOP_ON_INPUT_LINES (inf, lb, bp)
5046 {
5047 if (bp[0] == '/')
5048 {
5049 for (ep = bp+1;
5050 *ep != '\0' && *ep != ' ' && *ep != '{';
5051 ep++)
5052 continue;
5053 make_tag (bp, ep - bp, TRUE,
5054 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5055 }
5056 else if (LOOKING_AT (bp, "defineps"))
5057 get_tag (bp, NULL);
5058 }
5059 }
5060
5061 \f
5062 /*
5063 * Forth tags
5064 * Ignore anything after \ followed by space or in ( )
5065 * Look for words defined by :
5066 * Look for constant, code, create, defer, value, and variable
5067 * OBP extensions: Look for buffer:, field,
5068 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5069 */
5070 static void
5071 Forth_words (inf)
5072 FILE *inf;
5073 {
5074 register char *bp;
5075
5076 LOOP_ON_INPUT_LINES (inf, lb, bp)
5077 while ((bp = skip_spaces (bp))[0] != '\0')
5078 if (bp[0] == '\\' && iswhite(bp[1]))
5079 break; /* read next line */
5080 else if (bp[0] == '(' && iswhite(bp[1]))
5081 do /* skip to ) or eol */
5082 bp++;
5083 while (*bp != ')' && *bp != '\0');
5084 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5085 || LOOKING_AT_NOCASE (bp, "constant")
5086 || LOOKING_AT_NOCASE (bp, "code")
5087 || LOOKING_AT_NOCASE (bp, "create")
5088 || LOOKING_AT_NOCASE (bp, "defer")
5089 || LOOKING_AT_NOCASE (bp, "value")
5090 || LOOKING_AT_NOCASE (bp, "variable")
5091 || LOOKING_AT_NOCASE (bp, "buffer:")
5092 || LOOKING_AT_NOCASE (bp, "field"))
5093 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5094 else
5095 bp = skip_non_spaces (bp);
5096 }
5097
5098 \f
5099 /*
5100 * Scheme tag functions
5101 * look for (def... xyzzy
5102 * (def... (xyzzy
5103 * (def ... ((...(xyzzy ....
5104 * (set! xyzzy
5105 * Original code by Ken Haase (1985?)
5106 */
5107 static void
5108 Scheme_functions (inf)
5109 FILE *inf;
5110 {
5111 register char *bp;
5112
5113 LOOP_ON_INPUT_LINES (inf, lb, bp)
5114 {
5115 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5116 {
5117 bp = skip_non_spaces (bp+4);
5118 /* Skip over open parens and white space */
5119 while (notinname (*bp))
5120 bp++;
5121 get_tag (bp, NULL);
5122 }
5123 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5124 get_tag (bp, NULL);
5125 }
5126 }
5127
5128 \f
5129 /* Find tags in TeX and LaTeX input files. */
5130
5131 /* TEX_toktab is a table of TeX control sequences that define tags.
5132 * Each entry records one such control sequence.
5133 *
5134 * Original code from who knows whom.
5135 * Ideas by:
5136 * Stefan Monnier (2002)
5137 */
5138
5139 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5140
5141 /* Default set of control sequences to put into TEX_toktab.
5142 The value of environment var TEXTAGS is prepended to this. */
5143 static char *TEX_defenv = "\
5144 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5145 :part:appendix:entry:index:def\
5146 :newcommand:renewcommand:newenvironment:renewenvironment";
5147
5148 static void TEX_mode __P((FILE *));
5149 static void TEX_decode_env __P((char *, char *));
5150
5151 static char TEX_esc = '\\';
5152 static char TEX_opgrp = '{';
5153 static char TEX_clgrp = '}';
5154
5155 /*
5156 * TeX/LaTeX scanning loop.
5157 */
5158 static void
5159 TeX_commands (inf)
5160 FILE *inf;
5161 {
5162 char *cp;
5163 linebuffer *key;
5164
5165 /* Select either \ or ! as escape character. */
5166 TEX_mode (inf);
5167
5168 /* Initialize token table once from environment. */
5169 if (TEX_toktab == NULL)
5170 TEX_decode_env ("TEXTAGS", TEX_defenv);
5171
5172 LOOP_ON_INPUT_LINES (inf, lb, cp)
5173 {
5174 /* Look at each TEX keyword in line. */
5175 for (;;)
5176 {
5177 /* Look for a TEX escape. */
5178 while (*cp++ != TEX_esc)
5179 if (cp[-1] == '\0' || cp[-1] == '%')
5180 goto tex_next_line;
5181
5182 for (key = TEX_toktab; key->buffer != NULL; key++)
5183 if (strneq (cp, key->buffer, key->len))
5184 {
5185 register char *p;
5186 int namelen, linelen;
5187 bool opgrp = FALSE;
5188
5189 cp = skip_spaces (cp + key->len);
5190 if (*cp == TEX_opgrp)
5191 {
5192 opgrp = TRUE;
5193 cp++;
5194 }
5195 for (p = cp;
5196 (!iswhite (*p) && *p != '#' &&
5197 *p != TEX_opgrp && *p != TEX_clgrp);
5198 p++)
5199 continue;
5200 namelen = p - cp;
5201 linelen = lb.len;
5202 if (!opgrp || *p == TEX_clgrp)
5203 {
5204 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5205 p++;
5206 linelen = p - lb.buffer + 1;
5207 }
5208 make_tag (cp, namelen, TRUE,
5209 lb.buffer, linelen, lineno, linecharno);
5210 goto tex_next_line; /* We only tag a line once */
5211 }
5212 }
5213 tex_next_line:
5214 ;
5215 }
5216 }
5217
5218 #define TEX_LESC '\\'
5219 #define TEX_SESC '!'
5220
5221 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5222 chars accordingly. */
5223 static void
5224 TEX_mode (inf)
5225 FILE *inf;
5226 {
5227 int c;
5228
5229 while ((c = getc (inf)) != EOF)
5230 {
5231 /* Skip to next line if we hit the TeX comment char. */
5232 if (c == '%')
5233 while (c != '\n' && c != EOF)
5234 c = getc (inf);
5235 else if (c == TEX_LESC || c == TEX_SESC )
5236 break;
5237 }
5238
5239 if (c == TEX_LESC)
5240 {
5241 TEX_esc = TEX_LESC;
5242 TEX_opgrp = '{';
5243 TEX_clgrp = '}';
5244 }
5245 else
5246 {
5247 TEX_esc = TEX_SESC;
5248 TEX_opgrp = '<';
5249 TEX_clgrp = '>';
5250 }
5251 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5252 No attempt is made to correct the situation. */
5253 rewind (inf);
5254 }
5255
5256 /* Read environment and prepend it to the default string.
5257 Build token table. */
5258 static void
5259 TEX_decode_env (evarname, defenv)
5260 char *evarname;
5261 char *defenv;
5262 {
5263 register char *env, *p;
5264 int i, len;
5265
5266 /* Append default string to environment. */
5267 env = getenv (evarname);
5268 if (!env)
5269 env = defenv;
5270 else
5271 {
5272 char *oldenv = env;
5273 env = concat (oldenv, defenv, "");
5274 }
5275
5276 /* Allocate a token table */
5277 for (len = 1, p = env; p;)
5278 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5279 len++;
5280 TEX_toktab = xnew (len, linebuffer);
5281
5282 /* Unpack environment string into token table. Be careful about */
5283 /* zero-length strings (leading ':', "::" and trailing ':') */
5284 for (i = 0; *env != '\0';)
5285 {
5286 p = etags_strchr (env, ':');
5287 if (!p) /* End of environment string. */
5288 p = env + strlen (env);
5289 if (p - env > 0)
5290 { /* Only non-zero strings. */
5291 TEX_toktab[i].buffer = savenstr (env, p - env);
5292 TEX_toktab[i].len = p - env;
5293 i++;
5294 }
5295 if (*p)
5296 env = p + 1;
5297 else
5298 {
5299 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5300 TEX_toktab[i].len = 0;
5301 break;
5302 }
5303 }
5304 }
5305
5306 \f
5307 /* Texinfo support. Dave Love, Mar. 2000. */
5308 static void
5309 Texinfo_nodes (inf)
5310 FILE * inf;
5311 {
5312 char *cp, *start;
5313 LOOP_ON_INPUT_LINES (inf, lb, cp)
5314 if (LOOKING_AT (cp, "@node"))
5315 {
5316 start = cp;
5317 while (*cp != '\0' && *cp != ',')
5318 cp++;
5319 make_tag (start, cp - start, TRUE,
5320 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5321 }
5322 }
5323
5324 \f
5325 /*
5326 * HTML support.
5327 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5328 * Contents of <a name=xxx> are tags with name xxx.
5329 *
5330 * Francesco Potortì, 2002.
5331 */
5332 static void
5333 HTML_labels (inf)
5334 FILE * inf;
5335 {
5336 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5337 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5338 bool intag = FALSE; /* inside an html tag, looking for ID= */
5339 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5340 char *end;
5341
5342
5343 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5344
5345 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5346 for (;;) /* loop on the same line */
5347 {
5348 if (skiptag) /* skip HTML tag */
5349 {
5350 while (*dbp != '\0' && *dbp != '>')
5351 dbp++;
5352 if (*dbp == '>')
5353 {
5354 dbp += 1;
5355 skiptag = FALSE;
5356 continue; /* look on the same line */
5357 }
5358 break; /* go to next line */
5359 }
5360
5361 else if (intag) /* look for "name=" or "id=" */
5362 {
5363 while (*dbp != '\0' && *dbp != '>'
5364 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5365 dbp++;
5366 if (*dbp == '\0')
5367 break; /* go to next line */
5368 if (*dbp == '>')
5369 {
5370 dbp += 1;
5371 intag = FALSE;
5372 continue; /* look on the same line */
5373 }
5374 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5375 || LOOKING_AT_NOCASE (dbp, "id="))
5376 {
5377 bool quoted = (dbp[0] == '"');
5378
5379 if (quoted)
5380 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5381 continue;
5382 else
5383 for (end = dbp; *end != '\0' && intoken (*end); end++)
5384 continue;
5385 linebuffer_setlen (&token_name, end - dbp);
5386 strncpy (token_name.buffer, dbp, end - dbp);
5387 token_name.buffer[end - dbp] = '\0';
5388
5389 dbp = end;
5390 intag = FALSE; /* we found what we looked for */
5391 skiptag = TRUE; /* skip to the end of the tag */
5392 getnext = TRUE; /* then grab the text */
5393 continue; /* look on the same line */
5394 }
5395 dbp += 1;
5396 }
5397
5398 else if (getnext) /* grab next tokens and tag them */
5399 {
5400 dbp = skip_spaces (dbp);
5401 if (*dbp == '\0')
5402 break; /* go to next line */
5403 if (*dbp == '<')
5404 {
5405 intag = TRUE;
5406 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5407 continue; /* look on the same line */
5408 }
5409
5410 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5411 continue;
5412 make_tag (token_name.buffer, token_name.len, TRUE,
5413 dbp, end - dbp, lineno, linecharno);
5414 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5415 getnext = FALSE;
5416 break; /* go to next line */
5417 }
5418
5419 else /* look for an interesting HTML tag */
5420 {
5421 while (*dbp != '\0' && *dbp != '<')
5422 dbp++;
5423 if (*dbp == '\0')
5424 break; /* go to next line */
5425 intag = TRUE;
5426 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5427 {
5428 inanchor = TRUE;
5429 continue; /* look on the same line */
5430 }
5431 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5432 || LOOKING_AT_NOCASE (dbp, "<h1>")
5433 || LOOKING_AT_NOCASE (dbp, "<h2>")
5434 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5435 {
5436 intag = FALSE;
5437 getnext = TRUE;
5438 continue; /* look on the same line */
5439 }
5440 dbp += 1;
5441 }
5442 }
5443 }
5444
5445 \f
5446 /*
5447 * Prolog support
5448 *
5449 * Assumes that the predicate or rule starts at column 0.
5450 * Only the first clause of a predicate or rule is added.
5451 * Original code by Sunichirou Sugou (1989)
5452 * Rewritten by Anders Lindgren (1996)
5453 */
5454 static int prolog_pr __P((char *, char *));
5455 static void prolog_skip_comment __P((linebuffer *, FILE *));
5456 static int prolog_atom __P((char *, int));
5457
5458 static void
5459 Prolog_functions (inf)
5460 FILE *inf;
5461 {
5462 char *cp, *last;
5463 int len;
5464 int allocated;
5465
5466 allocated = 0;
5467 len = 0;
5468 last = NULL;
5469
5470 LOOP_ON_INPUT_LINES (inf, lb, cp)
5471 {
5472 if (cp[0] == '\0') /* Empty line */
5473 continue;
5474 else if (iswhite (cp[0])) /* Not a predicate */
5475 continue;
5476 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5477 prolog_skip_comment (&lb, inf);
5478 else if ((len = prolog_pr (cp, last)) > 0)
5479 {
5480 /* Predicate or rule. Store the function name so that we
5481 only generate a tag for the first clause. */
5482 if (last == NULL)
5483 last = xnew(len + 1, char);
5484 else if (len + 1 > allocated)
5485 xrnew (last, len + 1, char);
5486 allocated = len + 1;
5487 strncpy (last, cp, len);
5488 last[len] = '\0';
5489 }
5490 }
5491 if (last != NULL)
5492 free (last);
5493 }
5494
5495
5496 static void
5497 prolog_skip_comment (plb, inf)
5498 linebuffer *plb;
5499 FILE *inf;
5500 {
5501 char *cp;
5502
5503 do
5504 {
5505 for (cp = plb->buffer; *cp != '\0'; cp++)
5506 if (cp[0] == '*' && cp[1] == '/')
5507 return;
5508 readline (plb, inf);
5509 }
5510 while (!feof(inf));
5511 }
5512
5513 /*
5514 * A predicate or rule definition is added if it matches:
5515 * <beginning of line><Prolog Atom><whitespace>(
5516 * or <beginning of line><Prolog Atom><whitespace>:-
5517 *
5518 * It is added to the tags database if it doesn't match the
5519 * name of the previous clause header.
5520 *
5521 * Return the size of the name of the predicate or rule, or 0 if no
5522 * header was found.
5523 */
5524 static int
5525 prolog_pr (s, last)
5526 char *s;
5527 char *last; /* Name of last clause. */
5528 {
5529 int pos;
5530 int len;
5531
5532 pos = prolog_atom (s, 0);
5533 if (pos < 1)
5534 return 0;
5535
5536 len = pos;
5537 pos = skip_spaces (s + pos) - s;
5538
5539 if ((s[pos] == '.'
5540 || (s[pos] == '(' && (pos += 1))
5541 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5542 && (last == NULL /* save only the first clause */
5543 || len != (int)strlen (last)
5544 || !strneq (s, last, len)))
5545 {
5546 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5547 return len;
5548 }
5549 else
5550 return 0;
5551 }
5552
5553 /*
5554 * Consume a Prolog atom.
5555 * Return the number of bytes consumed, or -1 if there was an error.
5556 *
5557 * A prolog atom, in this context, could be one of:
5558 * - An alphanumeric sequence, starting with a lower case letter.
5559 * - A quoted arbitrary string. Single quotes can escape themselves.
5560 * Backslash quotes everything.
5561 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;	/* first character of the candidate atom */
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted: letters, digits and underscores.  */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;			/* not an atom */

  /* Quoted atom: scan for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        if (p[1] != '\'')
          return (int) (p + 1 - start);	/* closing quote included */
        p += 2;			/* A doubled quote stands for itself.  */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return -1;

      case '\\':
        /* Backslash quotes the next character, which must exist.  */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5611
5612 \f
5613 /*
5614 * Support for Erlang
5615 *
5616 * Generates tags for functions, defines, and records.
5617 * Assumes that Erlang functions start at column 0.
5618 * Original code by Anders Lindgren (1996)
5619 */
5620 static int erlang_func __P((char *, char *));
5621 static void erlang_attribute __P((char *));
5622 static int erlang_atom __P((char *));
5623
5624 static void
5625 Erlang_functions (inf)
5626 FILE *inf;
5627 {
5628 char *cp, *last;
5629 int len;
5630 int allocated;
5631
5632 allocated = 0;
5633 len = 0;
5634 last = NULL;
5635
5636 LOOP_ON_INPUT_LINES (inf, lb, cp)
5637 {
5638 if (cp[0] == '\0') /* Empty line */
5639 continue;
5640 else if (iswhite (cp[0])) /* Not function nor attribute */
5641 continue;
5642 else if (cp[0] == '%') /* comment */
5643 continue;
5644 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5645 continue;
5646 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5647 {
5648 erlang_attribute (cp);
5649 if (last != NULL)
5650 {
5651 free (last);
5652 last = NULL;
5653 }
5654 }
5655 else if ((len = erlang_func (cp, last)) > 0)
5656 {
5657 /*
5658 * Function. Store the function name so that we only
5659 * generates a tag for the first clause.
5660 */
5661 if (last == NULL)
5662 last = xnew (len + 1, char);
5663 else if (len + 1 > allocated)
5664 xrnew (last, len + 1, char);
5665 allocated = len + 1;
5666 strncpy (last, cp, len);
5667 last[len] = '\0';
5668 }
5669 }
5670 if (last != NULL)
5671 free (last);
5672 }
5673
5674
5675 /*
5676 * A function definition is added if it matches:
5677 * <beginning of line><Erlang Atom><whitespace>(
5678 *
5679 * It is added to the tags database if it doesn't match the
5680 * name of the previous clause header.
5681 *
5682 * Return the size of the name of the function, or 0 if no function
5683 * was found.
5684 */
5685 static int
5686 erlang_func (s, last)
5687 char *s;
5688 char *last; /* Name of last clause. */
5689 {
5690 int pos;
5691 int len;
5692
5693 pos = erlang_atom (s);
5694 if (pos < 1)
5695 return 0;
5696
5697 len = pos;
5698 pos = skip_spaces (s + pos) - s;
5699
5700 /* Save only the first clause. */
5701 if (s[pos++] == '('
5702 && (last == NULL
5703 || len != (int)strlen (last)
5704 || !strneq (s, last, len)))
5705 {
5706 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5707 return len;
5708 }
5709
5710 return 0;
5711 }
5712
5713
5714 /*
5715 * Handle attributes. Currently, tags are generated for defines
5716 * and records.
5717 *
5718 * They are on the form:
5719 * -define(foo, bar).
5720 * -define(Foo(M, N), M+N).
5721 * -record(graph, {vtab = notable, cyclic = true}).
5722 */
5723 static void
5724 erlang_attribute (s)
5725 char *s;
5726 {
5727 char *cp = s;
5728
5729 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5730 && *cp++ == '(')
5731 {
5732 int len = erlang_atom (skip_spaces (cp));
5733 if (len > 0)
5734 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5735 }
5736 return;
5737 }
5738
5739
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (s[0] == '\'')
    {
      /* Quoted atom: find the closing quote. */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              /* A backslash protects the next character. */
              pos++;
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
      return pos;
    }

  return 0;
}
5768
5769 \f
5770 static char *scan_separators __P((char *));
5771 static void add_regex __P((char *, language *));
5772 static char *substitute __P((char *, char *, struct re_registers *));
5773
5774 /*
5775 * Take a string like "/blah/" and turn it into "blah", verifying
5776 * that the first and last characters are the same, and handling
5777 * quoted separator characters. Actually, stops on the occurrence of
5778 * an unquoted separator. Also process \t, \n, etc. and turn into
5779 * appropriate characters. Works in place. Null terminates name string.
5780 * Returns pointer to terminating separator, or NULL for
5781 * unterminated regexps.
5782 */
5783 static char *
5784 scan_separators (name)
5785 char *name;
5786 {
5787 char sep = name[0];
5788 char *copyto = name;
5789 bool quoted = FALSE;
5790
5791 for (++name; *name != '\0'; ++name)
5792 {
5793 if (quoted)
5794 {
5795 switch (*name)
5796 {
5797 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5798 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5799 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5800 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5801 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5802 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5803 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5804 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5805 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5806 default:
5807 if (*name == sep)
5808 *copyto++ = sep;
5809 else
5810 {
5811 /* Something else is quoted, so preserve the quote. */
5812 *copyto++ = '\\';
5813 *copyto++ = *name;
5814 }
5815 break;
5816 }
5817 quoted = FALSE;
5818 }
5819 else if (*name == '\\')
5820 quoted = TRUE;
5821 else if (*name == sep)
5822 break;
5823 else
5824 *copyto++ = *name;
5825 }
5826 if (*name != sep)
5827 name = NULL; /* signal unterminated regexp */
5828
5829 /* Terminate copied string. */
5830 *copyto = '\0';
5831 return name;
5832 }
5833
5834 /* Look at the argument of --regex or --no-regex and do the right
5835 thing. Same for each line of a regexp file. */
5836 static void
5837 analyse_regex (regex_arg)
5838 char *regex_arg;
5839 {
5840 if (regex_arg == NULL)
5841 {
5842 free_regexps (); /* --no-regex: remove existing regexps */
5843 return;
5844 }
5845
5846 /* A real --regexp option or a line in a regexp file. */
5847 switch (regex_arg[0])
5848 {
5849 /* Comments in regexp file or null arg to --regex. */
5850 case '\0':
5851 case ' ':
5852 case '\t':
5853 break;
5854
5855 /* Read a regex file. This is recursive and may result in a
5856 loop, which will stop when the file descriptors are exhausted. */
5857 case '@':
5858 {
5859 FILE *regexfp;
5860 linebuffer regexbuf;
5861 char *regexfile = regex_arg + 1;
5862
5863 /* regexfile is a file containing regexps, one per line. */
5864 regexfp = fopen (regexfile, "r");
5865 if (regexfp == NULL)
5866 {
5867 pfatal (regexfile);
5868 return;
5869 }
5870 linebuffer_init (&regexbuf);
5871 while (readline_internal (&regexbuf, regexfp) > 0)
5872 analyse_regex (regexbuf.buffer);
5873 free (regexbuf.buffer);
5874 fclose (regexfp);
5875 }
5876 break;
5877
5878 /* Regexp to be used for a specific language only. */
5879 case '{':
5880 {
5881 language *lang;
5882 char *lang_name = regex_arg + 1;
5883 char *cp;
5884
5885 for (cp = lang_name; *cp != '}'; cp++)
5886 if (*cp == '\0')
5887 {
5888 error ("unterminated language name in regex: %s", regex_arg);
5889 return;
5890 }
5891 *cp++ = '\0';
5892 lang = get_language_from_langname (lang_name);
5893 if (lang == NULL)
5894 return;
5895 add_regex (cp, lang);
5896 }
5897 break;
5898
5899 /* Regexp to be used for any language. */
5900 default:
5901 add_regex (regex_arg, NULL);
5902 break;
5903 }
5904 }
5905
5906 /* Separate the regexp pattern, compile it,
5907 and care for optional name and modifiers. */
5908 static void
5909 add_regex (regexp_pattern, lang)
5910 char *regexp_pattern;
5911 language *lang;
5912 {
5913 static struct re_pattern_buffer zeropattern;
5914 char sep, *pat, *name, *modifiers;
5915 const char *err;
5916 struct re_pattern_buffer *patbuf;
5917 regexp *rp;
5918 bool
5919 force_explicit_name = TRUE, /* do not use implicit tag names */
5920 ignore_case = FALSE, /* case is significant */
5921 multi_line = FALSE, /* matches are done one line at a time */
5922 single_line = FALSE; /* dot does not match newline */
5923
5924
5925 if (strlen(regexp_pattern) < 3)
5926 {
5927 error ("null regexp", (char *)NULL);
5928 return;
5929 }
5930 sep = regexp_pattern[0];
5931 name = scan_separators (regexp_pattern);
5932 if (name == NULL)
5933 {
5934 error ("%s: unterminated regexp", regexp_pattern);
5935 return;
5936 }
5937 if (name[1] == sep)
5938 {
5939 error ("null name for regexp \"%s\"", regexp_pattern);
5940 return;
5941 }
5942 modifiers = scan_separators (name);
5943 if (modifiers == NULL) /* no terminating separator --> no name */
5944 {
5945 modifiers = name;
5946 name = "";
5947 }
5948 else
5949 modifiers += 1; /* skip separator */
5950
5951 /* Parse regex modifiers. */
5952 for (; modifiers[0] != '\0'; modifiers++)
5953 switch (modifiers[0])
5954 {
5955 case 'N':
5956 if (modifiers == name)
5957 error ("forcing explicit tag name but no name, ignoring", NULL);
5958 force_explicit_name = TRUE;
5959 break;
5960 case 'i':
5961 ignore_case = TRUE;
5962 break;
5963 case 's':
5964 single_line = TRUE;
5965 /* FALLTHRU */
5966 case 'm':
5967 multi_line = TRUE;
5968 need_filebuf = TRUE;
5969 break;
5970 default:
5971 {
5972 char wrongmod [2];
5973 wrongmod[0] = modifiers[0];
5974 wrongmod[1] = '\0';
5975 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5976 }
5977 break;
5978 }
5979
5980 patbuf = xnew (1, struct re_pattern_buffer);
5981 *patbuf = zeropattern;
5982 if (ignore_case)
5983 {
5984 static char lc_trans[CHARS];
5985 int i;
5986 for (i = 0; i < CHARS; i++)
5987 lc_trans[i] = lowcase (i);
5988 patbuf->translate = lc_trans; /* translation table to fold case */
5989 }
5990
5991 if (multi_line)
5992 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5993 else
5994 pat = regexp_pattern;
5995
5996 if (single_line)
5997 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5998 else
5999 re_set_syntax (RE_SYNTAX_EMACS);
6000
6001 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6002 if (multi_line)
6003 free (pat);
6004 if (err != NULL)
6005 {
6006 error ("%s while compiling pattern", err);
6007 return;
6008 }
6009
6010 rp = p_head;
6011 p_head = xnew (1, regexp);
6012 p_head->pattern = savestr (regexp_pattern);
6013 p_head->p_next = rp;
6014 p_head->lang = lang;
6015 p_head->pat = patbuf;
6016 p_head->name = savestr (name);
6017 p_head->error_signaled = FALSE;
6018 p_head->force_explicit_name = force_explicit_name;
6019 p_head->ignore_case = ignore_case;
6020 p_head->multi_line = multi_line;
6021 }
6022
6023 /*
6024 * Do the substitutions indicated by the regular expression and
6025 * arguments.
6026 */
6027 static char *
6028 substitute (in, out, regs)
6029 char *in, *out;
6030 struct re_registers *regs;
6031 {
6032 char *result, *t;
6033 int size, dig, diglen;
6034
6035 result = NULL;
6036 size = strlen (out);
6037
6038 /* Pass 1: figure out how much to allocate by finding all \N strings. */
6039 if (out[size - 1] == '\\')
6040 fatal ("pattern error in \"%s\"", out);
6041 for (t = etags_strchr (out, '\\');
6042 t != NULL;
6043 t = etags_strchr (t + 2, '\\'))
6044 if (ISDIGIT (t[1]))
6045 {
6046 dig = t[1] - '0';
6047 diglen = regs->end[dig] - regs->start[dig];
6048 size += diglen - 2;
6049 }
6050 else
6051 size -= 1;
6052
6053 /* Allocate space and do the substitutions. */
6054 assert (size >= 0);
6055 result = xnew (size + 1, char);
6056
6057 for (t = result; *out != '\0'; out++)
6058 if (*out == '\\' && ISDIGIT (*++out))
6059 {
6060 dig = *out - '0';
6061 diglen = regs->end[dig] - regs->start[dig];
6062 strncpy (t, in + regs->start[dig], diglen);
6063 t += diglen;
6064 }
6065 else
6066 *t++ = *out;
6067 *t = '\0';
6068
6069 assert (t <= result + size);
6070 assert (t - result == (int)strlen (result));
6071
6072 return result;
6073 }
6074
6075 /* Deallocate all regexps. */
6076 static void
6077 free_regexps ()
6078 {
6079 regexp *rp;
6080 while (p_head != NULL)
6081 {
6082 rp = p_head->p_next;
6083 free (p_head->pattern);
6084 free (p_head->name);
6085 free (p_head);
6086 p_head = rp;
6087 }
6088 return;
6089 }
6090
/*
 * Reads the whole file as a single string from `filebuf' and looks for
 * multi-line regular expressions, creating tags on matches.
 * readline already dealt with normal regexps.
 *
 * Only regexps on the `p_head' list that are flagged multi_line, and
 * that are either generic or bound to the language of `curfdp', are
 * searched.  The globals lineno, charno and linecharno are reset for
 * each multi-line regexp and advanced to the end of each match.
 *
 * Idea by Ben Wing <ben@666.com> (2002).
 */
static void
regex_tag_multiline ()
{
  char *buffer = filebuf.buffer;
  regexp *rp;
  char *name;

  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      int match = 0;            /* result of the last re_search call */

      if (!rp->multi_line)
        continue;               /* skip normal regexps */

      /* Generic initialisations before parsing file from memory. */
      lineno = 1;               /* reset global line number */
      charno = 0;               /* reset global char number */
      linecharno = 0;           /* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
        continue;

      /* Search repeatedly, each time starting at charno, which is moved
         to the end of the previous match below.  A negative MATCH
         (error, no match, or the -3 set for empty matches) ends the
         loop. */
      while (match >= 0 && match < filebuf.len)
        {
          match = re_search (rp->pat, buffer, filebuf.len, charno,
                             filebuf.len - match, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!rp->error_signaled)
                {
                  /* Report the problem only once per regexp. */
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* A match that ends where it starts is empty. */
              if (match == rp->regs.end[0])
                {
                  if (!rp->error_signaled)
                    {
                      error ("regexp matches the empty string: \"%s\"",
                             rp->pattern);
                      rp->error_signaled = TRUE;
                    }
                  match = -3;   /* exit from while loop */
                  break;
                }

              /* Match occurred.  Construct a tag. */
              /* Advance charno to the end of the match, counting the
                 newlines passed and remembering where the last line
                 starts. */
              while (charno < rp->regs.end[0])
                if (buffer[charno++] == '\n')
                  lineno++, linecharno = charno;
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (buffer, rp->name, &rp->regs);
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, buffer + linecharno,
                        charno - linecharno + 1, lineno, linecharno);
              else
                make_tag (name, strlen (name), TRUE, buffer + linecharno,
                          charno - linecharno + 1, lineno, linecharno);
              break;
            }
        }
    }
}
6173
6174 \f
6175 static bool
6176 nocase_tail (cp)
6177 char *cp;
6178 {
6179 register int len = 0;
6180
6181 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6182 cp++, len++;
6183 if (*cp == '\0' && !intoken (dbp[len]))
6184 {
6185 dbp += len;
6186 return TRUE;
6187 }
6188 return FALSE;
6189 }
6190
6191 static void
6192 get_tag (bp, namepp)
6193 register char *bp;
6194 char **namepp;
6195 {
6196 register char *cp = bp;
6197
6198 if (*bp != '\0')
6199 {
6200 /* Go till you get to white space or a syntactic break */
6201 for (cp = bp + 1; !notinname (*cp); cp++)
6202 continue;
6203 make_tag (bp, cp - bp, TRUE,
6204 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6205 }
6206
6207 if (namepp != NULL)
6208 *namepp = savenstr (bp, cp - bp);
6209 }
6210
/*
 * Read a line of text from `stream' into `lbp', excluding the
 * newline or CR-NL, if any.  Return the number of characters read from
 * `stream', which is the length of the line including the newline.
 *
 * On DOS or Windows we do not count the CR character, if any before the
 * NL, in the returned length; this mirrors the behavior of Emacs on those
 * platforms (for text files, it translates CR-NL to NL as it reads in the
 * file).
 *
 * If multi-line regular expressions are requested, each line read is
 * appended to `filebuf'.
 *
 * The buffer of `lbp' is doubled in size as needed, and lbp->len is set
 * to the length of the stored line.  A line that ends at end of file
 * without a newline is not appended to `filebuf'.
 */
static long
readline_internal (lbp, stream)
     linebuffer *lbp;
     register FILE *stream;
{
  char *buffer = lbp->buffer;
  register char *p = lbp->buffer;       /* where the next char is stored */
  register char *pend;                  /* one past the end of the buffer */
  int chars_deleted;                    /* line terminator chars not stored */

  pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug. */

  for (;;)
    {
      register int c = getc (stream);
      /* Make room first, so that there is always space for either C or
         the terminating null. */
      if (p == pend)
        {
          /* We're at the end of linebuffer: expand it. */
          lbp->size *= 2;
          xrnew (buffer, lbp->size, char);
          /* Rebase P onto the (possibly moved) buffer. */
          p += buffer - lbp->buffer;
          pend = buffer + lbp->size;
          lbp->buffer = buffer;
        }
      if (c == EOF)
        {
          *p = '\0';
          chars_deleted = 0;
          break;
        }
      if (c == '\n')
        {
          if (p > buffer && p[-1] == '\r')
            {
              /* Drop the CR that was already stored. */
              p -= 1;
#ifdef DOS_NT
             /* Assume CRLF->LF translation will be performed by Emacs
                when loading this file, so CRs won't appear in the buffer.
                It would be cleaner to compensate within Emacs;
                however, Emacs does not know how many CRs were deleted
                before any given point in the file.  */
              chars_deleted = 1;
#else
              chars_deleted = 2;
#endif
            }
          else
            {
              chars_deleted = 1;
            }
          *p = '\0';
          break;
        }
      *p++ = c;
    }
  lbp->len = p - buffer;

  if (need_filebuf              /* we need filebuf for multi-line regexps */
      && chars_deleted > 0)     /* not at EOF */
    {
      while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
        {
          /* Expand filebuf. */
          filebuf.size *= 2;
          xrnew (filebuf.buffer, filebuf.size, char);
        }
      /* Append the line, followed by a newline and a terminating null
         (the null is not counted in filebuf.len). */
      strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
      filebuf.len += lbp->len;
      filebuf.buffer[filebuf.len++] = '\n';
      filebuf.buffer[filebuf.len] = '\0';
    }

  return lbp->len + chars_deleted;
}
6298
/*
 * Like readline_internal, above, but in addition try to match the
 * input line against relevant regular expressions and manage #line
 * directives.
 *
 * The globals linecharno, lineno and charno are updated for the line
 * read.  Unless `no_line_directive' is set, a line of the form
 *   #line NUMBER "FILE"
 * is not returned to the caller: it may switch `curfdp' to another
 * file description, it sets the line number, and the following line is
 * read instead.  Single-line regexps on the `p_head' list are then
 * matched against the line and tags are made for the matches.
 */
static void
readline (lbp, stream)
     linebuffer *lbp;
     FILE *stream;
{
  long result;

  linecharno = charno;          /* update global char number of line start */
  result = readline_internal (lbp, stream); /* read line */
  lineno += 1;                  /* increment global line number */
  charno += result;             /* increment global char number */

  /* Honour #line directives. */
  if (!no_line_directive)
    {
      /* Set when the file named by a #line directive was already tagged
         as an input file: lines are then skipped until the next
         directive or end of file. */
      static bool discard_until_line_directive;

      /* Check whether this is a #line directive. */
      if (result > 12 && strneq (lbp->buffer, "#line ", 6))
        {
          unsigned int lno;
          int start = 0;

          /* START is set by %n only when the opening double quote was
             matched; it is then the offset of the file name. */
          if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
              && start > 0)     /* double quote character found */
            {
              char *endp = lbp->buffer + start;

              /* Look for the closing double quote, skipping any quote
                 that is preceded by a backslash. */
              while ((endp = etags_strchr (endp, '"')) != NULL
                     && endp[-1] == '\\')
                endp++;
              if (endp != NULL)
                /* Ok, this is a real #line directive.  Let's deal with it. */
                {
                  char *taggedabsname;  /* absolute name of original file */
                  char *taggedfname;    /* name of original file as given */
                  char *name;           /* temp var */

                  discard_until_line_directive = FALSE; /* found it */
                  name = lbp->buffer + start;
                  *endp = '\0';
                  canonicalize_filename (name); /* for DOS */
                  taggedabsname = absolute_filename (name, tagfiledir);
                  if (filename_is_absolute (name)
                      || filename_is_absolute (curfdp->infname))
                    taggedfname = savestr (taggedabsname);
                  else
                    taggedfname = relative_filename (taggedabsname,tagfiledir);

                  if (streq (curfdp->taggedfname, taggedfname))
                    /* The #line directive is only a line number change.  We
                       deal with this afterwards.  */
                    free (taggedfname);
                  else
                    /* The tags following this #line directive should be
                       attributed to taggedfname.  In order to do this, set
                       curfdp accordingly.  */
                    {
                      fdesc *fdp; /* file description pointer */

                      /* Go look for a file description already set up for the
                         file indicated in the #line directive.  If there is
                         one, use it from now until the next #line
                         directive.  */
                      for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                        if (streq (fdp->infname, curfdp->infname)
                            && streq (fdp->taggedfname, taggedfname))
                          /* If we remove the second test above (after the &&)
                             then all entries pertaining to the same file are
                             coalesced in the tags file.  If we use it, then
                             entries pertaining to the same file but generated
                             from different files (via #line directives) will
                             go into separate sections in the tags file.  These
                             alternatives look equivalent.  The first one
                             destroys some apparently useless information.  */
                          {
                            curfdp = fdp;
                            free (taggedfname);
                            break;
                          }
                      /* Else, if we already tagged the real file, skip all
                         input lines until the next #line directive.  */
                      if (fdp == NULL) /* not found */
                        for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
                          if (streq (fdp->infabsname, taggedabsname))
                            {
                              discard_until_line_directive = TRUE;
                              free (taggedfname);
                              break;
                            }
                      /* Else create a new file description and use that from
                         now on, until the next #line directive.  */
                      if (fdp == NULL) /* not found */
                        {
                          fdp = fdhead;
                          fdhead = xnew (1, fdesc);
                          *fdhead = *curfdp; /* copy curr. file description */
                          fdhead->next = fdp;
                          fdhead->infname = savestr (curfdp->infname);
                          fdhead->infabsname = savestr (curfdp->infabsname);
                          fdhead->infabsdir = savestr (curfdp->infabsdir);
                          /* The new description takes over TAGGEDFNAME. */
                          fdhead->taggedfname = taggedfname;
                          fdhead->usecharno = FALSE;
                          fdhead->prop = NULL;
                          fdhead->written = FALSE;
                          curfdp = fdhead;
                        }
                    }
                  free (taggedabsname);
                  /* The recursive call below adds one to lineno, so the
                     line after the directive gets number LNO. */
                  lineno = lno - 1;
                  readline (lbp, stream);
                  return;
                } /* if a real #line directive */
            } /* if #line is followed by a number */
        } /* if line begins with "#line " */

      /* If we are here, no #line directive was found. */
      if (discard_until_line_directive)
        {
          if (result > 0)
            {
              /* Do a tail recursion on ourselves, thus discarding the contents
                 of the line buffer. */
              readline (lbp, stream);
              return;
            }
          /* End of file. */
          discard_until_line_directive = FALSE;
          return;
        }
    } /* if #line directives should be considered */

  {
    int match;
    regexp *rp;
    char *name;

    /* Match against relevant regexps. */
    if (lbp->len > 0)
      for (rp = p_head; rp != NULL; rp = rp->p_next)
        {
          /* Only use generic regexps or those for the current language.
             Also do not use multiline regexps, which is the job of
             regex_tag_multiline. */
          if ((rp->lang != NULL && rp->lang != fdhead->lang)
              || rp->multi_line)
            continue;

          match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!rp->error_signaled)
                {
                  /* Report the problem only once per regexp. */
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            case 0:
              /* Empty string matched. */
              if (!rp->error_signaled)
                {
                  error ("regexp matches the empty string: \"%s\"", rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            default:
              /* Match occurred.  Construct a tag. */
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (lbp->buffer, rp->name, &rp->regs);
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
              else
                make_tag (name, strlen (name), TRUE,
                          lbp->buffer, match, lineno, linecharno);
              break;
            }
        }
  }
}
6493
6494 \f
/*
 * Return a newly allocated copy of the whole string CP, made by
 * savenstr with the length of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6505
6506 /*
6507 * Return a pointer to a space of size LEN+1 allocated with xnew where
6508 * the string CP has been copied for at most the first LEN characters.
6509 */
6510 static char *
6511 savenstr (cp, len)
6512 char *cp;
6513 int len;
6514 {
6515 register char *dp;
6516
6517 dp = xnew (len + 1, char);
6518 strncpy (dp, cp, len);
6519 dp[len] = '\0';
6520 return dp;
6521 }
6522
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As with strrchr, C is converted to a char before comparing, and
 * the terminating null is part of the string, so searching for '\0'
 * returns a pointer to the terminator.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  /* Convert once, so that a value such as 0xE9 also matches where
     plain char is signed. */
  register char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6544
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As with strchr, C is converted to a char before comparing, and
 * the terminating null is part of the string, so searching for '\0'
 * returns a pointer to the terminator.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  /* Convert once, so that a value such as 0xE9 also matches where
     plain char is signed. */
  register char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6563
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero if they are equal, otherwise the difference between
 * the first pair of characters that differ (taken in lower case when
 * both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      /* Case is folded only when both characters are letters. */
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference or at the end of S1. */
      if (*s1 == '\0' || diff != 0)
        return diff;
    }
}
6584
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * At most N characters are compared: the result is zero when the
 * first N characters are equal, or when both strings end, equal,
 * before that.  Otherwise the result is the difference between the
 * first pair of characters that differ (taken in lower case when both
 * are alphabetic).  A non-positive N always gives zero.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Test the count before touching the characters, so that nothing
     beyond the first N characters can influence the result. */
  for (; n > 0; s1++, s2++, n--)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference or at the end of S1. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  return 0;
}
6610
/* Return a pointer to the first character of CP for which iswhite is
   false.  The end of string is not space, so the scan stops there. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6620
/* Return a pointer to the first character of CP for which iswhite is
   true, or to the terminating null if there is none. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
6630
/* Print error message and exit.  S1 and S2 are passed on to `error';
   the process then exits with status EXIT_FAILURE. */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6639
/* Report the last system error with perror, using S1 as the prefix of
   the message, and exit with status EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6647
/* Print on stderr a hint naming the help option of the program (`-h'
   when NO_LONG_OPTIONS is true, `--help' otherwise) and exit with
   status EXIT_FAILURE. */
static void
suggest_asking_for_help ()
{
  fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
           progname, NO_LONG_OPTIONS ? "-h" : "--help");
  exit (EXIT_FAILURE);
}
6655
/* Print error message.  `s1' is printf control string, `s2' is arg for it.
   The message goes to stderr, preceded by the program name and a
   colon and followed by a newline.  S1 is used directly as the format,
   so it must contain at most one conversion, which consumes S2. */
static void
error (s1, s2)
     const char *s1, *s2;
{
  fprintf (stderr, "%s: ", progname);
  fprintf (stderr, s1, s2);
  fprintf (stderr, "\n");
}
6665
6666 /* Return a newly-allocated string whose contents
6667 concatenate those of s1, s2, s3. */
6668 static char *
6669 concat (s1, s2, s3)
6670 char *s1, *s2, *s3;
6671 {
6672 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6673 char *result = xnew (len1 + len2 + len3 + 1, char);
6674
6675 strcpy (result, s1);
6676 strcpy (result + len1, s2);
6677 strcpy (result + len1 + len2, s3);
6678 result[len1 + len2 + len3] = '\0';
6679
6680 return result;
6681 }
6682
6683 \f
/* Does the same work as the system V getcwd, but does not need to
   guess the buffer size in advance.

   Return the current directory in newly allocated storage.  One of
   three implementations is compiled: getcwd with a growing buffer,
   getwd on MSDOS, or the output of the `pwd' command.  Failure to
   obtain the directory is fatal in the first and last variants.  */
static char *
etags_getcwd ()
{
#ifdef HAVE_GETCWD
  int bufsize = 200;
  char *path = xnew (bufsize, char);

  /* Retry with a buffer twice as large for as long as getcwd reports
     that the buffer is too small; any other error is fatal. */
  while (getcwd (path, bufsize) == NULL)
    {
      if (errno != ERANGE)
        pfatal ("getcwd");
      bufsize *= 2;
      free (path);
      path = xnew (bufsize, char);
    }

  canonicalize_filename (path);
  return path;

#else /* not HAVE_GETCWD */
#if MSDOS

  char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */

  getwd (path);

  /* Turn backslashes into slashes and fold everything else to lower
     case. */
  for (p = path; *p != '\0'; p++)
    if (*p == '\\')
      *p = '/';
    else
      *p = lowcase (*p);

  return strdup (path);
#else /* not MSDOS */
  linebuffer path;
  FILE *pipe;

  /* Read the first line printed by the `pwd' command. */
  linebuffer_init (&path);
  pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
  if (pipe == NULL || readline_internal (&path, pipe) == 0)
    pfatal ("pwd");
  pclose (pipe);

  return path.buffer;
#endif /* not MSDOS */
#endif /* not HAVE_GETCWD */
}
6733
6734 /* Return a newly allocated string containing the file name of FILE
6735 relative to the absolute directory DIR (which should end with a slash). */
6736 static char *
6737 relative_filename (file, dir)
6738 char *file, *dir;
6739 {
6740 char *fp, *dp, *afn, *res;
6741 int i;
6742
6743 /* Find the common root of file and dir (with a trailing slash). */
6744 afn = absolute_filename (file, cwd);
6745 fp = afn;
6746 dp = dir;
6747 while (*fp++ == *dp++)
6748 continue;
6749 fp--, dp--; /* back to the first differing char */
6750 #ifdef DOS_NT
6751 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6752 return afn;
6753 #endif
6754 do /* look at the equal chars until '/' */
6755 fp--, dp--;
6756 while (*fp != '/');
6757
6758 /* Build a sequence of "../" strings for the resulting relative file name. */
6759 i = 0;
6760 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6761 i += 1;
6762 res = xnew (3*i + strlen (fp + 1) + 1, char);
6763 res[0] = '\0';
6764 while (i-- > 0)
6765 strcat (res, "../");
6766
6767 /* Add the file name relative to the common root of file and dir. */
6768 strcat (res, fp + 1);
6769 free (afn);
6770
6771 return res;
6772 }
6773
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied, any other FILE is appended to DIR.  The
   "/dirname/.." and "/." components are then removed from the result
   in place.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the start of the component before "/..". */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow: move the tail, including its null, with
                 memmove. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the regions overlap here as well. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6837
6838 /* Return a newly allocated string containing the absolute
6839 file name of dir where FILE resides given DIR (which should
6840 end with a slash). */
6841 static char *
6842 absolute_dirname (file, dir)
6843 char *file, *dir;
6844 {
6845 char *slashp, *res;
6846 char save;
6847
6848 canonicalize_filename (file);
6849 slashp = etags_strrchr (file, '/');
6850 if (slashp == NULL)
6851 return savestr (dir);
6852 save = slashp[1];
6853 slashp[1] = '\0';
6854 res = absolute_filename (file, dir);
6855 slashp[1] = save;
6856
6857 return res;
6858 }
6859
6860 /* Whether the argument string is an absolute file name. The argument
6861 string must have been canonicalized with canonicalize_filename. */
6862 static bool
6863 filename_is_absolute (fn)
6864 char *fn;
6865 {
6866 return (fn[0] == '/'
6867 #ifdef DOS_NT
6868 || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
6869 #endif
6870 );
6871 }
6872
/* Translate backslashes into slashes.  Works in place.
   Under DOS_NT a lower-case drive letter is also converted to upper
   case.  On other systems the name is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes.  */
  for (p = fn; *p != '\0'; p++)
    {
      if (*p == '\\')
        *p = '/';
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6891
6892 \f
6893 /* Initialize a linebuffer for use */
6894 static void
6895 linebuffer_init (lbp)
6896 linebuffer *lbp;
6897 {
6898 lbp->size = (DEBUG) ? 3 : 200;
6899 lbp->buffer = xnew (lbp->size, char);
6900 lbp->buffer[0] = '\0';
6901 lbp->len = 0;
6902 }
6903
6904 /* Set the minimum size of a string contained in a linebuffer. */
6905 static void
6906 linebuffer_setlen (lbp, toksize)
6907 linebuffer *lbp;
6908 int toksize;
6909 {
6910 while (lbp->size <= toksize)
6911 {
6912 lbp->size *= 2;
6913 xrnew (lbp->buffer, lbp->size, char);
6914 }
6915 lbp->len = toksize;
6916 }
6917
6918 /* Like malloc but get fatal error if memory is exhausted. */
6919 static PTR
6920 xmalloc (size)
6921 unsigned int size;
6922 {
6923 PTR result = (PTR) malloc (size);
6924 if (result == NULL)
6925 fatal ("virtual memory exhausted", (char *)NULL);
6926 return result;
6927 }
6928
6929 static PTR
6930 xrealloc (ptr, size)
6931 char *ptr;
6932 unsigned int size;
6933 {
6934 PTR result = (PTR) realloc (ptr, size);
6935 if (result == NULL)
6936 fatal ("virtual memory exhausted", (char *)NULL);
6937 return result;
6938 }
6939
6940 /*
6941 * Local Variables:
6942 * indent-tabs-mode: t
6943 * tab-width: 8
6944 * fill-column: 79
6945 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6946 * c-file-style: "gnu"
6947 * End:
6948 */
6949
6950 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6951 (do not change this comment) */
6952
6953 /* etags.c ends here */