(main): Pass the -u option to sort in ctags mode.
[bpt/emacs.git] / lib-src / etags.c
1 /* Tags file maker to go with GNU Emacs -*- coding: latin-1 -*-
2 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006 Free Software Foundation, Inc. and Ken Arnold
5
6 This file is not considered part of GNU Emacs.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22 /*
23 * Authors:
24 * Ctags originally by Ken Arnold.
25 * Fortran added by Jim Kleckner.
26 * Ed Pelegri-Llopart added C typedefs.
27 * Gnu Emacs TAGS format and modifications by RMS?
28 * 1989 Sam Kendall added C++.
29 * 1992 Joseph B. Wells improved C and C++ parsing.
30 * 1993 Francesco Potortì reorganised C and C++.
31 * 1994 Line-by-line regexp tags by Tom Tromey.
32 * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
33 * 2002 #line directives by Francesco Potortì.
34 *
35 * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
36 */
37
38 /*
39 * If you want to add support for a new language, start by looking at the LUA
40 * language, which is the simplest. Alternatively, consider shipping a
41 * configuration file containing regexp definitions for etags.
42 */
43
44 char pot_etags_version[] = "@(#) pot revision number is 17.26";
45
46 #define TRUE 1
47 #define FALSE 0
48
49 #ifdef DEBUG
50 # undef DEBUG
51 # define DEBUG TRUE
52 #else
53 # define DEBUG FALSE
54 # define NDEBUG /* disable assert */
55 #endif
56
57 #ifdef HAVE_CONFIG_H
58 # include <config.h>
59 /* On some systems, Emacs defines static as nothing for the sake
60 of unexec. We don't want that here since we don't use unexec. */
61 # undef static
62 # ifndef PTR /* for XEmacs */
63 # define PTR void *
64 # endif
65 # ifndef __P /* for XEmacs */
66 # define __P(args) args
67 # endif
68 #else /* no config.h */
69 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
70 # define __P(args) args /* use prototypes */
71 # define PTR void * /* for generic pointers */
72 # else /* not standard C */
73 # define __P(args) () /* no prototypes */
74 # define const /* remove const for old compilers' sake */
75 # define PTR long * /* don't use void* */
76 # endif
77 #endif /* !HAVE_CONFIG_H */
78
79 #ifndef _GNU_SOURCE
80 # define _GNU_SOURCE 1 /* enables some compiler checks on GNU */
81 #endif
82
83 /* WIN32_NATIVE is for XEmacs.
84 MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
85 #ifdef WIN32_NATIVE
86 # undef MSDOS
87 # undef WINDOWSNT
88 # define WINDOWSNT
89 #endif /* WIN32_NATIVE */
90
91 #ifdef MSDOS
92 # undef MSDOS
93 # define MSDOS TRUE
94 # include <fcntl.h>
95 # include <sys/param.h>
96 # include <io.h>
97 # ifndef HAVE_CONFIG_H
98 # define DOS_NT
99 # include <sys/config.h>
100 # endif
101 #else
102 # define MSDOS FALSE
103 #endif /* MSDOS */
104
105 #ifdef WINDOWSNT
106 # include <stdlib.h>
107 # include <fcntl.h>
108 # include <string.h>
109 # include <direct.h>
110 # include <io.h>
111 # define MAXPATHLEN _MAX_PATH
112 # undef HAVE_NTGUI
113 # undef DOS_NT
114 # define DOS_NT
115 # ifndef HAVE_GETCWD
116 # define HAVE_GETCWD
117 # endif /* undef HAVE_GETCWD */
118 #else /* not WINDOWSNT */
119 # ifdef STDC_HEADERS
120 # include <stdlib.h>
121 # include <string.h>
122 # else /* no standard C headers */
123 extern char *getenv ();
124 # ifdef VMS
125 # define EXIT_SUCCESS 1
126 # define EXIT_FAILURE 0
127 # else /* no VMS */
128 # define EXIT_SUCCESS 0
129 # define EXIT_FAILURE 1
130 # endif
131 # endif
132 #endif /* !WINDOWSNT */
133
134 #ifdef HAVE_UNISTD_H
135 # include <unistd.h>
136 #else
137 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
138 extern char *getcwd (char *buf, size_t size);
139 # endif
140 #endif /* HAVE_UNISTD_H */
141
142 #include <stdio.h>
143 #include <ctype.h>
144 #include <errno.h>
145 #ifndef errno
146 extern int errno;
147 #endif
148 #include <sys/types.h>
149 #include <sys/stat.h>
150
151 #include <assert.h>
152 #ifdef NDEBUG
153 # undef assert /* some systems have a buggy assert.h */
154 # define assert(x) ((void) 0)
155 #endif
156
157 #if !defined (S_ISREG) && defined (S_IFREG)
158 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
159 #endif
160
161 #ifdef NO_LONG_OPTIONS /* define this if you don't have GNU getopt */
162 # define NO_LONG_OPTIONS TRUE
163 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
164 extern char *optarg;
165 extern int optind, opterr;
166 #else
167 # define NO_LONG_OPTIONS FALSE
168 # include <getopt.h>
169 #endif /* NO_LONG_OPTIONS */
170
171 #ifndef HAVE_CONFIG_H /* this is a standalone compilation */
172 # ifdef __CYGWIN__ /* compiling on Cygwin */
173 !!! NOTICE !!!
174 the regex.h distributed with Cygwin is not compatible with etags, alas!
175 If you want regular expression support, you should delete this notice and
176 arrange to use the GNU regex.h and regex.c.
177 # endif
178 #endif
179 #include <regex.h>
180
181 /* Define CTAGS to make the program "ctags" compatible with the usual one.
182 Leave it undefined to make the program "etags", which makes emacs-style
183 tag tables and tags typedefs, #defines and struct/union/enum by default. */
184 #ifdef CTAGS
185 # undef CTAGS
186 # define CTAGS TRUE
187 #else
188 # define CTAGS FALSE
189 #endif
190
/*
 * String equality predicates built on strcmp, strncmp and their
 * case-insensitive etags_* counterparts.  Each one evaluates to
 * non-zero when the two strings compare equal (for the `n' variants,
 * when their first N characters do).
 *
 * Both arguments are handed to the comparison function, so both must
 * be non-NULL: the assertion therefore requires S *and* T to be
 * non-NULL.  The assertion is only active when assert is enabled.
 */
#define streq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
#define strcaseeq(s,t)	(assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
#define strneq(s,t,n)	(assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
#define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
195
/* Character classification: each predicate below is a lookup in a
   CHARS-element table, indexed by the character reduced with CHAR.  */
196 #define CHARS 256 /* number of distinct char values: 2^8, i.e. assumes 8-bit chars */
197 #define CHAR(x) ((unsigned int)(x) & (CHARS - 1)) /* x masked down to an index in [0, CHARS) */
198 #define iswhite(c) (_wht[CHAR(c)]) /* c is white (see white) */
199 #define notinname(c) (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
200 #define begtoken(c) (_btk[CHAR(c)]) /* c can start token (see begtk) */
201 #define intoken(c) (_itk[CHAR(c)]) /* c can be in token (see midtk) */
202 #define endtoken(c) (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
203
204 #define ISALNUM(c) isalnum (CHAR(c))
205 #define ISALPHA(c) isalpha (CHAR(c))
206 #define ISDIGIT(c) isdigit (CHAR(c))
207 #define ISLOWER(c) islower (CHAR(c))
208
209 #define lowcase(c) tolower (CHAR(c))
210 #define upcase(c) toupper (CHAR(c))
211
212
213 /*
214 * xnew, xrnew -- allocate, reallocate storage
215 *
216 * SYNOPSIS: Type *xnew (int n, Type);
217 * void xrnew (OldPointer, int n, Type);
218 */
219 #if DEBUG
220 # include "chkmalloc.h"
221 # define xnew(n,Type) ((Type *) trace_malloc (__FILE__, __LINE__, \
222 (n) * sizeof (Type)))
223 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
224 (char *) (op), (n) * sizeof (Type)))
225 #else
226 # define xnew(n,Type) ((Type *) xmalloc ((n) * sizeof (Type)))
227 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
228 (char *) (op), (n) * sizeof (Type)))
229 #endif
230
231 #define bool int
232
233 typedef void Lang_function __P((FILE *));
234
235 typedef struct
236 {
237 char *suffix; /* file name suffix for this compressor */
238 char *command; /* takes one arg and decompresses to stdout */
239 } compressor;
240
241 typedef struct
242 {
243 char *name; /* language name */
244 char *help; /* detailed help for the language */
245 Lang_function *function; /* parse function */
246 char **suffixes; /* name suffixes of this language's files */
247 char **filenames; /* names of this language's files */
248 char **interpreters; /* interpreters for this language */
249 bool metasource; /* source used to generate other sources */
250 } language;
251
252 typedef struct fdesc
253 {
254 struct fdesc *next; /* for the linked list */
255 char *infname; /* uncompressed input file name */
256 char *infabsname; /* absolute uncompressed input file name */
257 char *infabsdir; /* absolute dir of input file */
258 char *taggedfname; /* file name to write in tagfile */
259 language *lang; /* language of file */
260 char *prop; /* file properties to write in tagfile */
261 bool usecharno; /* etags tags shall contain char number */
262 bool written; /* entry written in the tags file */
263 } fdesc;
264
265 typedef struct node_st
266 { /* sorting structure */
267 struct node_st *left, *right; /* left and right sons */
268 fdesc *fdp; /* description of file to whom tag belongs */
269 char *name; /* tag name */
270 char *regex; /* search regexp */
271 bool valid; /* write this tag on the tag file */
272 bool is_func; /* function tag: use regexp in CTAGS mode */
273 bool been_warned; /* warning already given for duplicated tag */
274 int lno; /* line number tag is on */
275 long cno; /* character number line starts on */
276 } node;
277
278 /*
279 * A `linebuffer' is a structure which holds a line of text.
280 * `readline_internal' reads a line from a stream into a linebuffer
281 * and works regardless of the length of the line.
282 * SIZE is the size of BUFFER, LEN is the length of the string in
283 * BUFFER after readline reads it.
284 */
285 typedef struct
286 {
287 long size;
288 int len;
289 char *buffer;
290 } linebuffer;
291
292 /* Used to support mixing of --lang and file names. */
293 typedef struct
294 {
295 enum {
296 at_language, /* a language specification */
297 at_regexp, /* a regular expression */
298 at_filename, /* a file name */
299 at_stdin, /* read from stdin here */
300 at_end /* stop parsing the list */
301 } arg_type; /* argument type */
302 language *lang; /* language associated with the argument */
303 char *what; /* the argument itself */
304 } argument;
305
306 /* Structure defining a regular expression. */
307 typedef struct regexp
308 {
309 struct regexp *p_next; /* pointer to next in list */
310 language *lang; /* if set, use only for this language */
311 char *pattern; /* the regexp pattern */
312 char *name; /* tag name */
313 struct re_pattern_buffer *pat; /* the compiled pattern */
314 struct re_registers regs; /* re registers */
315 bool error_signaled; /* already signaled for this regexp */
316 bool force_explicit_name; /* do not allow implicit tag name */
317 bool ignore_case; /* ignore case when matching */
318 bool multi_line; /* do a multi-line match on the whole file */
319 } regexp;
320
321
322 /* Many compilers barf on this:
323 Lang_function Ada_funcs;
324 so let's write it this way */
325 static void Ada_funcs __P((FILE *));
326 static void Asm_labels __P((FILE *));
327 static void C_entries __P((int c_ext, FILE *));
328 static void default_C_entries __P((FILE *));
329 static void plain_C_entries __P((FILE *));
330 static void Cjava_entries __P((FILE *));
331 static void Cobol_paragraphs __P((FILE *));
332 static void Cplusplus_entries __P((FILE *));
333 static void Cstar_entries __P((FILE *));
334 static void Erlang_functions __P((FILE *));
335 static void Forth_words __P((FILE *));
336 static void Fortran_functions __P((FILE *));
337 static void HTML_labels __P((FILE *));
338 static void Lisp_functions __P((FILE *));
339 static void Lua_functions __P((FILE *));
340 static void Makefile_targets __P((FILE *));
341 static void Pascal_functions __P((FILE *));
342 static void Perl_functions __P((FILE *));
343 static void PHP_functions __P((FILE *));
344 static void PS_functions __P((FILE *));
345 static void Prolog_functions __P((FILE *));
346 static void Python_functions __P((FILE *));
347 static void Scheme_functions __P((FILE *));
348 static void TeX_commands __P((FILE *));
349 static void Texinfo_nodes __P((FILE *));
350 static void Yacc_entries __P((FILE *));
351 static void just_read_file __P((FILE *));
352
353 static void print_language_names __P((void));
354 static void print_version __P((void));
355 static void print_help __P((argument *));
356 int main __P((int, char **));
357
358 static compressor *get_compressor_from_suffix __P((char *, char **));
359 static language *get_language_from_langname __P((const char *));
360 static language *get_language_from_interpreter __P((char *));
361 static language *get_language_from_filename __P((char *, bool));
362 static void readline __P((linebuffer *, FILE *));
363 static long readline_internal __P((linebuffer *, FILE *));
364 static bool nocase_tail __P((char *));
365 static void get_tag __P((char *, char **));
366
367 static void analyse_regex __P((char *));
368 static void free_regexps __P((void));
369 static void regex_tag_multiline __P((void));
370 static void error __P((const char *, const char *));
371 static void suggest_asking_for_help __P((void));
372 void fatal __P((char *, char *));
373 static void pfatal __P((char *));
374 static void add_node __P((node *, node **));
375
376 static void init __P((void));
377 static void process_file_name __P((char *, language *));
378 static void process_file __P((FILE *, char *, language *));
379 static void find_entries __P((FILE *));
380 static void free_tree __P((node *));
381 static void free_fdesc __P((fdesc *));
382 static void pfnote __P((char *, bool, char *, int, int, long));
383 static void make_tag __P((char *, int, bool, char *, int, int, long));
384 static void invalidate_nodes __P((fdesc *, node **));
385 static void put_entries __P((node *));
386
387 static char *concat __P((char *, char *, char *));
388 static char *skip_spaces __P((char *));
389 static char *skip_non_spaces __P((char *));
390 static char *savenstr __P((char *, int));
391 static char *savestr __P((char *));
392 static char *etags_strchr __P((const char *, int));
393 static char *etags_strrchr __P((const char *, int));
394 static int etags_strcasecmp __P((const char *, const char *));
395 static int etags_strncasecmp __P((const char *, const char *, int));
396 static char *etags_getcwd __P((void));
397 static char *relative_filename __P((char *, char *));
398 static char *absolute_filename __P((char *, char *));
399 static char *absolute_dirname __P((char *, char *));
400 static bool filename_is_absolute __P((char *f));
401 static void canonicalize_filename __P((char *));
402 static void linebuffer_init __P((linebuffer *));
403 static void linebuffer_setlen __P((linebuffer *, int));
404 static PTR xmalloc __P((unsigned int));
405 static PTR xrealloc __P((char *, unsigned int));
406
407 \f
408 static char searchar = '/'; /* use /.../ searches */
409
410 static char *tagfile; /* output file */
411 static char *progname; /* name this program was invoked with */
412 static char *cwd; /* current working directory */
413 static char *tagfiledir; /* directory of tagfile */
414 static FILE *tagf; /* ioptr for tags file */
415
416 static fdesc *fdhead; /* head of file description list */
417 static fdesc *curfdp; /* current file description */
418 static int lineno; /* line number of current line */
419 static long charno; /* current character number */
420 static long linecharno; /* charno of start of current line */
421 static char *dbp; /* pointer to start of current tag */
422
423 static const int invalidcharno = -1;
424
425 static node *nodehead; /* the head of the binary tree of tags */
426 static node *last_node; /* the last node created */
427
428 static linebuffer lb; /* the current line */
429 static linebuffer filebuf; /* a buffer containing the whole file */
430 static linebuffer token_name; /* a buffer containing a tag name */
431
432 /* boolean "functions" (see init) */
433 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
434 static char
435 /* white chars */
436 *white = " \f\t\n\r\v",
437 /* not in a name */
438 *nonam = " \f\t\n\r()=,;", /* look at make_tag before modifying! */
439 /* token ending chars */
440 *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
441 /* token starting chars */
442 *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
443 /* valid in-token chars */
444 *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
445
446 static bool append_to_tagfile; /* -a: append to tags */
447 /* The next four default to TRUE for etags, but to FALSE for ctags. */
448 static bool typedefs; /* -t: create tags for C and Ada typedefs */
449 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
450 /* 0 struct/enum/union decls, and C++ */
451 /* member functions. */
452 static bool constantypedefs; /* -d: create tags for C #define, enum */
453 /* constants and variables. */
454 /* -D: opposite of -d. Default under ctags. */
455 static bool globals; /* create tags for global variables */
456 static bool declarations; /* --declarations: tag them and extern in C&Co*/
457 static bool members; /* create tags for C member variables */
458 static bool no_line_directive; /* ignore #line directives (undocumented) */
459 static bool no_duplicates; /* no duplicate tags for ctags (undocumented) */
460 static bool update; /* -u: update tags */
461 static bool vgrind_style; /* -v: create vgrind style index output */
462 static bool no_warnings; /* -w: suppress warnings (undocumented) */
463 static bool cxref_style; /* -x: create cxref style output */
464 static bool cplusplus; /* .[hc] means C++, not C */
465 static bool ignoreindent; /* -I: ignore indentation in C */
466 static bool packages_only; /* --packages-only: in Ada, only tag packages*/
467
468 /* STDIN is defined in LynxOS system headers */
469 #ifdef STDIN
470 # undef STDIN
471 #endif
472
473 #define STDIN 0x1001 /* returned by getopt_long on --parse-stdin */
474 static bool parsing_stdin; /* --parse-stdin used */
475
476 static regexp *p_head; /* list of all regexps */
477 static bool need_filebuf; /* some regexes are multi-line */
478
479 static struct option longopts[] =
480 {
481 { "append", no_argument, NULL, 'a' },
482 { "packages-only", no_argument, &packages_only, TRUE },
483 { "c++", no_argument, NULL, 'C' },
484 { "declarations", no_argument, &declarations, TRUE },
485 { "no-line-directive", no_argument, &no_line_directive, TRUE },
486 { "no-duplicates", no_argument, &no_duplicates, TRUE },
487 { "help", no_argument, NULL, 'h' },
488 { "help", no_argument, NULL, 'H' },
489 { "ignore-indentation", no_argument, NULL, 'I' },
490 { "language", required_argument, NULL, 'l' },
491 { "members", no_argument, &members, TRUE },
492 { "no-members", no_argument, &members, FALSE },
493 { "output", required_argument, NULL, 'o' },
494 { "regex", required_argument, NULL, 'r' },
495 { "no-regex", no_argument, NULL, 'R' },
496 { "ignore-case-regex", required_argument, NULL, 'c' },
497 { "parse-stdin", required_argument, NULL, STDIN },
498 { "version", no_argument, NULL, 'V' },
499
500 #if CTAGS /* Ctags options */
501 { "backward-search", no_argument, NULL, 'B' },
502 { "cxref", no_argument, NULL, 'x' },
503 { "defines", no_argument, NULL, 'd' },
504 { "globals", no_argument, &globals, TRUE },
505 { "typedefs", no_argument, NULL, 't' },
506 { "typedefs-and-c++", no_argument, NULL, 'T' },
507 { "update", no_argument, NULL, 'u' },
508 { "vgrind", no_argument, NULL, 'v' },
509 { "no-warn", no_argument, NULL, 'w' },
510
511 #else /* Etags options */
512 { "no-defines", no_argument, NULL, 'D' },
513 { "no-globals", no_argument, &globals, FALSE },
514 { "include", required_argument, NULL, 'i' },
515 #endif
516 { NULL }
517 };
518
519 static compressor compressors[] =
520 {
521 { "z", "gzip -d -c"},
522 { "Z", "gzip -d -c"},
523 { "gz", "gzip -d -c"},
524 { "GZ", "gzip -d -c"},
525 { "bz2", "bzip2 -d -c" },
526 { NULL }
527 };
528
529 /*
530 * Language stuff.
531 */
532
533 /* Ada code */
534 static char *Ada_suffixes [] =
535 { "ads", "adb", "ada", NULL };
536 static char Ada_help [] =
537 "In Ada code, functions, procedures, packages, tasks and types are\n\
538 tags. Use the `--packages-only' option to create tags for\n\
539 packages only.\n\
540 Ada tag names have suffixes indicating the type of entity:\n\
541 Entity type: Qualifier:\n\
542 ------------ ----------\n\
543 function /f\n\
544 procedure /p\n\
545 package spec /s\n\
546 package body /b\n\
547 type /t\n\
548 task /k\n\
549 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
550 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
551 will just search for any tag `bidule'.";
552
553 /* Assembly code */
554 static char *Asm_suffixes [] =
555 { "a", /* Unix assembler */
556 "asm", /* Microcontroller assembly */
557 "def", /* BSO/Tasking definition includes */
558 "inc", /* Microcontroller include files */
559 "ins", /* Microcontroller include files */
560 "s", "sa", /* Unix assembler */
561 "S", /* cpp-processed Unix assembler */
562 "src", /* BSO/Tasking C compiler output */
563 NULL
564 };
565 static char Asm_help [] =
566 "In assembler code, labels appearing at the beginning of a line,\n\
567 followed by a colon, are tags.";
568
569
570 /* Note that .c and .h can be considered C++, if the --c++ flag was
571 given, or if the `class' or `template' keywords are met inside the file.
572 That is why default_C_entries is called for these. */
573 static char *default_C_suffixes [] =
574 { "c", "h", NULL };
575 static char default_C_help [] =
576 "In C code, any C function or typedef is a tag, and so are\n\
577 definitions of `struct', `union' and `enum'. `#define' macro\n\
578 definitions and `enum' constants are tags unless you specify\n\
579 `--no-defines'. Global variables are tags unless you specify\n\
580 `--no-globals'. Use of `--no-globals' and `--no-defines'\n\
581 can make the tags table file much smaller.\n\
582 You can tag function declarations and external variables by\n\
583 using `--declarations', and struct members by using `--members'.";
584
585 static char *Cplusplus_suffixes [] =
586 { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
587 "M", /* Objective C++ */
588 "pdb", /* Postscript with C syntax */
589 NULL };
590 static char Cplusplus_help [] =
591 "In C++ code, all the tag constructs of C code are tagged. (Use\n\
592 --help --lang=c --lang=c++ for full help.)\n\
593 In addition to C tags, member functions are also recognized, and\n\
594 optionally member variables if you use the `--members' option.\n\
595 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
596 and `CLASS::FUNCTION'. `operator' definitions have tag names like\n\
597 `operator+'.";
598
599 static char *Cjava_suffixes [] =
600 { "java", NULL };
601 static char Cjava_help [] =
602 "In Java code, all the tags constructs of C and C++ code are\n\
603 tagged. (Use --help --lang=c --lang=c++ --lang=java for full help.)";
604
605
606 static char *Cobol_suffixes [] =
607 { "COB", "cob", NULL };
608 static char Cobol_help [] =
609 "In Cobol code, tags are paragraph names; that is, any word\n\
610 starting in column 8 and followed by a period.";
611
612 static char *Cstar_suffixes [] =
613 { "cs", "hs", NULL };
614
615 static char *Erlang_suffixes [] =
616 { "erl", "hrl", NULL };
617 static char Erlang_help [] =
618 "In Erlang code, the tags are the functions, records and macros\n\
619 defined in the file.";
620
/* File name suffixes recognised as Forth.  Kept file-local (static),
   like the suffix tables of the other languages.  */
static char *Forth_suffixes [] =
  { "fth", "tok", NULL };
623 static char Forth_help [] =
624 "In Forth code, tags are words defined by `:',\n\
625 constant, code, create, defer, value, variable, buffer:, field.";
626
627 static char *Fortran_suffixes [] =
628 { "F", "f", "f90", "for", NULL };
629 static char Fortran_help [] =
630 "In Fortran code, functions, subroutines and block data are tags.";
631
632 static char *HTML_suffixes [] =
633 { "htm", "html", "shtml", NULL };
634 static char HTML_help [] =
635 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
636 `h3' headers. Also, tags are `name=' in anchors and all\n\
637 occurrences of `id='.";
638
639 static char *Lisp_suffixes [] =
640 { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
641 static char Lisp_help [] =
642 "In Lisp code, any function defined with `defun', any variable\n\
643 defined with `defvar' or `defconst', and in general the first\n\
644 argument of any expression that starts with `(def' in column zero\n\
645 is a tag.";
646
647 static char *Lua_suffixes [] =
648 { "lua", "LUA", NULL };
649 static char Lua_help [] =
650 "In Lua scripts, all functions are tags.";
651
652 static char *Makefile_filenames [] =
653 { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
654 static char Makefile_help [] =
655 "In makefiles, targets are tags; additionally, variables are tags\n\
656 unless you specify `--no-globals'.";
657
658 static char *Objc_suffixes [] =
659 { "lm", /* Objective lex file */
660 "m", /* Objective C file */
661 NULL };
662 static char Objc_help [] =
663 "In Objective C code, tags include Objective C definitions for classes,\n\
664 class categories, methods and protocols. Tags for variables and\n\
665 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
666 (Use --help --lang=c --lang=objc --lang=java for full help.)";
667
668 static char *Pascal_suffixes [] =
669 { "p", "pas", NULL };
670 static char Pascal_help [] =
671 "In Pascal code, the tags are the functions and procedures defined\n\
672 in the file.";
673 /* " // this is for working around an Emacs highlighting bug... */
674
675 static char *Perl_suffixes [] =
676 { "pl", "pm", NULL };
677 static char *Perl_interpreters [] =
678 { "perl", "@PERL@", NULL };
679 static char Perl_help [] =
680 "In Perl code, the tags are the packages, subroutines and variables\n\
681 defined by the `package', `sub', `my' and `local' keywords. Use\n\
682 `--globals' if you want to tag global variables. Tags for\n\
683 subroutines are named `PACKAGE::SUB'. The name for subroutines\n\
684 defined in the default package is `main::SUB'.";
685
686 static char *PHP_suffixes [] =
687 { "php", "php3", "php4", NULL };
688 static char PHP_help [] =
689 "In PHP code, tags are functions, classes and defines. When using\n\
690 the `--members' option, vars are tags too.";
691
692 static char *plain_C_suffixes [] =
693 { "pc", /* Pro*C file */
694 NULL };
695
696 static char *PS_suffixes [] =
697 { "ps", "psw", NULL }; /* .psw is for PSWrap */
698 static char PS_help [] =
699 "In PostScript code, the tags are the functions.";
700
701 static char *Prolog_suffixes [] =
702 { "prolog", NULL };
703 static char Prolog_help [] =
704 "In Prolog code, tags are predicates and rules at the beginning of\n\
705 line.";
706
707 static char *Python_suffixes [] =
708 { "py", NULL };
709 static char Python_help [] =
710 "In Python code, `def' or `class' at the beginning of a line\n\
711 generate a tag.";
712
713 /* Can't do the `SCM' or `scm' prefix with a version number. */
714 static char *Scheme_suffixes [] =
715 { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
716 static char Scheme_help [] =
717 "In Scheme code, tags include anything defined with `def' or with a\n\
718 construct whose name starts with `def'. They also include\n\
719 variables set with `set!' at top level in the file.";
720
721 static char *TeX_suffixes [] =
722 { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
723 static char TeX_help [] =
724 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
725 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
726 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
727 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
728 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
729 \n\
730 Other commands can be specified by setting the environment variable\n\
731 `TEXTAGS' to a colon-separated list like, for example,\n\
732 TEXTAGS=\"mycommand:myothercommand\".";
733
734
735 static char *Texinfo_suffixes [] =
736 { "texi", "texinfo", "txi", NULL };
737 static char Texinfo_help [] =
738 "for texinfo files, lines starting with @node are tagged.";
739
740 static char *Yacc_suffixes [] =
741 { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
742 static char Yacc_help [] =
743 "In Bison or Yacc input files, each rule defines as a tag the\n\
744 nonterminal it constructs. The portions of the file that contain\n\
745 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
746 for full help).";
747
/* Detailed help text for the `auto' pseudo-language (used as the
   `help' field of its entry in the table of languages).  */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files based on file name suffix and file contents.";
751
752 static char none_help [] =
753 "`none' is not a real language, it indicates to only do\n\
754 regexp processing on files.";
755
756 static char no_lang_help [] =
757 "No detailed help available for this language.";
758
759
760 /*
761 * Table of languages.
762 *
763 * It is ok for a given function to be listed under more than one
764 * name; plain_C_entries, for example, serves both objc and proc.
765 */
766
767 static language lang_names [] =
768 {
769 { "ada", Ada_help, Ada_funcs, Ada_suffixes },
770 { "asm", Asm_help, Asm_labels, Asm_suffixes },
771 { "c", default_C_help, default_C_entries, default_C_suffixes },
772 { "c++", Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
773 { "c*", no_lang_help, Cstar_entries, Cstar_suffixes },
774 { "cobol", Cobol_help, Cobol_paragraphs, Cobol_suffixes },
775 { "erlang", Erlang_help, Erlang_functions, Erlang_suffixes },
776 { "forth", Forth_help, Forth_words, Forth_suffixes },
777 { "fortran", Fortran_help, Fortran_functions, Fortran_suffixes },
778 { "html", HTML_help, HTML_labels, HTML_suffixes },
779 { "java", Cjava_help, Cjava_entries, Cjava_suffixes },
780 { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
781 { "lua", Lua_help, Lua_functions, Lua_suffixes },
782 { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
783 { "objc", Objc_help, plain_C_entries, Objc_suffixes },
784 { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
785 { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
786 { "php", PHP_help, PHP_functions, PHP_suffixes },
787 { "postscript",PS_help, PS_functions, PS_suffixes },
788 { "proc", no_lang_help, plain_C_entries, plain_C_suffixes },
789 { "prolog", Prolog_help, Prolog_functions, Prolog_suffixes },
790 { "python", Python_help, Python_functions, Python_suffixes },
791 { "scheme", Scheme_help, Scheme_functions, Scheme_suffixes },
792 { "tex", TeX_help, TeX_commands, TeX_suffixes },
793 { "texinfo", Texinfo_help, Texinfo_nodes, Texinfo_suffixes },
794 { "yacc", Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
795 { "auto", auto_help }, /* default guessing scheme */
796 { "none", none_help, just_read_file }, /* regexp matching only */
797 { NULL } /* end of list */
798 };
799
800 \f
801 static void
802 print_language_names ()
803 {
804 language *lang;
805 char **name, **ext;
806
807 puts ("\nThese are the currently supported languages, along with the\n\
808 default file names and dot suffixes:");
809 for (lang = lang_names; lang->name != NULL; lang++)
810 {
811 printf (" %-*s", 10, lang->name);
812 if (lang->filenames != NULL)
813 for (name = lang->filenames; *name != NULL; name++)
814 printf (" %s", *name);
815 if (lang->suffixes != NULL)
816 for (ext = lang->suffixes; *ext != NULL; ext++)
817 printf (" .%s", *ext);
818 puts ("");
819 }
820 puts ("where `auto' means use default language for files based on file\n\
821 name suffix, and `none' means only do regexp processing on files.\n\
822 If no language is specified and no matching suffix is found,\n\
823 the first line of the file is read for a sharp-bang (#!) sequence\n\
824 followed by the name of an interpreter. If no such sequence is found,\n\
825 Fortran is tried first; if no tags are found, C is tried next.\n\
826 When parsing any C file, a \"class\" or \"template\" keyword\n\
827 switches to C++.");
828 puts ("Compressed files are supported using gzip and bzip2.\n\
829 \n\
830 For detailed help on a given language use, for example,\n\
831 etags --help --lang=ada.");
832 }
833
834 #ifndef EMACS_NAME
835 # define EMACS_NAME "standalone"
836 #endif
837 #ifndef VERSION
838 # define VERSION "version"
839 #endif
840 static void
841 print_version ()
842 {
843 printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
844 puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
845 puts ("This program is distributed under the same terms as Emacs");
846
847 exit (EXIT_SUCCESS);
848 }
849
/*
 * Print help on standard output and exit with EXIT_SUCCESS; this
 * function does not return.
 *
 * ARGBUFFER is scanned up to its at_end entry.  For every at_language
 * entry the help text of that language is printed, with an empty line
 * between consecutive texts; if at least one was printed the function
 * exits right after the scan.  Otherwise the general usage message is
 * printed: the set of options described depends on CTAGS, and a
 * warning is added when NO_LONG_OPTIONS is set.  The list of languages
 * and the bug report address close the message.
 */
850 static void
851 print_help (argbuffer)
852 argument *argbuffer;
853 {
/* Becomes TRUE once help for at least one language has been printed. */
854 bool help_for_lang = FALSE;
855
/* Language-specific help: one text per at_language argument. */
856 for (; argbuffer->arg_type != at_end; argbuffer++)
857 if (argbuffer->arg_type == at_language)
858 {
859 if (help_for_lang)
860 puts ("");
861 puts (argbuffer->lang->help);
862 help_for_lang = TRUE;
863 }
864
/* When language help was printed, the general help is not shown. */
865 if (help_for_lang)
866 exit (EXIT_SUCCESS);
867
/* General usage message. */
868 printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
869 \n\
870 These are the options accepted by %s.\n", progname, progname);
871 if (NO_LONG_OPTIONS)
872 puts ("WARNING: long option names do not work with this executable,\n\
873 as it is not linked with GNU getopt.");
874 else
875 puts ("You may use unambiguous abbreviations for the long option names.");
876 puts (" A - as file name means read names from stdin (one per line).\n\
877 Absolute names are stored in the output file as they are.\n\
878 Relative ones are stored relative to the output file's directory.\n");
879
880 puts ("-a, --append\n\
881 Append tag entries to existing tags file.");
882
883 puts ("--packages-only\n\
884 For Ada files, only generate tags for packages.");
885
/* Options guarded by CTAGS are described only in ctags mode, those
   guarded by !CTAGS only in etags mode. */
886 if (CTAGS)
887 puts ("-B, --backward-search\n\
888 Write the search commands for the tag entries using '?', the\n\
889 backward-search command instead of '/', the forward-search command.");
890
891 /* This option is mostly obsolete, because etags can now automatically
892 detect C++. Retained for backward compatibility and for debugging and
893 experimentation. In principle, we could want to tag as C++ even
894 before any "class" or "template" keyword.
895 puts ("-C, --c++\n\
896 Treat files whose name suffix defaults to C language as C++ files.");
897 */
898
899 puts ("--declarations\n\
900 In C and derived languages, create tags for function declarations,");
901 if (CTAGS)
902 puts ("\tand create tags for extern variables if --globals is used.");
903 else
904 puts
905 ("\tand create tags for extern variables unless --no-globals is used.");
906
907 if (CTAGS)
908 puts ("-d, --defines\n\
909 Create tag entries for C #define constants and enum constants, too.");
910 else
911 puts ("-D, --no-defines\n\
912 Don't create tag entries for C #define constants and enum constants.\n\
913 This makes the tags file smaller.");
914
915 if (!CTAGS)
916 puts ("-i FILE, --include=FILE\n\
917 Include a note in tag file indicating that, when searching for\n\
918 a tag, one should also consult the tags file FILE after\n\
919 checking the current file.");
920
921 puts ("-l LANG, --language=LANG\n\
922 Force the following files to be considered as written in the\n\
923 named language up to the next --language=LANG option.");
924
925 if (CTAGS)
926 puts ("--globals\n\
927 Create tag entries for global variables in some languages.");
928 else
929 puts ("--no-globals\n\
930 Do not create tag entries for global variables in some\n\
931 languages. This makes the tags file smaller.");
932 puts ("--members\n\
933 Create tag entries for members of structures in some languages.");
934
935 puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
936 Make a tag for each line matching a regular expression pattern\n\
937 in the following files. {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
938 files only. REGEXFILE is a file containing one REGEXP per line.\n\
939 REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
940 optional. The TAGREGEXP pattern is anchored (as if preceded by ^).");
941 puts (" If TAGNAME/ is present, the tags created are named.\n\
942 For example Tcl named tags can be created with:\n\
943 --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
944 MODS are optional one-letter modifiers: `i' means to ignore case,\n\
945 `m' means to allow multi-line matches, `s' implies `m' and\n\
946 causes dot to match any character, including newline.");
947 puts ("-R, --no-regex\n\
948 Don't create tags from regexps for the following files.");
949 puts ("-I, --ignore-indentation\n\
950 In C and C++ do not assume that a closing brace in the first\n\
951 column is the final brace of a function or structure definition.");
952 puts ("-o FILE, --output=FILE\n\
953 Write the tags to FILE.");
954 puts ("--parse-stdin=NAME\n\
955 Read from standard input and record tags as belonging to file NAME.");
956
957 if (CTAGS)
958 {
959 puts ("-t, --typedefs\n\
960 Generate tag entries for C and Ada typedefs.");
961 puts ("-T, --typedefs-and-c++\n\
962 Generate tag entries for C typedefs, C struct/enum/union tags,\n\
963 and C++ member functions.");
964 }
965
966 if (CTAGS)
967 puts ("-u, --update\n\
968 Update the tag entries for the given files, leaving tag\n\
969 entries for other files in place. Currently, this is\n\
970 implemented by deleting the existing entries for the given\n\
971 files and then rewriting the new entries at the end of the\n\
972 tags file. It is often faster to simply rebuild the entire\n\
973 tag file than to use this.");
974
975 if (CTAGS)
976 {
977 puts ("-v, --vgrind\n\
978 Print on the standard output an index of items intended for\n\
979 human consumption, similar to the output of vgrind. The index\n\
980 is sorted, and gives the page number of each item.");
/* The two -w descriptions are compiled in only when
   PRINT_UNDOCUMENTED_OPTIONS_HELP is nonzero. */
981 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
982 puts ("-w, --no-duplicates\n\
983 Do not create duplicate tag entries, for compatibility with\n\
984 traditional ctags.");
985 puts ("-w, --no-warn\n\
986 Suppress warning messages about duplicate tag entries.");
987 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
988 puts ("-x, --cxref\n\
989 Like --vgrind, but in the style of cxref, rather than vgrind.\n\
990 The output uses line numbers instead of page numbers, but\n\
991 beyond that the differences are cosmetic; try both to see\n\
992 which you like.");
993 }
994
995 puts ("-V, --version\n\
996 Print the version of the program.\n\
997 -h, --help\n\
998 Print this help message.\n\
999 Followed by one or more `--language' options prints detailed\n\
1000 help about tag generation for the specified languages.");
1001
/* Append the table of supported languages. */
1002 print_language_names ();
1003
1004 puts ("");
1005 puts ("Report bugs to bug-gnu-emacs@gnu.org");
1006
1007 exit (EXIT_SUCCESS);
1008 }
1009
1010 \f
1011 #ifdef VMS /* VMS specific functions */
1012
1013 #define EOS '\0'
1014
1015 /* This is a BUG! ANY arbitrary limit is a BUG!
1016 Won't someone please fix this? */
1017 #define MAX_FILE_SPEC_LEN 255
/* Buffer in which fn_exp hands back one file name.  fn_exp passes the
   address of the whole structure to lib$find_file through a descriptor
   of class DSC$K_CLASS_VS, then terminates the text itself.  */
1018 typedef struct {
/* Index at which fn_exp stores the terminating EOS; presumably the
   current text length, filled in by lib$find_file -- TODO confirm.  */
1019 short curlen;
/* The file name characters, with room for one trailing EOS.  */
1020 char body[MAX_FILE_SPEC_LEN + 1];
1021 } vspec;
1022
1023 /*
1024 v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1025 returning in each successive call the next file name matching the input
1026 spec. The function expects that each in_spec passed
1027 to it will be processed to completion; in particular, up to and
1028 including the call following that in which the last matching name
1029 is returned, the function ignores the value of in_spec, and will
1030 only start processing a new spec with the following call.
1031 If an error occurs, on return out_spec contains the value
1032 of in_spec when the error occurred.
1033
1034 With each successive file name returned in out_spec, the
1035 function's return value is one. When there are no more matching
1036 names the function returns zero. If on the first call no file
1037 matches in_spec, or there is any other error, -1 is returned.
1038 */
1039
1040 #include <rmsdef.h>
1041 #include <descrip.h>
1042 #define OUTSIZE MAX_FILE_SPEC_LEN
1043 static short
1044 fn_exp (out, in)
1045 vspec *out;
1046 char *in;
1047 {
1048 static long context = 0;
1049 static struct dsc$descriptor_s o;
1050 static struct dsc$descriptor_s i;
1051 static bool pass1 = TRUE;
1052 long status;
1053 short retval;
1054
1055 if (pass1)
1056 {
1057 pass1 = FALSE;
1058 o.dsc$a_pointer = (char *) out;
1059 o.dsc$w_length = (short)OUTSIZE;
1060 i.dsc$a_pointer = in;
1061 i.dsc$w_length = (short)strlen(in);
1062 i.dsc$b_dtype = DSC$K_DTYPE_T;
1063 i.dsc$b_class = DSC$K_CLASS_S;
1064 o.dsc$b_dtype = DSC$K_DTYPE_VT;
1065 o.dsc$b_class = DSC$K_CLASS_VS;
1066 }
1067 if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1068 {
1069 out->body[out->curlen] = EOS;
1070 return 1;
1071 }
1072 else if (status == RMS$_NMF)
1073 retval = 0;
1074 else
1075 {
1076 strcpy(out->body, in);
1077 retval = -1;
1078 }
1079 lib$find_file_end(&context);
1080 pass1 = TRUE;
1081 return retval;
1082 }
1083
1084 /*
1085 v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1086 name of each file specified by the provided arg expanding wildcards.
1087 */
1088 static char *
1089 gfnames (arg, p_error)
1090 char *arg;
1091 bool *p_error;
1092 {
1093 static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1094
1095 switch (fn_exp (&filename, arg))
1096 {
1097 case 1:
1098 *p_error = FALSE;
1099 return filename.body;
1100 case 0:
1101 *p_error = FALSE;
1102 return NULL;
1103 default:
1104 *p_error = TRUE;
1105 return filename.body;
1106 }
1107 }
1108
1109 #ifndef OLD /* Newer versions of VMS do provide `system'. */
1110 system (cmd)
1111 char *cmd;
1112 {
1113 error ("%s", "system() function not implemented under VMS");
1114 }
1115 #endif
1116
1117 #define VERSION_DELIM ';'
1118 char *massage_name (s)
1119 char *s;
1120 {
1121 char *start = s;
1122
1123 for ( ; *s; s++)
1124 if (*s == VERSION_DELIM)
1125 {
1126 *s = EOS;
1127 break;
1128 }
1129 else
1130 *s = lowcase (*s);
1131 return start;
1132 }
1133 #endif /* VMS */
1134
1135 \f
1136 int
1137 main (argc, argv)
1138 int argc;
1139 char *argv[];
1140 {
1141 int i;
1142 unsigned int nincluded_files;
1143 char **included_files;
1144 argument *argbuffer;
1145 int current_arg, file_count;
1146 linebuffer filename_lb;
1147 bool help_asked = FALSE;
1148 #ifdef VMS
1149 bool got_err;
1150 #endif
1151 char *optstring;
1152 int opt;
1153
1154
1155 #ifdef DOS_NT
1156 _fmode = O_BINARY; /* all of files are treated as binary files */
1157 #endif /* DOS_NT */
1158
1159 progname = argv[0];
1160 nincluded_files = 0;
1161 included_files = xnew (argc, char *);
1162 current_arg = 0;
1163 file_count = 0;
1164
1165 /* Allocate enough no matter what happens. Overkill, but each one
1166 is small. */
1167 argbuffer = xnew (argc, argument);
1168
1169 /*
1170 * If etags, always find typedefs and structure tags. Why not?
1171 * Also default to find macro constants, enum constants and
1172 * global variables.
1173 */
1174 if (!CTAGS)
1175 {
1176 typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1177 globals = TRUE;
1178 }
1179
1180 /* When the optstring begins with a '-' getopt_long does not rearrange the
1181 non-options arguments to be at the end, but leaves them alone. */
1182 optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1183 "ac:Cf:Il:o:r:RSVhH",
1184 (CTAGS) ? "BxdtTuvw" : "Di:");
1185
1186 while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1187 switch (opt)
1188 {
1189 case 0:
1190 /* If getopt returns 0, then it has already processed a
1191 long-named option. We should do nothing. */
1192 break;
1193
1194 case 1:
1195 /* This means that a file name has been seen. Record it. */
1196 argbuffer[current_arg].arg_type = at_filename;
1197 argbuffer[current_arg].what = optarg;
1198 ++current_arg;
1199 ++file_count;
1200 break;
1201
1202 case STDIN:
1203 /* Parse standard input. Idea by Vivek <vivek@etla.org>. */
1204 argbuffer[current_arg].arg_type = at_stdin;
1205 argbuffer[current_arg].what = optarg;
1206 ++current_arg;
1207 ++file_count;
1208 if (parsing_stdin)
1209 fatal ("cannot parse standard input more than once", (char *)NULL);
1210 parsing_stdin = TRUE;
1211 break;
1212
1213 /* Common options. */
1214 case 'a': append_to_tagfile = TRUE; break;
1215 case 'C': cplusplus = TRUE; break;
1216 case 'f': /* for compatibility with old makefiles */
1217 case 'o':
1218 if (tagfile)
1219 {
1220 error ("-o option may only be given once.", (char *)NULL);
1221 suggest_asking_for_help ();
1222 /* NOTREACHED */
1223 }
1224 tagfile = optarg;
1225 break;
1226 case 'I':
1227 case 'S': /* for backward compatibility */
1228 ignoreindent = TRUE;
1229 break;
1230 case 'l':
1231 {
1232 language *lang = get_language_from_langname (optarg);
1233 if (lang != NULL)
1234 {
1235 argbuffer[current_arg].lang = lang;
1236 argbuffer[current_arg].arg_type = at_language;
1237 ++current_arg;
1238 }
1239 }
1240 break;
1241 case 'c':
1242 /* Backward compatibility: support obsolete --ignore-case-regexp. */
1243 optarg = concat (optarg, "i", ""); /* memory leak here */
1244 /* FALLTHRU */
1245 case 'r':
1246 argbuffer[current_arg].arg_type = at_regexp;
1247 argbuffer[current_arg].what = optarg;
1248 ++current_arg;
1249 break;
1250 case 'R':
1251 argbuffer[current_arg].arg_type = at_regexp;
1252 argbuffer[current_arg].what = NULL;
1253 ++current_arg;
1254 break;
1255 case 'V':
1256 print_version ();
1257 break;
1258 case 'h':
1259 case 'H':
1260 help_asked = TRUE;
1261 break;
1262
1263 /* Etags options */
1264 case 'D': constantypedefs = FALSE; break;
1265 case 'i': included_files[nincluded_files++] = optarg; break;
1266
1267 /* Ctags options. */
1268 case 'B': searchar = '?'; break;
1269 case 'd': constantypedefs = TRUE; break;
1270 case 't': typedefs = TRUE; break;
1271 case 'T': typedefs = typedefs_or_cplusplus = TRUE; break;
1272 case 'u': update = TRUE; break;
1273 case 'v': vgrind_style = TRUE; /*FALLTHRU*/
1274 case 'x': cxref_style = TRUE; break;
1275 case 'w': no_warnings = TRUE; break;
1276 default:
1277 suggest_asking_for_help ();
1278 /* NOTREACHED */
1279 }
1280
1281 /* No more options. Store the rest of arguments. */
1282 for (; optind < argc; optind++)
1283 {
1284 argbuffer[current_arg].arg_type = at_filename;
1285 argbuffer[current_arg].what = argv[optind];
1286 ++current_arg;
1287 ++file_count;
1288 }
1289
1290 argbuffer[current_arg].arg_type = at_end;
1291
1292 if (help_asked)
1293 print_help (argbuffer);
1294 /* NOTREACHED */
1295
1296 if (nincluded_files == 0 && file_count == 0)
1297 {
1298 error ("no input files specified.", (char *)NULL);
1299 suggest_asking_for_help ();
1300 /* NOTREACHED */
1301 }
1302
1303 if (tagfile == NULL)
1304 tagfile = CTAGS ? "tags" : "TAGS";
1305 cwd = etags_getcwd (); /* the current working directory */
1306 if (cwd[strlen (cwd) - 1] != '/')
1307 {
1308 char *oldcwd = cwd;
1309 cwd = concat (oldcwd, "/", "");
1310 free (oldcwd);
1311 }
1312 /* Relative file names are made relative to the current directory. */
1313 if (streq (tagfile, "-")
1314 || strneq (tagfile, "/dev/", 5))
1315 tagfiledir = cwd;
1316 else
1317 tagfiledir = absolute_dirname (tagfile, cwd);
1318
1319 init (); /* set up boolean "functions" */
1320
1321 linebuffer_init (&lb);
1322 linebuffer_init (&filename_lb);
1323 linebuffer_init (&filebuf);
1324 linebuffer_init (&token_name);
1325
1326 if (!CTAGS)
1327 {
1328 if (streq (tagfile, "-"))
1329 {
1330 tagf = stdout;
1331 #ifdef DOS_NT
1332 /* Switch redirected `stdout' to binary mode (setting `_fmode'
1333 doesn't take effect until after `stdout' is already open). */
1334 if (!isatty (fileno (stdout)))
1335 setmode (fileno (stdout), O_BINARY);
1336 #endif /* DOS_NT */
1337 }
1338 else
1339 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1340 if (tagf == NULL)
1341 pfatal (tagfile);
1342 }
1343
1344 /*
1345 * Loop through files finding functions.
1346 */
1347 for (i = 0; i < current_arg; i++)
1348 {
1349 static language *lang; /* non-NULL if language is forced */
1350 char *this_file;
1351
1352 switch (argbuffer[i].arg_type)
1353 {
1354 case at_language:
1355 lang = argbuffer[i].lang;
1356 break;
1357 case at_regexp:
1358 analyse_regex (argbuffer[i].what);
1359 break;
1360 case at_filename:
1361 #ifdef VMS
1362 while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1363 {
1364 if (got_err)
1365 {
1366 error ("can't find file %s\n", this_file);
1367 argc--, argv++;
1368 }
1369 else
1370 {
1371 this_file = massage_name (this_file);
1372 }
1373 #else
1374 this_file = argbuffer[i].what;
1375 #endif
1376 /* Input file named "-" means read file names from stdin
1377 (one per line) and use them. */
1378 if (streq (this_file, "-"))
1379 {
1380 if (parsing_stdin)
1381 fatal ("cannot parse standard input AND read file names from it",
1382 (char *)NULL);
1383 while (readline_internal (&filename_lb, stdin) > 0)
1384 process_file_name (filename_lb.buffer, lang);
1385 }
1386 else
1387 process_file_name (this_file, lang);
1388 #ifdef VMS
1389 }
1390 #endif
1391 break;
1392 case at_stdin:
1393 this_file = argbuffer[i].what;
1394 process_file (stdin, this_file, lang);
1395 break;
1396 }
1397 }
1398
1399 free_regexps ();
1400 free (lb.buffer);
1401 free (filebuf.buffer);
1402 free (token_name.buffer);
1403
1404 if (!CTAGS || cxref_style)
1405 {
1406 /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1407 put_entries (nodehead);
1408 free_tree (nodehead);
1409 nodehead = NULL;
1410 if (!CTAGS)
1411 {
1412 fdesc *fdp;
1413
1414 /* Output file entries that have no tags. */
1415 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1416 if (!fdp->written)
1417 fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1418
1419 while (nincluded_files-- > 0)
1420 fprintf (tagf, "\f\n%s,include\n", *included_files++);
1421
1422 if (fclose (tagf) == EOF)
1423 pfatal (tagfile);
1424 }
1425
1426 exit (EXIT_SUCCESS);
1427 }
1428
1429 if (update)
1430 {
1431 char cmd[BUFSIZ];
1432 for (i = 0; i < current_arg; ++i)
1433 {
1434 switch (argbuffer[i].arg_type)
1435 {
1436 case at_filename:
1437 case at_stdin:
1438 break;
1439 default:
1440 continue; /* the for loop */
1441 }
1442 sprintf (cmd,
1443 "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1444 tagfile, argbuffer[i].what, tagfile);
1445 if (system (cmd) != EXIT_SUCCESS)
1446 fatal ("failed to execute shell command", (char *)NULL);
1447 }
1448 append_to_tagfile = TRUE;
1449 }
1450
1451 tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1452 if (tagf == NULL)
1453 pfatal (tagfile);
1454 put_entries (nodehead); /* write all the tags (CTAGS) */
1455 free_tree (nodehead);
1456 nodehead = NULL;
1457 if (fclose (tagf) == EOF)
1458 pfatal (tagfile);
1459
1460 if (CTAGS)
1461 if (append_to_tagfile || update)
1462 {
1463 char cmd[2*BUFSIZ+20];
1464 /* Maybe these should be used:
1465 setenv ("LC_COLLATE", "C", 1);
1466 setenv ("LC_ALL", "C", 1); */
1467 sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1468 exit (system (cmd));
1469 }
1470 return EXIT_SUCCESS;
1471 }
1472
1473
1474 /*
1475 * Return a compressor given the file name. If EXTPTR is non-zero,
1476 * return a pointer into FILE where the compressor-specific
1477 * extension begins. If no compressor is found, NULL is returned
1478 * and EXTPTR is not significant.
1479 * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1480 */
1481 static compressor *
1482 get_compressor_from_suffix (file, extptr)
1483 char *file;
1484 char **extptr;
1485 {
1486 compressor *compr;
1487 char *slash, *suffix;
1488
1489 /* This relies on FN to be after canonicalize_filename,
1490 so we don't need to consider backslashes on DOS_NT. */
1491 slash = etags_strrchr (file, '/');
1492 suffix = etags_strrchr (file, '.');
1493 if (suffix == NULL || suffix < slash)
1494 return NULL;
1495 if (extptr != NULL)
1496 *extptr = suffix;
1497 suffix += 1;
1498 /* Let those poor souls who live with DOS 8+3 file name limits get
1499 some solace by treating foo.cgz as if it were foo.c.gz, etc.
1500 Only the first do loop is run if not MSDOS */
1501 do
1502 {
1503 for (compr = compressors; compr->suffix != NULL; compr++)
1504 if (streq (compr->suffix, suffix))
1505 return compr;
1506 if (!MSDOS)
1507 break; /* do it only once: not really a loop */
1508 if (extptr != NULL)
1509 *extptr = ++suffix;
1510 } while (*suffix != '\0');
1511 return NULL;
1512 }
1513
1514
1515
1516 /*
1517 * Return a language given the name.
1518 */
1519 static language *
1520 get_language_from_langname (name)
1521 const char *name;
1522 {
1523 language *lang;
1524
1525 if (name == NULL)
1526 error ("empty language name", (char *)NULL);
1527 else
1528 {
1529 for (lang = lang_names; lang->name != NULL; lang++)
1530 if (streq (name, lang->name))
1531 return lang;
1532 error ("unknown language \"%s\"", name);
1533 }
1534
1535 return NULL;
1536 }
1537
1538
1539 /*
1540 * Return a language given the interpreter name.
1541 */
1542 static language *
1543 get_language_from_interpreter (interpreter)
1544 char *interpreter;
1545 {
1546 language *lang;
1547 char **iname;
1548
1549 if (interpreter == NULL)
1550 return NULL;
1551 for (lang = lang_names; lang->name != NULL; lang++)
1552 if (lang->interpreters != NULL)
1553 for (iname = lang->interpreters; *iname != NULL; iname++)
1554 if (streq (*iname, interpreter))
1555 return lang;
1556
1557 return NULL;
1558 }
1559
1560
1561
1562 /*
1563 * Return a language given the file name.
1564 */
1565 static language *
1566 get_language_from_filename (file, case_sensitive)
1567 char *file;
1568 bool case_sensitive;
1569 {
1570 language *lang;
1571 char **name, **ext, *suffix;
1572
1573 /* Try whole file name first. */
1574 for (lang = lang_names; lang->name != NULL; lang++)
1575 if (lang->filenames != NULL)
1576 for (name = lang->filenames; *name != NULL; name++)
1577 if ((case_sensitive)
1578 ? streq (*name, file)
1579 : strcaseeq (*name, file))
1580 return lang;
1581
1582 /* If not found, try suffix after last dot. */
1583 suffix = etags_strrchr (file, '.');
1584 if (suffix == NULL)
1585 return NULL;
1586 suffix += 1;
1587 for (lang = lang_names; lang->name != NULL; lang++)
1588 if (lang->suffixes != NULL)
1589 for (ext = lang->suffixes; *ext != NULL; ext++)
1590 if ((case_sensitive)
1591 ? streq (*ext, suffix)
1592 : strcaseeq (*ext, suffix))
1593 return lang;
1594 return NULL;
1595 }
1596
1597 \f
1598 /*
1599 * This routine is called on each file argument.
1600 */
/*
 * FILE is canonicalized in place, then opened and handed to
 * process_file together with LANG.
 *
 * A file equal to the tags file (unless that is "-") is skipped with an
 * error message, and so is anything that is not a regular file.  A file
 * whose uncompressed name is already recorded in the fdhead list is
 * skipped silently.  If FILE cannot be stat'ed under the given name, a
 * name with the compressor suffix removed is tried when FILE has one,
 * otherwise FILE followed by each known compressor suffix.  A
 * compressed file is read through a pipe from the compressor command.
 * On every path the names allocated here are freed and last_node and
 * curfdp are reset to NULL.
 */
1601 static void
1602 process_file_name (file, lang)
1603 char *file;
1604 language *lang;
1605 {
1606 struct stat stat_buf;
1607 FILE *inf;
1608 fdesc *fdp;
1609 compressor *compr;
1610 char *compressed_name, *uncompressed_name;
/* real_name aliases whichever of the two names above is actually
   opened; the code below tests it by pointer identity. */
1611 char *ext, *real_name;
1612 int retval;
1613
1614 canonicalize_filename (file);
1615 if (streq (file, tagfile) && !streq (tagfile, "-"))
1616 {
1617 error ("skipping inclusion of %s in self.", file);
1618 return;
1619 }
/* Derive the compressed and uncompressed names from the suffix of FILE. */
1620 if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1621 {
1622 compressed_name = NULL;
1623 real_name = uncompressed_name = savestr (file);
1624 }
1625 else
1626 {
1627 real_name = compressed_name = savestr (file);
/* Everything before the compressor extension. */
1628 uncompressed_name = savenstr (file, ext - file);
1629 }
1630
1631 /* If the canonicalized uncompressed name
1632 has already been dealt with, skip it silently. */
1633 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1634 {
1635 assert (fdp->infname != NULL);
1636 if (streq (uncompressed_name, fdp->infname))
1637 goto cleanup;
1638 }
1639
1640 if (stat (real_name, &stat_buf) != 0)
1641 {
1642 /* Reset real_name and try with a different name. */
1643 real_name = NULL;
1644 if (compressed_name != NULL) /* try with the given suffix */
1645 {
1646 if (stat (uncompressed_name, &stat_buf) == 0)
1647 real_name = uncompressed_name;
1648 }
1649 else /* try all possible suffixes */
1650 {
/* When this loop stops on a match, compr is the compressor whose
   suffix was found; it is used below to build the command. */
1651 for (compr = compressors; compr->suffix != NULL; compr++)
1652 {
1653 compressed_name = concat (file, ".", compr->suffix);
1654 if (stat (compressed_name, &stat_buf) != 0)
1655 {
1656 if (MSDOS)
1657 {
/* Starting with the dot, delete one character at a time from the
   front of the appended suffix, retrying stat after each deletion. */
1658 char *suf = compressed_name + strlen (file);
1659 size_t suflen = strlen (compr->suffix) + 1;
1660 for ( ; suf[1]; suf++, suflen--)
1661 {
1662 memmove (suf, suf + 1, suflen);
1663 if (stat (compressed_name, &stat_buf) == 0)
1664 {
1665 real_name = compressed_name;
1666 break;
1667 }
1668 }
1669 if (real_name != NULL)
1670 break;
1671 } /* MSDOS */
1672 free (compressed_name);
1673 compressed_name = NULL;
1674 }
1675 else
1676 {
1677 real_name = compressed_name;
1678 break;
1679 }
1680 }
1681 }
1682 if (real_name == NULL)
1683 {
1684 perror (file);
1685 goto cleanup;
1686 }
1687 } /* try with a different name */
1688
1689 if (!S_ISREG (stat_buf.st_mode))
1690 {
1691 error ("skipping %s: it is not a regular file.", real_name);
1692 goto cleanup;
1693 }
/* A compressed file is read from a pipe running the compressor command
   on it; the command is the compressor's command, a space and the name. */
1694 if (real_name == compressed_name)
1695 {
1696 char *cmd = concat (compr->command, " ", real_name);
1697 inf = (FILE *) popen (cmd, "r");
1698 free (cmd);
1699 }
1700 else
1701 inf = fopen (real_name, "r");
1702 if (inf == NULL)
1703 {
1704 perror (real_name);
1705 goto cleanup;
1706 }
1707
/* The file is always passed on under its uncompressed name. */
1708 process_file (inf, uncompressed_name, lang);
1709
/* Close the stream the same way it was opened. */
1710 if (real_name == compressed_name)
1711 retval = pclose (inf);
1712 else
1713 retval = fclose (inf);
1714 if (retval < 0)
1715 pfatal (file);
1716
1717 cleanup:
1718 if (compressed_name) free (compressed_name);
1719 if (uncompressed_name) free (uncompressed_name);
1720 last_node = NULL;
1721 curfdp = NULL;
1722 return;
1723 }
1724
1725 static void
1726 process_file (fh, fn, lang)
1727 FILE *fh;
1728 char *fn;
1729 language *lang;
1730 {
1731 static const fdesc emptyfdesc;
1732 fdesc *fdp;
1733
1734 /* Create a new input file description entry. */
1735 fdp = xnew (1, fdesc);
1736 *fdp = emptyfdesc;
1737 fdp->next = fdhead;
1738 fdp->infname = savestr (fn);
1739 fdp->lang = lang;
1740 fdp->infabsname = absolute_filename (fn, cwd);
1741 fdp->infabsdir = absolute_dirname (fn, cwd);
1742 if (filename_is_absolute (fn))
1743 {
1744 /* An absolute file name. Canonicalize it. */
1745 fdp->taggedfname = absolute_filename (fn, NULL);
1746 }
1747 else
1748 {
1749 /* A file name relative to cwd. Make it relative
1750 to the directory of the tags file. */
1751 fdp->taggedfname = relative_filename (fn, tagfiledir);
1752 }
1753 fdp->usecharno = TRUE; /* use char position when making tags */
1754 fdp->prop = NULL;
1755 fdp->written = FALSE; /* not written on tags file yet */
1756
1757 fdhead = fdp;
1758 curfdp = fdhead; /* the current file description */
1759
1760 find_entries (fh);
1761
1762 /* If not Ctags, and if this is not metasource and if it contained no #line
1763 directives, we can write the tags and free all nodes pointing to
1764 curfdp. */
1765 if (!CTAGS
1766 && curfdp->usecharno /* no #line directives in this file */
1767 && !curfdp->lang->metasource)
1768 {
1769 node *np, *prev;
1770
1771 /* Look for the head of the sublist relative to this file. See add_node
1772 for the structure of the node tree. */
1773 prev = NULL;
1774 for (np = nodehead; np != NULL; prev = np, np = np->left)
1775 if (np->fdp == curfdp)
1776 break;
1777
1778 /* If we generated tags for this file, write and delete them. */
1779 if (np != NULL)
1780 {
1781 /* This is the head of the last sublist, if any. The following
1782 instructions depend on this being true. */
1783 assert (np->left == NULL);
1784
1785 assert (fdhead == curfdp);
1786 assert (last_node->fdp == curfdp);
1787 put_entries (np); /* write tags for file curfdp->taggedfname */
1788 free_tree (np); /* remove the written nodes */
1789 if (prev == NULL)
1790 nodehead = NULL; /* no nodes left */
1791 else
1792 prev->left = NULL; /* delete the pointer to the sublist */
1793 }
1794 }
1795 }
1796
1797 /*
1798 * This routine sets up the boolean pseudo-functions which work
1799 * by setting boolean flags dependent upon the corresponding character.
1800 * Every char which is NOT in that string is not a white char. Therefore,
1801 * all of the array "_wht" is set to FALSE, and then the elements
1802 * subscripted by the chars in "white" are set to TRUE. Thus "_wht"
1803 * of a char is TRUE if it is the string "white", else FALSE.
1804 */
1805 static void
1806 init ()
1807 {
1808 register char *sp;
1809 register int i;
1810
1811 for (i = 0; i < CHARS; i++)
1812 iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1813 for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1814 for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1815 notinname('\0') = notinname('\n');
1816 for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1817 begtoken('\0') = begtoken('\n');
1818 for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1819 intoken('\0') = intoken('\n');
1820 for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1821 endtoken('\0') = endtoken('\n');
1822 }
1823
1824 /*
1825 * This routine opens the specified file and calls the function
1826 * which finds the function and type definitions.
1827 */
1828 static void
1829 find_entries (inf)
1830 FILE *inf;
1831 {
1832 char *cp;
1833 language *lang = curfdp->lang;
1834 Lang_function *parser = NULL;
1835
1836 /* If user specified a language, use it. */
1837 if (lang != NULL && lang->function != NULL)
1838 {
1839 parser = lang->function;
1840 }
1841
1842 /* Else try to guess the language given the file name. */
1843 if (parser == NULL)
1844 {
1845 lang = get_language_from_filename (curfdp->infname, TRUE);
1846 if (lang != NULL && lang->function != NULL)
1847 {
1848 curfdp->lang = lang;
1849 parser = lang->function;
1850 }
1851 }
1852
1853 /* Else look for sharp-bang as the first two characters. */
1854 if (parser == NULL
1855 && readline_internal (&lb, inf) > 0
1856 && lb.len >= 2
1857 && lb.buffer[0] == '#'
1858 && lb.buffer[1] == '!')
1859 {
1860 char *lp;
1861
1862 /* Set lp to point at the first char after the last slash in the
1863 line or, if no slashes, at the first nonblank. Then set cp to
1864 the first successive blank and terminate the string. */
1865 lp = etags_strrchr (lb.buffer+2, '/');
1866 if (lp != NULL)
1867 lp += 1;
1868 else
1869 lp = skip_spaces (lb.buffer + 2);
1870 cp = skip_non_spaces (lp);
1871 *cp = '\0';
1872
1873 if (strlen (lp) > 0)
1874 {
1875 lang = get_language_from_interpreter (lp);
1876 if (lang != NULL && lang->function != NULL)
1877 {
1878 curfdp->lang = lang;
1879 parser = lang->function;
1880 }
1881 }
1882 }
1883
1884 /* We rewind here, even if inf may be a pipe. We fail if the
1885 length of the first line is longer than the pipe block size,
1886 which is unlikely. */
1887 rewind (inf);
1888
1889 /* Else try to guess the language given the case insensitive file name. */
1890 if (parser == NULL)
1891 {
1892 lang = get_language_from_filename (curfdp->infname, FALSE);
1893 if (lang != NULL && lang->function != NULL)
1894 {
1895 curfdp->lang = lang;
1896 parser = lang->function;
1897 }
1898 }
1899
1900 /* Else try Fortran or C. */
1901 if (parser == NULL)
1902 {
1903 node *old_last_node = last_node;
1904
1905 curfdp->lang = get_language_from_langname ("fortran");
1906 find_entries (inf);
1907
1908 if (old_last_node == last_node)
1909 /* No Fortran entries found. Try C. */
1910 {
1911 /* We do not tag if rewind fails.
1912 Only the file name will be recorded in the tags file. */
1913 rewind (inf);
1914 curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1915 find_entries (inf);
1916 }
1917 return;
1918 }
1919
1920 if (!no_line_directive
1921 && curfdp->lang != NULL && curfdp->lang->metasource)
1922 /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1923 file, or anyway we parsed a file that is automatically generated from
1924 this one. If this is the case, the bingo.c file contained #line
1925 directives that generated tags pointing to this file. Let's delete
1926 them all before parsing this file, which is the real source. */
1927 {
1928 fdesc **fdpp = &fdhead;
1929 while (*fdpp != NULL)
1930 if (*fdpp != curfdp
1931 && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1932 /* We found one of those! We must delete both the file description
1933 and all tags referring to it. */
1934 {
1935 fdesc *badfdp = *fdpp;
1936
1937 /* Delete the tags referring to badfdp->taggedfname
1938 that were obtained from badfdp->infname. */
1939 invalidate_nodes (badfdp, &nodehead);
1940
1941 *fdpp = badfdp->next; /* remove the bad description from the list */
1942 free_fdesc (badfdp);
1943 }
1944 else
1945 fdpp = &(*fdpp)->next; /* advance the list pointer */
1946 }
1947
1948 assert (parser != NULL);
1949
1950 /* Generic initialisations before reading from file. */
1951 linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1952
1953 /* Generic initialisations before parsing file with readline. */
1954 lineno = 0; /* reset global line number */
1955 charno = 0; /* reset global char number */
1956 linecharno = 0; /* reset global char number of line start */
1957
1958 parser (inf);
1959
1960 regex_tag_multiline ();
1961 }
1962
1963 \f
1964 /*
1965 * Check whether an implicitly named tag should be created,
1966 * then call `pfnote'.
1967 * NAME is a string that is internally copied by this function.
1968 *
1969 * TAGS format specification
1970 * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1971 * The following is explained in some more detail in etc/ETAGS.EBNF.
1972 *
1973 * make_tag creates tags with "implicit tag names" (unnamed tags)
1974 * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1975 * 1. NAME does not contain any of the characters in NONAM;
1976 * 2. LINESTART contains name as either a rightmost, or rightmost but
1977 * one character, substring;
1978 * 3. the character, if any, immediately before NAME in LINESTART must
1979 * be a character in NONAM;
1980 * 4. the character, if any, immediately after NAME in LINESTART must
1981 * also be a character in NONAM.
1982 *
1983 * The implementation uses the notinname() macro, which recognises the
1984 * characters stored in the string `nonam'.
1985 * etags.el needs to use the same characters that are in NONAM.
1986 */
1987 static void
1988 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1989 char *name; /* tag name, or NULL if unnamed */
1990 int namelen; /* tag length */
1991 bool is_func; /* tag is a function */
1992 char *linestart; /* start of the line where tag is */
1993 int linelen; /* length of the line where tag is */
1994 int lno; /* line number */
1995 long cno; /* character number */
1996 {
1997 bool named = (name != NULL && namelen > 0);
1998
1999 if (!CTAGS && named) /* maybe set named to false */
2000 /* Let's try to make an implicit tag name, that is, create an unnamed tag
2001 such that etags.el can guess a name from it. */
2002 {
2003 int i;
2004 register char *cp = name;
2005
2006 for (i = 0; i < namelen; i++)
2007 if (notinname (*cp++))
2008 break;
2009 if (i == namelen) /* rule #1 */
2010 {
2011 cp = linestart + linelen - namelen;
2012 if (notinname (linestart[linelen-1]))
2013 cp -= 1; /* rule #4 */
2014 if (cp >= linestart /* rule #2 */
2015 && (cp == linestart
2016 || notinname (cp[-1])) /* rule #3 */
2017 && strneq (name, cp, namelen)) /* rule #2 */
2018 named = FALSE; /* use implicit tag name */
2019 }
2020 }
2021
2022 if (named)
2023 name = savenstr (name, namelen);
2024 else
2025 name = NULL;
2026 pfnote (name, is_func, linestart, linelen, lno, cno);
2027 }
2028
2029 /* Record a tag. */
2030 static void
2031 pfnote (name, is_func, linestart, linelen, lno, cno)
2032 char *name; /* tag name, or NULL if unnamed */
2033 bool is_func; /* tag is a function */
2034 char *linestart; /* start of the line where tag is */
2035 int linelen; /* length of the line where tag is */
2036 int lno; /* line number */
2037 long cno; /* character number */
2038 {
2039 register node *np;
2040
2041 assert (name == NULL || name[0] != '\0');
2042 if (CTAGS && name == NULL)
2043 return;
2044
2045 np = xnew (1, node);
2046
2047 /* If ctags mode, change name "main" to M<thisfilename>. */
2048 if (CTAGS && !cxref_style && streq (name, "main"))
2049 {
2050 register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2051 np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2052 fp = etags_strrchr (np->name, '.');
2053 if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2054 fp[0] = '\0';
2055 }
2056 else
2057 np->name = name;
2058 np->valid = TRUE;
2059 np->been_warned = FALSE;
2060 np->fdp = curfdp;
2061 np->is_func = is_func;
2062 np->lno = lno;
2063 if (np->fdp->usecharno)
2064 /* Our char numbers are 0-base, because of C language tradition?
2065 ctags compatibility? old versions compatibility? I don't know.
2066 Anyway, since emacs's are 1-base we expect etags.el to take care
2067 of the difference. If we wanted to have 1-based numbers, we would
2068 uncomment the +1 below. */
2069 np->cno = cno /* + 1 */ ;
2070 else
2071 np->cno = invalidcharno;
2072 np->left = np->right = NULL;
2073 if (CTAGS && !cxref_style)
2074 {
2075 if (strlen (linestart) < 50)
2076 np->regex = concat (linestart, "$", "");
2077 else
2078 np->regex = savenstr (linestart, 50);
2079 }
2080 else
2081 np->regex = savenstr (linestart, linelen);
2082
2083 add_node (np, &nodehead);
2084 }
2085
2086 /*
2087 * free_tree ()
2088 * recurse on left children, iterate on right children.
2089 */
2090 static void
2091 free_tree (np)
2092 register node *np;
2093 {
2094 while (np)
2095 {
2096 register node *node_right = np->right;
2097 free_tree (np->left);
2098 if (np->name != NULL)
2099 free (np->name);
2100 free (np->regex);
2101 free (np);
2102 np = node_right;
2103 }
2104 }
2105
2106 /*
2107 * free_fdesc ()
2108 * delete a file description
2109 */
2110 static void
2111 free_fdesc (fdp)
2112 register fdesc *fdp;
2113 {
2114 if (fdp->infname != NULL) free (fdp->infname);
2115 if (fdp->infabsname != NULL) free (fdp->infabsname);
2116 if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2117 if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2118 if (fdp->prop != NULL) free (fdp->prop);
2119 free (fdp);
2120 }
2121
2122 /*
2123 * add_node ()
2124 * Adds a node to the tree of nodes. In etags mode, sort by file
2125 * name. In ctags mode, sort by tag name. Make no attempt at
2126 * balancing.
2127 *
2128 * add_node is the only function allowed to add nodes, so it can
2129 * maintain state.
2130 */
2131 static void
2132 add_node (np, cur_node_p)
2133 node *np, **cur_node_p;
2134 {
2135 register int dif;
2136 register node *cur_node = *cur_node_p;
2137
2138 if (cur_node == NULL)
2139 {
2140 *cur_node_p = np;
2141 last_node = np;
2142 return;
2143 }
2144
2145 if (!CTAGS)
2146 /* Etags Mode */
2147 {
2148 /* For each file name, tags are in a linked sublist on the right
2149 pointer. The first tags of different files are a linked list
2150 on the left pointer. last_node points to the end of the last
2151 used sublist. */
2152 if (last_node != NULL && last_node->fdp == np->fdp)
2153 {
2154 /* Let's use the same sublist as the last added node. */
2155 assert (last_node->right == NULL);
2156 last_node->right = np;
2157 last_node = np;
2158 }
2159 else if (cur_node->fdp == np->fdp)
2160 {
2161 /* Scanning the list we found the head of a sublist which is
2162 good for us. Let's scan this sublist. */
2163 add_node (np, &cur_node->right);
2164 }
2165 else
2166 /* The head of this sublist is not good for us. Let's try the
2167 next one. */
2168 add_node (np, &cur_node->left);
2169 } /* if ETAGS mode */
2170
2171 else
2172 {
2173 /* Ctags Mode */
2174 dif = strcmp (np->name, cur_node->name);
2175
2176 /*
2177 * If this tag name matches an existing one, then
2178 * do not add the node, but maybe print a warning.
2179 */
2180 if (no_duplicates && !dif)
2181 {
2182 if (np->fdp == cur_node->fdp)
2183 {
2184 if (!no_warnings)
2185 {
2186 fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2187 np->fdp->infname, lineno, np->name);
2188 fprintf (stderr, "Second entry ignored\n");
2189 }
2190 }
2191 else if (!cur_node->been_warned && !no_warnings)
2192 {
2193 fprintf
2194 (stderr,
2195 "Duplicate entry in files %s and %s: %s (Warning only)\n",
2196 np->fdp->infname, cur_node->fdp->infname, np->name);
2197 cur_node->been_warned = TRUE;
2198 }
2199 return;
2200 }
2201
2202 /* Actually add the node */
2203 add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2204 } /* if CTAGS mode */
2205 }
2206
2207 /*
2208 * invalidate_nodes ()
2209 * Scan the node tree and invalidate all nodes pointing to the
2210 * given file description (CTAGS case) or free them (ETAGS case).
2211 */
2212 static void
2213 invalidate_nodes (badfdp, npp)
2214 fdesc *badfdp;
2215 node **npp;
2216 {
2217 node *np = *npp;
2218
2219 if (np == NULL)
2220 return;
2221
2222 if (CTAGS)
2223 {
2224 if (np->left != NULL)
2225 invalidate_nodes (badfdp, &np->left);
2226 if (np->fdp == badfdp)
2227 np->valid = FALSE;
2228 if (np->right != NULL)
2229 invalidate_nodes (badfdp, &np->right);
2230 }
2231 else
2232 {
2233 assert (np->fdp != NULL);
2234 if (np->fdp == badfdp)
2235 {
2236 *npp = np->left; /* detach the sublist from the list */
2237 np->left = NULL; /* isolate it */
2238 free_tree (np); /* free it */
2239 invalidate_nodes (badfdp, npp);
2240 }
2241 else
2242 invalidate_nodes (badfdp, &np->left);
2243 }
2244 }
2245
2246 \f
/* Forward declarations of the helpers used when writing out the tags. */
static int total_size_of_entries __P((node *));
static int number_len __P((long));
2249
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero takes one digit. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2260
2261 /*
2262 * Return total number of characters that put_entries will output for
2263 * the nodes in the linked list at the right of the specified node.
2264 * This count is irrelevant with etags.el since emacs 19.34 at least,
2265 * but is still supplied for backward compatibility.
2266 */
2267 static int
2268 total_size_of_entries (np)
2269 register node *np;
2270 {
2271 register int total = 0;
2272
2273 for (; np != NULL; np = np->right)
2274 if (np->valid)
2275 {
2276 total += strlen (np->regex) + 1; /* pat\177 */
2277 if (np->name != NULL)
2278 total += strlen (np->name) + 1; /* name\001 */
2279 total += number_len ((long) np->lno) + 1; /* lno, */
2280 if (np->cno != invalidcharno) /* cno */
2281 total += number_len (np->cno);
2282 total += 1; /* newline */
2283 }
2284
2285 return total;
2286 }
2287
2288 static void
2289 put_entries (np)
2290 register node *np;
2291 {
2292 register char *sp;
2293 static fdesc *fdp = NULL;
2294
2295 if (np == NULL)
2296 return;
2297
2298 /* Output subentries that precede this one */
2299 if (CTAGS)
2300 put_entries (np->left);
2301
2302 /* Output this entry */
2303 if (np->valid)
2304 {
2305 if (!CTAGS)
2306 {
2307 /* Etags mode */
2308 if (fdp != np->fdp)
2309 {
2310 fdp = np->fdp;
2311 fprintf (tagf, "\f\n%s,%d\n",
2312 fdp->taggedfname, total_size_of_entries (np));
2313 fdp->written = TRUE;
2314 }
2315 fputs (np->regex, tagf);
2316 fputc ('\177', tagf);
2317 if (np->name != NULL)
2318 {
2319 fputs (np->name, tagf);
2320 fputc ('\001', tagf);
2321 }
2322 fprintf (tagf, "%d,", np->lno);
2323 if (np->cno != invalidcharno)
2324 fprintf (tagf, "%ld", np->cno);
2325 fputs ("\n", tagf);
2326 }
2327 else
2328 {
2329 /* Ctags mode */
2330 if (np->name == NULL)
2331 error ("internal error: NULL name in ctags mode.", (char *)NULL);
2332
2333 if (cxref_style)
2334 {
2335 if (vgrind_style)
2336 fprintf (stdout, "%s %s %d\n",
2337 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2338 else
2339 fprintf (stdout, "%-16s %3d %-16s %s\n",
2340 np->name, np->lno, np->fdp->taggedfname, np->regex);
2341 }
2342 else
2343 {
2344 fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2345
2346 if (np->is_func)
2347 { /* function or #define macro with args */
2348 putc (searchar, tagf);
2349 putc ('^', tagf);
2350
2351 for (sp = np->regex; *sp; sp++)
2352 {
2353 if (*sp == '\\' || *sp == searchar)
2354 putc ('\\', tagf);
2355 putc (*sp, tagf);
2356 }
2357 putc (searchar, tagf);
2358 }
2359 else
2360 { /* anything else; text pattern inadequate */
2361 fprintf (tagf, "%d", np->lno);
2362 }
2363 putc ('\n', tagf);
2364 }
2365 }
2366 } /* if this node contains a valid tag */
2367
2368 /* Output subentries that follow this one */
2369 put_entries (np->right);
2370 if (!CTAGS)
2371 put_entries (np->left);
2372 }
2373
2374 \f
/* C extensions.  These are bit masks: a keyword whose c_ext field is
   nonzero is recognised only when the language mask passed to C_symtype
   shares at least one bit with it.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};

static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));

/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
undef,          0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline.

 The Java-only keywords use the mask (C_JAVA & ~C_PLPL), that is the bit
 of C_JAVA which is not shared with C++.  Writing `!C_PLPL' there would
 make the whole mask zero, and a zero mask means "keyword in every
 language".  */
/*%<*/
/* C code produced by gperf version 3.0.1 */
/* Command-line: gperf -m 5  */
/* Computed positions: -k'2-3' */

struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
/* maximum key range = 33, duplicates = 0 */

#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}

static struct C_stab_entry *
in_word_set (str, len)
     register const char *str;
     register unsigned int len;
{
  enum
    {
      TOTAL_KEYWORDS = 32,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 15,
      MIN_HASH_VALUE = 2,
      MAX_HASH_VALUE = 34
    };

  static struct C_stab_entry wordlist[] =
    {
      {""}, {""},
      {"if",            0,                      st_C_ignore},
      {""},
      {"@end",          0,                      st_C_objend},
      {"union",         0,                      st_C_struct},
      {"define",        0,                      st_C_define},
      {"import",        (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"template",      0,                      st_C_template},
      {"operator",      C_PLPL,                 st_C_operator},
      {"@interface",    0,                      st_C_objprot},
      {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"friend",        C_PLPL,                 st_C_ignore},
      {"typedef",       0,                      st_C_typedef},
      {"return",        0,                      st_C_ignore},
      {"@implementation",0,                     st_C_objimpl},
      {"@protocol",     0,                      st_C_objprot},
      {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
      {"extern",        0,                      st_C_extern},
      {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"struct",        0,                      st_C_struct},
      {"domain",        C_STAR,                 st_C_struct},
      {"switch",        0,                      st_C_ignore},
      {"enum",          0,                      st_C_enum},
      {"for",           0,                      st_C_ignore},
      {"namespace",     C_PLPL,                 st_C_struct},
      {"class",         0,                      st_C_class},
      {"while",         0,                      st_C_ignore},
      {"undef",         0,                      st_C_define},
      {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"__attribute__", 0,                      st_C_attribute},
      {"SYSCALL",       0,                      st_C_gnumacro},
      {"ENTRY",         0,                      st_C_gnumacro},
      {"PSEUDO",        0,                      st_C_gnumacro},
      {"DEFUN",         0,                      st_C_gnumacro}
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
/*%>*/

/*
 * Return the symbol type of the LEN characters long token at STR when
 * read in the language described by the C_EXT mask, or st_none if the
 * token is not a keyword in that language.  A table entry whose own
 * mask is zero is a keyword in every language.
 */
static enum sym_type
C_symtype (str, len, c_ext)
     char *str;
     int len;
     int c_ext;
{
  register struct C_stab_entry *se = in_word_set (str, len);

  if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
    return st_none;
  return se->type;
}
2595
2596 \f
/*
 * Ignoring __attribute__ ((list))
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 * The initial value is only a placeholder that makes a use before any
 * class name has been stored easy to spot.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;


/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* whether a tag may be created from this
                                   token (presumably no tag is created
                                   when it is FALSE -- confirm in
                                   make_C_tag); the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2709
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above __P((int, char *, int));
static void popclass_above __P((int));
static void write_classname __P((linebuffer *, char *qualifier));

/* The two arrays are parallel: element I of each describes the class at
   nesting depth I.  A NULL name marks a level pushed without a name.  */
static struct {
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Note that this macro reads a variable called `bracelev' from the scope
   where it is used.  */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2729
2730 static void
2731 pushclass_above (bracelev, str, len)
2732 int bracelev;
2733 char *str;
2734 int len;
2735 {
2736 int nl;
2737
2738 popclass_above (bracelev);
2739 nl = cstack.nl;
2740 if (nl >= cstack.size)
2741 {
2742 int size = cstack.size *= 2;
2743 xrnew (cstack.cname, size, char *);
2744 xrnew (cstack.bracelev, size, int);
2745 }
2746 assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2747 cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2748 cstack.bracelev[nl] = bracelev;
2749 cstack.nl = nl + 1;
2750 }
2751
2752 static void
2753 popclass_above (bracelev)
2754 int bracelev;
2755 {
2756 int nl;
2757
2758 for (nl = cstack.nl - 1;
2759 nl >= 0 && cstack.bracelev[nl] >= bracelev;
2760 nl--)
2761 {
2762 if (cstack.cname[nl] != NULL)
2763 free (cstack.cname[nl]);
2764 cstack.nl = nl;
2765 }
2766 }
2767
2768 static void
2769 write_classname (cn, qualifier)
2770 linebuffer *cn;
2771 char *qualifier;
2772 {
2773 int i, len;
2774 int qlen = strlen (qualifier);
2775
2776 if (cstack.nl == 0 || cstack.cname[0] == NULL)
2777 {
2778 len = 0;
2779 cn->len = 0;
2780 cn->buffer[0] = '\0';
2781 }
2782 else
2783 {
2784 len = strlen (cstack.cname[0]);
2785 linebuffer_setlen (cn, len);
2786 strcpy (cn->buffer, cstack.cname[0]);
2787 }
2788 for (i = 1; i < cstack.nl; i++)
2789 {
2790 char *s;
2791 int slen;
2792
2793 s = cstack.cname[i];
2794 if (s == NULL)
2795 continue;
2796 slen = strlen (s);
2797 len += slen + qlen;
2798 linebuffer_setlen (cn, len);
2799 strncat (cn->buffer, qualifier, qlen);
2800 strncat (cn->buffer, s, slen);
2801 }
2802 }
2803
2804 \f
static bool consider_token __P((char *, int, int, int *, int, int, bool *));
static void make_C_tag __P((bool));

/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function or variable, or corresponds to a typedef, or
 *      is a struct/union/enum tag, or #define, or an enum constant.
 *
 *      *IS_FUNC_OR_VAR gets TRUE when the token is a function or
 *      variable name, a #define macro with args, an Objective C class
 *      or category name, or a C++ operator.  It is never set to FALSE
 *      here, except for a #define without args, so the caller must
 *      initialise it.  C_EXTP points to which language we are looking at.
 *
 *      The return value is TRUE when the token deserves a tag.
 *
 * Globals
 *      fvdef                   IN OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      inattribute             OUT
 *      fvextern                OUT
 *      objtag                  OUT
 *      token_name              OUT
 */

static bool
consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
     register char *str;        /* IN: token pointer */
     register int len;          /* IN: token length */
     register int c;            /* IN: first char after the token */
     int *c_extp;               /* IN, OUT: C extensions mask */
     int bracelev;              /* IN: brace level */
     int parlev;                /* IN: parenthesis level */
     bool *is_func_or_var;      /* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  static enum sym_type toktype;


  /* Classify the token according to the keyword table. */
  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = TRUE;
      return FALSE;
    }

  /*
   * Advance the definedef state machine.
   */
  switch (definedef)
    {
    case dnone:
      /* We're not on a preprocessor line. */
      if (toktype == st_C_gnumacro)
        {
          fvdef = fdefunkey;
          return FALSE;
        }
      break;
    case dsharpseen:
      /* First token after '#': both `define' and `undef' have type
         st_C_define; any other directive makes us skip the line. */
      if (toktype == st_C_define)
        {
          definedef = ddefineseen;
        }
      else
        {
          definedef = dignorerest;
        }
      return FALSE;
    case ddefineseen:
      /*
       * Make a tag for any macro, unless it is a constant
       * and constantypedefs is FALSE.
       */
      definedef = dignorerest;
      /* A macro has args when its name is immediately followed by '('. */
      *is_func_or_var = (c == '(');
      if (!*is_func_or_var && !constantypedefs)
        return FALSE;
      else
        return TRUE;
    case dignorerest:
      return FALSE;
    default:
      error ("internal error: definedef value.", (char *)NULL);
    }

  /*
   * Now typedefs
   */
  switch (typdef)
    {
    case tnone:
      if (toktype == st_C_typedef)
        {
          /* The typedef machine is started only if typedefs are wanted,
             but the function/variable machine is reset in any case. */
          if (typedefs)
            typdef = tkeyseen;
          fvextern = FALSE;
          fvdef = fvnone;
          return FALSE;
        }
      break;
    case tkeyseen:
      /* Token types not listed here leave the state unchanged. */
      switch (toktype)
        {
        case st_none:
        case st_C_class:
        case st_C_struct:
        case st_C_enum:
          typdef = ttypeseen;
        }
      break;
    case ttypeseen:
      if (structdef == snone && fvdef == fvnone)
        {
          fvdef = fvnameseen;
          return TRUE;
        }
      break;
    case tend:
      /* Just before the typedef tag: struct-like keywords are not a
         tag, anything else is. */
      switch (toktype)
        {
        case st_C_class:
        case st_C_struct:
        case st_C_enum:
          return FALSE;
        }
      return TRUE;
    }

  /*
   * This structdef business is NOT invoked when we are ctags and the
   * file is plain C.  This is because a struct tag may have the same
   * name as another tag, and this loses with ctags.
   */
  switch (toktype)
    {
    case st_C_javastruct:
      if (structdef == stagseen)
        structdef = scolonseen;
      return FALSE;
    case st_C_template:
    case st_C_class:
      /* Seeing `class' or `template' at top level, with every state
         machine idle, switches an auto-detecting parser to C++. */
      if ((*c_extp & C_AUTO)    /* automatic detection of C++ language */
          && bracelev == 0
          && definedef == dnone && structdef == snone
          && typdef == tnone && fvdef == fvnone)
        *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
      if (toktype == st_C_template)
        break;
      /* FALLTHRU */
    case st_C_struct:
    case st_C_enum:
      if (parlev == 0
          && fvdef != vignore
          && (typdef == tkeyseen
              || (typedefs_or_cplusplus && structdef == snone)))
        {
          /* Remember which keyword was seen and at which brace level;
             these are used later to recognise enum constants. */
          structdef = skeyseen;
          structtype = toktype;
          structbracelev = bracelev;
          if (fvdef == fvnameseen)
            fvdef = fvnone;
        }
      return FALSE;
    }

  /* The token following a struct-like keyword is the struct tag. */
  if (structdef == skeyseen)
    {
      structdef = stagseen;
      return TRUE;
    }

  if (typdef != tnone)
    definedef = dnone;

  /* Detect Objective C constructs. */
  switch (objdef)
    {
    case onone:
      switch (toktype)
        {
        case st_C_objprot:
          objdef = oprotocol;
          return FALSE;
        case st_C_objimpl:
          objdef = oimplementation;
          return FALSE;
        }
      break;
    case oimplementation:
      /* Save the class tag for functions or variables defined inside. */
      objtag = savenstr (str, len);
      objdef = oinbody;
      return FALSE;
    case oprotocol:
      /* Save the class tag for categories. */
      objtag = savenstr (str, len);
      objdef = otagseen;
      *is_func_or_var = TRUE;
      return TRUE;
    case oparenseen:
      objdef = ocatseen;
      *is_func_or_var = TRUE;
      return TRUE;
    case oinbody:
      break;
    case omethodsign:
      if (parlev == 0)
        {
          /* First part of a method name: start token_name afresh. */
          fvdef = fvnone;
          objdef = omethodtag;
          linebuffer_setlen (&token_name, len);
          strncpy (token_name.buffer, str, len);
          token_name.buffer[len] = '\0';
          return TRUE;
        }
      return FALSE;
    case omethodcolon:
      if (parlev == 0)
        objdef = omethodparm;
      return FALSE;
    case omethodparm:
      if (parlev == 0)
        {
          /* A further part of the method name: append it to what has
             been collected in token_name so far. */
          fvdef = fvnone;
          objdef = omethodtag;
          linebuffer_setlen (&token_name, token_name.len + len);
          strncat (token_name.buffer, str, len);
          return TRUE;
        }
      return FALSE;
    case oignore:
      if (toktype == st_C_objend)
        {
          /* Memory leakage here: the string pointed by objtag is
             never released, because many tests would be needed to
             avoid breaking on incorrect input code.  The amount of
             memory leaked here is the sum of the lengths of the
             class tags.
          free (objtag); */
          objdef = onone;
        }
      return FALSE;
    }

  /* A function, variable or enum constant? */
  switch (toktype)
    {
    case st_C_extern:
      fvextern = TRUE;
      /* Inside or after a parameter list, and while ignoring, the
         state is kept; otherwise the machine starts again. */
      switch (fvdef)
        {
        case finlist:
        case flistseen:
        case fignore:
        case vignore:
          break;
        default:
          fvdef = fvnone;
        }
      return FALSE;
    case st_C_ignore:
      fvextern = FALSE;
      fvdef = vignore;
      return FALSE;
    case st_C_operator:
      fvdef = foperator;
      *is_func_or_var = TRUE;
      return TRUE;
    case st_none:
      if (constantypedefs
          && structdef == snone
          && structtype == st_C_enum && bracelev > structbracelev)
        return TRUE;            /* enum constant */
      switch (fvdef)
        {
        case fdefunkey:
          if (bracelev > 0)
            break;
          fvdef = fdefunname;   /* GNU macro */
          *is_func_or_var = TRUE;
          return TRUE;
        case fvnone:
          switch (typdef)
            {
            case ttypeseen:
              return FALSE;
            case tnone:
              /* An asm statement is neither a function nor a variable. */
              if ((strneq (str, "asm", 3) && endtoken (str[3]))
                  || (strneq (str, "__asm__", 7) && endtoken (str[7])))
                {
                  fvdef = vignore;
                  return FALSE;
                }
              break;
            }
          /* FALLTHRU */
        case fvnameseen:
          /* A token ending in "::operator" starts a C++ operator
             definition, and reveals C++ to an auto-detecting parser. */
          if (len >= 10 && strneq (str+len-10, "::operator", 10))
            {
              if (*c_extp & C_AUTO) /* automatic detection of C++ */
                *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
              fvdef = foperator;
              *is_func_or_var = TRUE;
              return TRUE;
            }
          /* Inside braces only members of a struct are considered. */
          if (bracelev > 0 && !instruct)
            break;
          fvdef = fvnameseen;   /* function or variable */
          *is_func_or_var = TRUE;
          return TRUE;
        }
      break;
    }

  return FALSE;
}
3125
3126 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * All the macros below are meant to be expanded inside C_entries only:
 * they refer to its local variables (curndx, newndx, c_ext, lp, quotednl,
 * inf, savetoken) as well as to file-level state (token, definedef,
 * charno).
 */
static struct
{
  long linepos;                 /* value of charno when the line was read */
  linebuffer lb;                /* the text of the line */
} lbs[2];

/* True when the most recently read line lives in the current buffer. */
#define current_lb_is_new (newndx == curndx)
/* Make the other buffer the current one, so that the next line read
   does not overwrite the line stored in the buffer used so far. */
#define switch_line_buffers() (curndx = 1 - curndx)

/* Shorthands for the current and the newest buffer and their positions. */
#define curlb           (lbs[curndx].lb)
#define newlb           (lbs[newndx].lb)
#define curlinepos      (lbs[curndx].linepos)
#define newlinepos      (lbs[newndx].linepos)

/* Dialect tests on the c_ext bit mask. */
#define plainc          ((c_ext & C_EXT) == C_PLAIN)
#define cplpl           (c_ext & C_PLPL)
#define cjava           ((c_ext & C_JAVA) == C_JAVA)

/* Read the next input line into the current buffer, point lp at its
   start, clear the quoted-newline flag and record that the current
   buffer now holds the newest line.  Unlike CNL, this leaves definedef
   (the preprocessor-directive state) untouched. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also ends any preprocessor directive:
   a token saved in savetoken is restored into token, and definedef is
   reset to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3169
3170
3171 static void
3172 make_C_tag (isfun)
3173 bool isfun;
3174 {
3175 /* This function is never called when token.valid is FALSE, but
3176 we must protect against invalid input or internal errors. */
3177 if (!DEBUG && !token.valid)
3178 return;
3179
3180 if (token.valid)
3181 make_tag (token_name.buffer, token_name.len, isfun, token.line,
3182 token.offset+token.length+1, token.lineno, token.linepos);
3183 else /* this case is optimised away if !DEBUG */
3184 make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3185 token_name.len + 17, isfun, token.line,
3186 token.offset+token.length+1, token.lineno, token.linepos);
3187
3188 token.valid = FALSE;
3189 }
3190
3191
/*
 * C_entries ()
 *      This routine finds functions, variables, typedefs,
 *      #define's, enum constants and struct/union/enum definitions in
 *      C syntax and adds them to the list.
 *
 *      C_EXT is a bit mask selecting the dialect; it is tested through
 *      the plainc, cplpl and cjava macros and against YACC and C_AUTO.
 *      When C_AUTO is set, seeing `::' inside an identifier switches the
 *      mask to C++.  INF is the stream the source is read from.
 *
 *      The input is read one line at a time into one of the two `lbs'
 *      buffers and scanned one character at a time.  Each iteration of
 *      the main loop first gets rid of escapes, comments, strings,
 *      character constants and bracketed text, then assembles identifier
 *      tokens and submits them to consider_token, and finally reacts to
 *      the punctuation character that followed.  The state of the parse
 *      is kept in the file-level variables fvdef, typdef, structdef,
 *      definedef and objdef plus the nesting counters declared below.
 */
static void
C_entries (c_ext, inf)
     int c_ext;                 /* extension of C */
     FILE *inf;                 /* input file */
{
  register char c;              /* latest char read; '\0' for end of line */
  register char *lp;            /* pointer one beyond the character `c' */
  int curndx, newndx;           /* indices for current and new lb */
  register int tokoff;          /* offset in line of start of current token */
  register int toklen;          /* length of current token */
  char *qualifier;              /* string used to qualify names */
  int qlen;                     /* length of qualifier */
  int bracelev;                 /* current brace level */
  int bracketlev;               /* current bracket level */
  int parlev;                   /* current parenthesis level */
  int attrparlev;               /* __attribute__ parenthesis level */
  int templatelev;              /* current template level */
  int typdefbracelev;           /* bracelev where a typedef struct body begun */
  bool incomm, inquote, inchar, quotednl, midtoken;
  bool yacc_rules;              /* in the rules part of a yacc file */
  struct tok savetoken = {0};   /* token saved during preprocessor handling */


  /* Set up the two alternating line buffers and, the first time
     through, the arrays of the class-name stack. */
  linebuffer_init (&lbs[0].lb);
  linebuffer_init (&lbs[1].lb);
  if (cstack.size == 0)
    {
      cstack.size = (DEBUG) ? 1 : 4;
      cstack.nl = 0;
      cstack.cname = xnew (cstack.size, char *);
      cstack.bracelev = xnew (cstack.size, int);
    }

  tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
  curndx = newndx = 0;
  /* Start with an empty current line, so that the first character
     fetched by the loop below is the '\0' end-of-line marker. */
  lp = curlb.buffer;
  *lp = 0;

  /* Reset the whole parser state. */
  fvdef = fvnone; fvextern = FALSE; typdef = tnone;
  structdef = snone; definedef = dnone; objdef = onone;
  yacc_rules = FALSE;
  midtoken = inquote = inchar = incomm = quotednl = FALSE;
  token.valid = savetoken.valid = FALSE;
  bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
  /* Separator put between a class name and a member name when a
     qualified tag name is built. */
  if (cjava)
    { qualifier = "."; qlen = 1; }
  else
    { qualifier = "::"; qlen = 2; }


  while (!feof (inf))
    {
      c = *lp++;
      if (c == '\\')
        {
          /* If we are at the end of the line, the next character is a
             '\0'; do not skip it, because it is what tells us
             to read the next line.  */
          if (*lp == '\0')
            {
              quotednl = TRUE;
              continue;
            }
          /* Otherwise drop the escaped character and treat the pair
             as a blank. */
          lp++;
          c = ' ';
        }
      else if (incomm)
        {
          /* Inside a comment: only look for its end or for the end
             of the line. */
          switch (c)
            {
            case '*':
              if (*lp == '/')
                {
                  c = *lp++;
                  incomm = FALSE;
                }
              break;
            case '\0':
              /* Newlines inside comments do not end macro definitions in
                 traditional cpp. */
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else if (inquote)
        {
          /* Inside a string literal. */
          switch (c)
            {
            case '"':
              inquote = FALSE;
              break;
            case '\0':
              /* Newlines inside strings do not end macro definitions
                 in traditional cpp, even though compilers don't
                 usually accept them. */
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else if (inchar)
        {
          /* Inside a character constant. */
          switch (c)
            {
            case '\0':
              /* Hmmm, something went wrong. */
              CNL ();
              /* FALLTHRU */
            case '\'':
              inchar = FALSE;
              break;
            }
          continue;
        }
      else if (bracketlev > 0)
        {
          /* Inside square brackets: everything is skipped until the
             matching ']'.  Only the outermost ']' is handed on to the
             punctuation switch at the bottom of the loop. */
          switch (c)
            {
            case ']':
              if (--bracketlev > 0)
                continue;
              break;
            case '\0':
              CNL_SAVE_DEFINEDEF ();
              break;
            }
          continue;
        }
      else switch (c)
        {
          /* Characters that open one of the modes handled above. */
        case '"':
          inquote = TRUE;
          if (inattribute)
            break;
          switch (fvdef)
            {
            case fdefunkey:
            case fstartlist:
            case finlist:
            case fignore:
            case vignore:
              break;
            default:
              fvextern = FALSE;
              fvdef = fvnone;
            }
          continue;
        case '\'':
          inchar = TRUE;
          if (inattribute)
            break;
          if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
            {
              fvextern = FALSE;
              fvdef = fvnone;
            }
          continue;
        case '/':
          if (*lp == '*')
            {
              lp++;
              incomm = TRUE;
              continue;
            }
          else if (/* cplpl && */ *lp == '/')
            {
              /* A `//' comment: pretend the line ends here. */
              c = '\0';
              break;
            }
          else
            break;
        case '%':
          if ((c_ext & YACC) && *lp == '%')
            {
              /* Entering or exiting rules section in yacc file. */
              lp++;
              definedef = dnone; fvdef = fvnone; fvextern = FALSE;
              typdef = tnone; structdef = snone;
              midtoken = inquote = inchar = incomm = quotednl = FALSE;
              bracelev = 0;
              yacc_rules = !yacc_rules;
              continue;
            }
          else
            break;
        case '#':
          if (definedef == dnone)
            {
              char *cp;
              bool cpptoken = TRUE;

              /* Look back on this line.  If all blanks, or nonblanks
                 followed by an end of comment, this is a preprocessor
                 token. */
              for (cp = newlb.buffer; cp < lp-1; cp++)
                if (!iswhite (*cp))
                  {
                    if (*cp == '*' && *(cp+1) == '/')
                      {
                        cp++;
                        cpptoken = TRUE;
                      }
                    else
                      cpptoken = FALSE;
                  }
              if (cpptoken)
                definedef = dsharpseen;
            } /* if (definedef == dnone) */
          continue;
        case '[':
          bracketlev++;
          continue;
        } /* switch (c) */


      /* Consider token only if some involved conditions are satisfied. */
      if (typdef != tignore
          && definedef != dignorerest
          && fvdef != finlist
          && templatelev == 0
          && (definedef != dnone
              || structdef != scolonseen)
          && !inattribute)
        {
          if (midtoken)
            {
              if (endtoken (c))
                {
                  if (c == ':' && *lp == ':' && begtoken (lp[1]))
                    /* This handles :: in the middle,
                       but not at the beginning of an identifier.
                       Also, space-separated :: is not recognised. */
                    {
                      if (c_ext & C_AUTO) /* automatic detection of C++ */
                        c_ext = (c_ext | C_PLPL) & ~C_AUTO;
                      lp += 2;
                      toklen += 2;
                      c = lp[-1];
                      goto still_in_token;
                    }
                  else
                    {
                      bool funorvar = FALSE;

                      /* The token is complete.  In yacc rules every
                         token is accepted, otherwise consider_token
                         decides and updates the parser state. */
                      if (yacc_rules
                          || consider_token (newlb.buffer + tokoff, toklen, c,
                                             &c_ext, bracelev, parlev,
                                             &funorvar))
                        {
                          if (fvdef == foperator)
                            {
                              /* Extend the token so that it also covers
                                 the operator symbol: skip blanks, then
                                 take everything up to the next blank
                                 or '('. */
                              char *oldlp = lp;
                              lp = skip_spaces (lp-1);
                              if (*lp != '\0')
                                lp += 1;
                              while (*lp != '\0'
                                     && !iswhite (*lp) && *lp != '(')
                                lp += 1;
                              c = *lp++;
                              toklen += lp - oldlp;
                            }
                          /* Build the tag name in token_name. */
                          token.named = FALSE;
                          if (!plainc
                              && nestlev > 0 && definedef == dnone)
                            /* in struct body */
                            {
                              write_classname (&token_name, qualifier);
                              linebuffer_setlen (&token_name,
                                                 token_name.len+qlen+toklen);
                              strcat (token_name.buffer, qualifier);
                              strncat (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              token.named = TRUE;
                            }
                          else if (objdef == ocatseen)
                            /* Objective C category */
                            {
                              /* The name has the form `class(category)':
                                 2 accounts for the two parentheses. */
                              int len = strlen (objtag) + 2 + toklen;
                              linebuffer_setlen (&token_name, len);
                              strcpy (token_name.buffer, objtag);
                              strcat (token_name.buffer, "(");
                              strncat (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              strcat (token_name.buffer, ")");
                              token.named = TRUE;
                            }
                          else if (objdef == omethodtag
                                   || objdef == omethodparm)
                            /* Objective C method */
                            {
                              token.named = TRUE;
                            }
                          else if (fvdef == fdefunname)
                            /* GNU DEFUN and similar macros */
                            {
                              bool defun = (newlb.buffer[tokoff] == 'F');
                              int off = tokoff;
                              int len = toklen;

                              /* Rewrite the tag so that emacs lisp DEFUNs
                                 can be found by their elisp name */
                              if (defun)
                                {
                                  off += 1;
                                  len -= 1;
                                }
                              linebuffer_setlen (&token_name, len);
                              strncpy (token_name.buffer,
                                       newlb.buffer + off, len);
                              token_name.buffer[len] = '\0';
                              if (defun)
                                while (--len >= 0)
                                  if (token_name.buffer[len] == '_')
                                    token_name.buffer[len] = '-';
                              token.named = defun;
                            }
                          else
                            {
                              linebuffer_setlen (&token_name, toklen);
                              strncpy (token_name.buffer,
                                       newlb.buffer + tokoff, toklen);
                              token_name.buffer[toklen] = '\0';
                              /* Name macros and members. */
                              token.named = (structdef == stagseen
                                             || typdef == ttypeseen
                                             || typdef == tend
                                             || (funorvar
                                                 && definedef == dignorerest)
                                             || (funorvar
                                                 && definedef == dnone
                                                 && structdef == snone
                                                 && bracelev > 0));
                            }
                          /* Remember where the token was found. */
                          token.lineno = lineno;
                          token.offset = tokoff;
                          token.length = toklen;
                          token.line = newlb.buffer;
                          token.linepos = newlinepos;
                          token.valid = TRUE;

                          if (definedef == dnone
                              && (fvdef == fvnameseen
                                  || fvdef == foperator
                                  || structdef == stagseen
                                  || typdef == tend
                                  || typdef == ttypeseen
                                  || objdef != onone))
                            {
                              /* The tag cannot be made yet.  Switch
                                 buffers so that reading the next line
                                 does not overwrite the line that
                                 token.line points into. */
                              if (current_lb_is_new)
                                switch_line_buffers ();
                            }
                          else if (definedef != dnone
                                   || fvdef == fdefunname
                                   || instruct)
                            make_C_tag (funorvar);
                        }
                      else /* not yacc and consider_token failed */
                        {
                          if (inattribute && fvdef == fignore)
                            {
                              /* We have just met __attribute__ after a
                                 function parameter list: do not tag the
                                 function again. */
                              fvdef = fvnone;
                            }
                        }
                      midtoken = FALSE;
                    }
                } /* if (endtoken (c)) */
              else if (intoken (c))
                still_in_token:
                {
                  toklen++;
                  continue;
                }
            } /* if (midtoken) */
          else if (begtoken (c))
            {
              switch (definedef)
                {
                case dnone:
                  switch (fvdef)
                    {
                    case fstartlist:
                      /* This prevents tagging fb in
                         void (__attribute__((noreturn)) *fb) (void);
                         Fixing this is not easy and not very important. */
                      fvdef = finlist;
                      continue;
                    case flistseen:
                      if (plainc || declarations)
                        {
                          make_C_tag (TRUE); /* a function */
                          fvdef = fignore;
                        }
                      break;
                    }
                  if (structdef == stagseen && !cjava)
                    {
                      popclass_above (bracelev);
                      structdef = snone;
                    }
                  break;
                case dsharpseen:
                  /* Put the pending token aside; CNL restores it once
                     the preprocessor line has been read. */
                  savetoken = token;
                  break;
                }
              /* Start a new token here.  Inside yacc rules only a token
                 starting in the first column is considered. */
              if (!yacc_rules || lp == newlb.buffer + 1)
                {
                  tokoff = lp - 1 - newlb.buffer;
                  toklen = 1;
                  midtoken = TRUE;
                }
              continue;
            } /* if (begtoken) */
        } /* if must look at token */


      /* Detect end of line, colon, comma, semicolon and various braces
         after having handled a token.*/
      switch (c)
        {
        case ':':
          if (inattribute)
            break;
          if (yacc_rules && token.offset == 0 && token.valid)
            {
              make_C_tag (FALSE); /* a yacc function */
              break;
            }
          if (definedef != dnone)
            break;
          switch (objdef)
            {
            case otagseen:
              objdef = oignore;
              make_C_tag (TRUE); /* an Objective C class */
              break;
            case omethodtag:
            case omethodparm:
              /* Append the colon to the method name built so far. */
              objdef = omethodcolon;
              linebuffer_setlen (&token_name, token_name.len + 1);
              strcat (token_name.buffer, ":");
              break;
            }
          if (structdef == stagseen)
            {
              structdef = scolonseen;
              break;
            }
          /* Should be useless, but may be work as a safety net. */
          if (cplpl && fvdef == flistseen)
            {
              make_C_tag (TRUE); /* a function */
              fvdef = fignore;
              break;
            }
          break;
        case ';':
          if (definedef != dnone || inattribute)
            break;
          switch (typdef)
            {
            case tend:
            case ttypeseen:
              make_C_tag (FALSE); /* a typedef */
              typdef = tnone;
              fvdef = fvnone;
              break;
            case tnone:
            case tinbody:
            case tignore:
              switch (fvdef)
                {
                case fignore:
                  if (typdef == tignore || cplpl)
                    fvdef = fvnone;
                  break;
                case fvnameseen:
                  if ((globals && bracelev == 0 && (!fvextern || declarations))
                      || (members && instruct))
                    make_C_tag (FALSE); /* a variable */
                  fvextern = FALSE;
                  fvdef = fvnone;
                  token.valid = FALSE;
                  break;
                case flistseen:
                  if ((declarations
                       && (cplpl || !instruct)
                       && (typdef == tnone || (typdef != tignore && instruct)))
                      || (members
                          && plainc && instruct))
                    make_C_tag (TRUE); /* a function */
                  /* FALLTHRU */
                default:
                  fvextern = FALSE;
                  fvdef = fvnone;
                  if (declarations
                       && cplpl && structdef == stagseen)
                    make_C_tag (FALSE); /* forward declaration */
                  else
                    token.valid = FALSE;
                } /* switch (fvdef) */
              /* FALLTHRU */
            default:
              if (!instruct)
                typdef = tnone;
            }
          if (structdef == stagseen)
            structdef = snone;
          break;
        case ',':
          if (definedef != dnone || inattribute)
            break;
          switch (objdef)
            {
            case omethodtag:
            case omethodparm:
              make_C_tag (TRUE); /* an Objective C method */
              objdef = oinbody;
              break;
            }
          switch (fvdef)
            {
            case fdefunkey:
            case foperator:
            case fstartlist:
            case finlist:
            case fignore:
            case vignore:
              break;
            case fdefunname:
              fvdef = fignore;
              break;
            case fvnameseen:
              if (parlev == 0
                  && ((globals
                       && bracelev == 0
                       && templatelev == 0
                       && (!fvextern || declarations))
                      || (members && instruct)))
                  make_C_tag (FALSE); /* a variable */
              break;
            case flistseen:
              if ((declarations && typdef == tnone && !instruct)
                  || (members && typdef != tignore && instruct))
                {
                  make_C_tag (TRUE); /* a function */
                  fvdef = fvnameseen;
                }
              else if (!declarations)
                fvdef = fvnone;
              token.valid = FALSE;
              break;
            default:
              fvdef = fvnone;
            }
          if (structdef == stagseen)
            structdef = snone;
          break;
        case ']':
          /* Reached only for the ']' that closes the outermost '['. */
          if (definedef != dnone || inattribute)
            break;
          if (structdef == stagseen)
            structdef = snone;
          switch (typdef)
            {
            case ttypeseen:
            case tend:
              typdef = tignore;
              make_C_tag (FALSE);       /* a typedef */
              break;
            case tnone:
            case tinbody:
              switch (fvdef)
                {
                case foperator:
                case finlist:
                case fignore:
                case vignore:
                  break;
                case fvnameseen:
                  if ((members && bracelev == 1)
                      || (globals && bracelev == 0
                          && (!fvextern || declarations)))
                    make_C_tag (FALSE); /* a variable */
                  /* FALLTHRU */
                default:
                  fvdef = fvnone;
                }
              break;
            }
          break;
        case '(':
          if (inattribute)
            {
              attrparlev++;
              break;
            }
          if (definedef != dnone)
            break;
          if (objdef == otagseen && parlev == 0)
            objdef = oparenseen;
          switch (fvdef)
            {
            case fvnameseen:
              if (typdef == ttypeseen
                  && *lp != '*'
                  && !instruct)
                {
                  /* This handles constructs like:
                     typedef void OperatorFun (int fun); */
                  make_C_tag (FALSE);
                  typdef = tignore;
                  fvdef = fignore;
                  break;
                }
              /* FALLTHRU */
            case foperator:
              fvdef = fstartlist;
              break;
            case flistseen:
              fvdef = finlist;
              break;
            }
          parlev++;
          break;
        case ')':
          if (inattribute)
            {
              /* The parenthesis closing the attribute ends it. */
              if (--attrparlev == 0)
                inattribute = FALSE;
              break;
            }
          if (definedef != dnone)
            break;
          if (objdef == ocatseen && parlev == 1)
            {
              make_C_tag (TRUE); /* an Objective C category */
              objdef = oignore;
            }
          if (--parlev == 0)
            {
              switch (fvdef)
                {
                case fstartlist:
                case finlist:
                  fvdef = flistseen;
                  break;
                }
              if (!instruct
                  && (typdef == tend
                      || typdef == ttypeseen))
                {
                  typdef = tignore;
                  make_C_tag (FALSE); /* a typedef */
                }
            }
          else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
            parlev = 0;
          break;
        case '{':
          if (definedef != dnone)
            break;
          if (typdef == ttypeseen)
            {
              /* Whenever typdef is set to tinbody (currently only
                 here), typdefbracelev should be set to bracelev. */
              typdef = tinbody;
              typdefbracelev = bracelev;
            }
          switch (fvdef)
            {
            case flistseen:
              make_C_tag (TRUE);    /* a function */
              /* FALLTHRU */
            case fignore:
              fvdef = fvnone;
              break;
            case fvnone:
              switch (objdef)
                {
                case otagseen:
                  make_C_tag (TRUE); /* an Objective C class */
                  objdef = oignore;
                  break;
                case omethodtag:
                case omethodparm:
                  make_C_tag (TRUE); /* an Objective C method */
                  objdef = oinbody;
                  break;
                default:
                  /* Neutralize `extern "C" {' grot. */
                  if (bracelev == 0 && structdef == snone && nestlev == 0
                      && typdef == tnone)
                    bracelev = -1;
                }
              break;
            }
          switch (structdef)
            {
            case skeyseen:         /* unnamed struct */
              pushclass_above (bracelev, NULL, 0);
              structdef = snone;
              break;
            case stagseen:         /* named struct or enum */
            case scolonseen:       /* a class */
              pushclass_above (bracelev,token.line+token.offset, token.length);
              structdef = snone;
              make_C_tag (FALSE);  /* a struct or enum */
              break;
            }
          bracelev++;
          break;
        case '*':
          if (definedef != dnone)
            break;
          if (fvdef == fstartlist)
            {
              fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
              token.valid = FALSE;
            }
          break;
        case '}':
          if (definedef != dnone)
            break;
          if (!ignoreindent && lp == newlb.buffer + 1)
            {
              if (bracelev != 0)
                token.valid = FALSE;
              bracelev = 0;     /* reset brace level if first column */
              parlev = 0;       /* also reset paren level, just in case... */
            }
          else if (bracelev > 0)
            bracelev--;
          else
            token.valid = FALSE; /* something gone amiss, token unreliable */
          popclass_above (bracelev);
          structdef = snone;
          /* Only if typdef == tinbody is typdefbracelev significant. */
          if (typdef == tinbody && bracelev <= typdefbracelev)
            {
              assert (bracelev == typdefbracelev);
              typdef = tend;
            }
          break;
        case '=':
          if (definedef != dnone)
            break;
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            case fvnameseen:
              if ((members && bracelev == 1)
                  || (globals && bracelev == 0 && (!fvextern || declarations)))
                make_C_tag (FALSE); /* a variable */
              /* FALLTHRU */
            default:
              fvdef = vignore;
            }
          break;
        case '<':
          /* In C++, a '<' after a struct tag or a name opens a template
             argument list; tokens are not considered until it closes. */
          if (cplpl
              && (structdef == stagseen || fvdef == fvnameseen))
            {
              templatelev++;
              break;
            }
          goto resetfvdef;
        case '>':
          if (templatelev > 0)
            {
              templatelev--;
              break;
            }
          goto resetfvdef;
        case '+':
        case '-':
          /* At brace level 0 inside an Objective C body, a sign
             introduces a method. */
          if (objdef == oinbody && bracelev == 0)
            {
              objdef = omethodsign;
              break;
            }
          /* FALLTHRU */
        resetfvdef:
        case '#': case '~': case '&': case '%': case '/':
        case '|': case '^': case '!': case '.': case '?':
          if (definedef != dnone)
            break;
          /* These surely cannot follow a function tag in C. */
          switch (fvdef)
            {
            case foperator:
            case finlist:
            case fignore:
            case vignore:
              break;
            default:
              fvdef = fvnone;
            }
          break;
        case '\0':
          if (objdef == otagseen)
            {
              make_C_tag (TRUE); /* an Objective C class */
              objdef = oignore;
            }
          /* If a macro spans multiple lines don't reset its state. */
          if (quotednl)
            CNL_SAVE_DEFINEDEF ();
          else
            CNL ();
          break;
        } /* switch (c) */

    } /* while not eof */

  /* Release the two line buffers set up at the beginning. */
  free (lbs[0].lb.buffer);
  free (lbs[1].lb.buffer);
}
4023
4024 /*
4025 * Process either a C++ file or a C file depending on the setting
4026 * of a global flag.
4027 */
4028 static void
4029 default_C_entries (inf)
4030 FILE *inf;
4031 {
4032 C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4033 }
4034
/* Parse INF as plain C: no dialect bit is set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4042
4043 /* Always do C++. */
4044 static void
4045 Cplusplus_entries (inf)
4046 FILE *inf;
4047 {
4048 C_entries (C_PLPL, inf);
4049 }
4050
4051 /* Always do Java. */
4052 static void
4053 Cjava_entries (inf)
4054 FILE *inf;
4055 {
4056 C_entries (C_JAVA, inf);
4057 }
4058
4059 /* Always do C*. */
4060 static void
4061 Cstar_entries (inf)
4062 FILE *inf;
4063 {
4064 C_entries (C_STAR, inf);
4065 }
4066
4067 /* Always do Yacc. */
4068 static void
4069 Yacc_entries (inf)
4070 FILE *inf;
4071 {
4072 C_entries (YACC, inf);
4073 }
4074
4075 \f
/* Useful macros. */

/* Loop header that iterates over the remaining lines of FILE_POINTER.
   Before each iteration the next line is read into LINE_BUFFER with
   readline and CHAR_POINTER is set to the start of that line; the loop
   stops when feof reports end of file.  The statement that follows the
   macro is the loop body. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )

/* True if CP points at the keyword KW and the character right after it
   satisfies notinname.  On success CP is moved past the keyword and
   then to wherever skip_spaces leaves it; on failure CP is unchanged.
   CP is evaluated several times and must be an lvalue. */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
   && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
   && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */

/* Similar to LOOKING_AT, but the comparison is done with strncaseeq,
   the character following the keyword is not checked with notinname,
   and CP is left right after the keyword without skipping spaces. */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
   && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))                   /* advance past kw */
4097
4098 /*
4099 * Read a file, but do no processing. This is used to do regexp
4100 * matching on files that have no language defined.
4101 */
4102 static void
4103 just_read_file (inf)
4104 FILE *inf;
4105 {
4106 register char *dummy;
4107
4108 LOOP_ON_INPUT_LINES (inf, lb, dummy)
4109 continue;
4110 }
4111
4112 \f
4113 /* Fortran parsing */
4114
4115 static void F_takeprec __P((void));
4116 static void F_getit __P((FILE *));
4117
4118 static void
4119 F_takeprec ()
4120 {
4121 dbp = skip_spaces (dbp);
4122 if (*dbp != '*')
4123 return;
4124 dbp++;
4125 dbp = skip_spaces (dbp);
4126 if (strneq (dbp, "(*)", 3))
4127 {
4128 dbp += 3;
4129 return;
4130 }
4131 if (!ISDIGIT (*dbp))
4132 {
4133 --dbp; /* force failure */
4134 return;
4135 }
4136 do
4137 dbp++;
4138 while (ISDIGIT (*dbp));
4139 }
4140
4141 static void
4142 F_getit (inf)
4143 FILE *inf;
4144 {
4145 register char *cp;
4146
4147 dbp = skip_spaces (dbp);
4148 if (*dbp == '\0')
4149 {
4150 readline (&lb, inf);
4151 dbp = lb.buffer;
4152 if (dbp[5] != '&')
4153 return;
4154 dbp += 6;
4155 dbp = skip_spaces (dbp);
4156 }
4157 if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4158 return;
4159 for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4160 continue;
4161 make_tag (dbp, cp-dbp, TRUE,
4162 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4163 }
4164
4165
4166 static void
4167 Fortran_functions (inf)
4168 FILE *inf;
4169 {
4170 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4171 {
4172 if (*dbp == '%')
4173 dbp++; /* Ratfor escape to fortran */
4174 dbp = skip_spaces (dbp);
4175 if (*dbp == '\0')
4176 continue;
4177 switch (lowcase (*dbp))
4178 {
4179 case 'i':
4180 if (nocase_tail ("integer"))
4181 F_takeprec ();
4182 break;
4183 case 'r':
4184 if (nocase_tail ("real"))
4185 F_takeprec ();
4186 break;
4187 case 'l':
4188 if (nocase_tail ("logical"))
4189 F_takeprec ();
4190 break;
4191 case 'c':
4192 if (nocase_tail ("complex") || nocase_tail ("character"))
4193 F_takeprec ();
4194 break;
4195 case 'd':
4196 if (nocase_tail ("double"))
4197 {
4198 dbp = skip_spaces (dbp);
4199 if (*dbp == '\0')
4200 continue;
4201 if (nocase_tail ("precision"))
4202 break;
4203 continue;
4204 }
4205 break;
4206 }
4207 dbp = skip_spaces (dbp);
4208 if (*dbp == '\0')
4209 continue;
4210 switch (lowcase (*dbp))
4211 {
4212 case 'f':
4213 if (nocase_tail ("function"))
4214 F_getit (inf);
4215 continue;
4216 case 's':
4217 if (nocase_tail ("subroutine"))
4218 F_getit (inf);
4219 continue;
4220 case 'e':
4221 if (nocase_tail ("entry"))
4222 F_getit (inf);
4223 continue;
4224 case 'b':
4225 if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4226 {
4227 dbp = skip_spaces (dbp);
4228 if (*dbp == '\0') /* assume un-named */
4229 make_tag ("blockdata", 9, TRUE,
4230 lb.buffer, dbp - lb.buffer, lineno, linecharno);
4231 else
4232 F_getit (inf); /* look for name */
4233 }
4234 continue;
4235 }
4236 }
4237 }
4238
4239 \f
4240 /*
4241 * Ada parsing
4242 * Original code by
4243 * Philippe Waroquiers (1998)
4244 */
4245
4246 static void Ada_getit __P((FILE *, char *));
4247
4248 /* Once we are positioned after an "interesting" keyword, let's get
4249 the real tag value necessary. */
4250 static void
4251 Ada_getit (inf, name_qualifier)
4252 FILE *inf;
4253 char *name_qualifier;
4254 {
4255 register char *cp;
4256 char *name;
4257 char c;
4258
4259 while (!feof (inf))
4260 {
4261 dbp = skip_spaces (dbp);
4262 if (*dbp == '\0'
4263 || (dbp[0] == '-' && dbp[1] == '-'))
4264 {
4265 readline (&lb, inf);
4266 dbp = lb.buffer;
4267 }
4268 switch (lowcase(*dbp))
4269 {
4270 case 'b':
4271 if (nocase_tail ("body"))
4272 {
4273 /* Skipping body of procedure body or package body or ....
4274 resetting qualifier to body instead of spec. */
4275 name_qualifier = "/b";
4276 continue;
4277 }
4278 break;
4279 case 't':
4280 /* Skipping type of task type or protected type ... */
4281 if (nocase_tail ("type"))
4282 continue;
4283 break;
4284 }
4285 if (*dbp == '"')
4286 {
4287 dbp += 1;
4288 for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4289 continue;
4290 }
4291 else
4292 {
4293 dbp = skip_spaces (dbp);
4294 for (cp = dbp;
4295 (*cp != '\0'
4296 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4297 cp++)
4298 continue;
4299 if (cp == dbp)
4300 return;
4301 }
4302 c = *cp;
4303 *cp = '\0';
4304 name = concat (dbp, name_qualifier, "");
4305 *cp = c;
4306 make_tag (name, strlen (name), TRUE,
4307 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4308 free (name);
4309 if (c == '"')
4310 dbp = cp + 1;
4311 return;
4312 }
4313 }
4314
4315 static void
4316 Ada_funcs (inf)
4317 FILE *inf;
4318 {
4319 bool inquote = FALSE;
4320 bool skip_till_semicolumn = FALSE;
4321
4322 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4323 {
4324 while (*dbp != '\0')
4325 {
4326 /* Skip a string i.e. "abcd". */
4327 if (inquote || (*dbp == '"'))
4328 {
4329 dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4330 if (dbp != NULL)
4331 {
4332 inquote = FALSE;
4333 dbp += 1;
4334 continue; /* advance char */
4335 }
4336 else
4337 {
4338 inquote = TRUE;
4339 break; /* advance line */
4340 }
4341 }
4342
4343 /* Skip comments. */
4344 if (dbp[0] == '-' && dbp[1] == '-')
4345 break; /* advance line */
4346
4347 /* Skip character enclosed in single quote i.e. 'a'
4348 and skip single quote starting an attribute i.e. 'Image. */
4349 if (*dbp == '\'')
4350 {
4351 dbp++ ;
4352 if (*dbp != '\0')
4353 dbp++;
4354 continue;
4355 }
4356
4357 if (skip_till_semicolumn)
4358 {
4359 if (*dbp == ';')
4360 skip_till_semicolumn = FALSE;
4361 dbp++;
4362 continue; /* advance char */
4363 }
4364
4365 /* Search for beginning of a token. */
4366 if (!begtoken (*dbp))
4367 {
4368 dbp++;
4369 continue; /* advance char */
4370 }
4371
4372 /* We are at the beginning of a token. */
4373 switch (lowcase(*dbp))
4374 {
4375 case 'f':
4376 if (!packages_only && nocase_tail ("function"))
4377 Ada_getit (inf, "/f");
4378 else
4379 break; /* from switch */
4380 continue; /* advance char */
4381 case 'p':
4382 if (!packages_only && nocase_tail ("procedure"))
4383 Ada_getit (inf, "/p");
4384 else if (nocase_tail ("package"))
4385 Ada_getit (inf, "/s");
4386 else if (nocase_tail ("protected")) /* protected type */
4387 Ada_getit (inf, "/t");
4388 else
4389 break; /* from switch */
4390 continue; /* advance char */
4391
4392 case 'u':
4393 if (typedefs && !packages_only && nocase_tail ("use"))
4394 {
4395 /* when tagging types, avoid tagging use type Pack.Typename;
4396 for this, we will skip everything till a ; */
4397 skip_till_semicolumn = TRUE;
4398 continue; /* advance char */
4399 }
4400
4401 case 't':
4402 if (!packages_only && nocase_tail ("task"))
4403 Ada_getit (inf, "/k");
4404 else if (typedefs && !packages_only && nocase_tail ("type"))
4405 {
4406 Ada_getit (inf, "/t");
4407 while (*dbp != '\0')
4408 dbp += 1;
4409 }
4410 else
4411 break; /* from switch */
4412 continue; /* advance char */
4413 }
4414
4415 /* Look for the end of the token. */
4416 while (!endtoken (*dbp))
4417 dbp++;
4418
4419 } /* advance char */
4420 } /* advance line */
4421 }
4422
4423 \f
4424 /*
4425 * Unix and microcontroller assembly tag handling
4426 * Labels: /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4427 * Idea by Bob Weiner, Motorola Inc. (1994)
4428 */
4429 static void
4430 Asm_labels (inf)
4431 FILE *inf;
4432 {
4433 register char *cp;
4434
4435 LOOP_ON_INPUT_LINES (inf, lb, cp)
4436 {
4437 /* If first char is alphabetic or one of [_.$], test for colon
4438 following identifier. */
4439 if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4440 {
4441 /* Read past label. */
4442 cp++;
4443 while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4444 cp++;
4445 if (*cp == ':' || iswhite (*cp))
4446 /* Found end of label, so copy it and add it to the table. */
4447 make_tag (lb.buffer, cp - lb.buffer, TRUE,
4448 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4449 }
4450 }
4451 }
4452
4453 \f
4454 /*
4455 * Perl support
4456 * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4457 * Perl variable names: /^(my|local).../
4458 * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4459 * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4460 * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4461 */
4462 static void
4463 Perl_functions (inf)
4464 FILE *inf;
4465 {
4466 char *package = savestr ("main"); /* current package name */
4467 register char *cp;
4468
4469 LOOP_ON_INPUT_LINES (inf, lb, cp)
4470 {
4471 skip_spaces(cp);
4472
4473 if (LOOKING_AT (cp, "package"))
4474 {
4475 free (package);
4476 get_tag (cp, &package);
4477 }
4478 else if (LOOKING_AT (cp, "sub"))
4479 {
4480 char *pos;
4481 char *sp = cp;
4482
4483 while (!notinname (*cp))
4484 cp++;
4485 if (cp == sp)
4486 continue; /* nothing found */
4487 if ((pos = etags_strchr (sp, ':')) != NULL
4488 && pos < cp && pos[1] == ':')
4489 /* The name is already qualified. */
4490 make_tag (sp, cp - sp, TRUE,
4491 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4492 else
4493 /* Qualify it. */
4494 {
4495 char savechar, *name;
4496
4497 savechar = *cp;
4498 *cp = '\0';
4499 name = concat (package, "::", sp);
4500 *cp = savechar;
4501 make_tag (name, strlen(name), TRUE,
4502 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4503 free (name);
4504 }
4505 }
4506 else if (globals) /* only if we are tagging global vars */
4507 {
4508 /* Skip a qualifier, if any. */
4509 bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4510 /* After "my" or "local", but before any following paren or space. */
4511 char *varstart = cp;
4512
4513 if (qual /* should this be removed? If yes, how? */
4514 && (*cp == '$' || *cp == '@' || *cp == '%'))
4515 {
4516 varstart += 1;
4517 do
4518 cp++;
4519 while (ISALNUM (*cp) || *cp == '_');
4520 }
4521 else if (qual)
4522 {
4523 /* Should be examining a variable list at this point;
4524 could insist on seeing an open parenthesis. */
4525 while (*cp != '\0' && *cp != ';' && *cp != '=' && *cp != ')')
4526 cp++;
4527 }
4528 else
4529 continue;
4530
4531 make_tag (varstart, cp - varstart, FALSE,
4532 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4533 }
4534 }
4535 free (package);
4536 }
4537
4538
4539 /*
4540 * Python support
4541 * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4542 * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4543 * More ideas by seb bacon <seb@jamkit.com> (2002)
4544 */
4545 static void
4546 Python_functions (inf)
4547 FILE *inf;
4548 {
4549 register char *cp;
4550
4551 LOOP_ON_INPUT_LINES (inf, lb, cp)
4552 {
4553 cp = skip_spaces (cp);
4554 if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4555 {
4556 char *name = cp;
4557 while (!notinname (*cp) && *cp != ':')
4558 cp++;
4559 make_tag (name, cp - name, TRUE,
4560 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4561 }
4562 }
4563 }
4564
4565 \f
4566 /*
4567 * PHP support
4568 * Look for:
4569 * - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4570 * - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4571 * - /^[ \t]*define\(\"[^\"]+/
4572 * Only with --members:
4573 * - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4574 * Idea by Diez B. Roggisch (2001)
4575 */
4576 static void
4577 PHP_functions (inf)
4578 FILE *inf;
4579 {
4580 register char *cp, *name;
4581 bool search_identifier = FALSE;
4582
4583 LOOP_ON_INPUT_LINES (inf, lb, cp)
4584 {
4585 cp = skip_spaces (cp);
4586 name = cp;
4587 if (search_identifier
4588 && *cp != '\0')
4589 {
4590 while (!notinname (*cp))
4591 cp++;
4592 make_tag (name, cp - name, TRUE,
4593 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4594 search_identifier = FALSE;
4595 }
4596 else if (LOOKING_AT (cp, "function"))
4597 {
4598 if(*cp == '&')
4599 cp = skip_spaces (cp+1);
4600 if(*cp != '\0')
4601 {
4602 name = cp;
4603 while (!notinname (*cp))
4604 cp++;
4605 make_tag (name, cp - name, TRUE,
4606 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4607 }
4608 else
4609 search_identifier = TRUE;
4610 }
4611 else if (LOOKING_AT (cp, "class"))
4612 {
4613 if (*cp != '\0')
4614 {
4615 name = cp;
4616 while (*cp != '\0' && !iswhite (*cp))
4617 cp++;
4618 make_tag (name, cp - name, FALSE,
4619 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4620 }
4621 else
4622 search_identifier = TRUE;
4623 }
4624 else if (strneq (cp, "define", 6)
4625 && (cp = skip_spaces (cp+6))
4626 && *cp++ == '('
4627 && (*cp == '"' || *cp == '\''))
4628 {
4629 char quote = *cp++;
4630 name = cp;
4631 while (*cp != quote && *cp != '\0')
4632 cp++;
4633 make_tag (name, cp - name, FALSE,
4634 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4635 }
4636 else if (members
4637 && LOOKING_AT (cp, "var")
4638 && *cp == '$')
4639 {
4640 name = cp;
4641 while (!notinname(*cp))
4642 cp++;
4643 make_tag (name, cp - name, FALSE,
4644 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4645 }
4646 }
4647 }
4648
4649 \f
4650 /*
4651 * Cobol tag functions
4652 * We could look for anything that could be a paragraph name.
4653 * i.e. anything that starts in column 8 is one word and ends in a full stop.
4654 * Idea by Corny de Souza (1993)
4655 */
4656 static void
4657 Cobol_paragraphs (inf)
4658 FILE *inf;
4659 {
4660 register char *bp, *ep;
4661
4662 LOOP_ON_INPUT_LINES (inf, lb, bp)
4663 {
4664 if (lb.len < 9)
4665 continue;
4666 bp += 8;
4667
4668 /* If eoln, compiler option or comment ignore whole line. */
4669 if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4670 continue;
4671
4672 for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4673 continue;
4674 if (*ep++ == '.')
4675 make_tag (bp, ep - bp, TRUE,
4676 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4677 }
4678 }
4679
4680 \f
4681 /*
4682 * Makefile support
4683 * Ideas by Assar Westerlund <assar@sics.se> (2001)
4684 */
4685 static void
4686 Makefile_targets (inf)
4687 FILE *inf;
4688 {
4689 register char *bp;
4690
4691 LOOP_ON_INPUT_LINES (inf, lb, bp)
4692 {
4693 if (*bp == '\t' || *bp == '#')
4694 continue;
4695 while (*bp != '\0' && *bp != '=' && *bp != ':')
4696 bp++;
4697 if (*bp == ':' || (globals && *bp == '='))
4698 {
4699 /* We should detect if there is more than one tag, but we do not.
4700 We just skip initial and final spaces. */
4701 char * namestart = skip_spaces (lb.buffer);
4702 while (--bp > namestart)
4703 if (!notinname (*bp))
4704 break;
4705 make_tag (namestart, bp - namestart + 1, TRUE,
4706 lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4707 }
4708 }
4709 }
4710
4711 \f
4712 /*
4713 * Pascal parsing
4714 * Original code by Mosur K. Mohan (1989)
4715 *
4716 * Locates tags for procedures & functions. Doesn't do any type- or
4717 * var-definitions. It does look for the keyword "extern" or
4718 * "forward" immediately following the procedure statement; if found,
4719 * the tag is skipped.
4720 */
4721 static void
4722 Pascal_functions (inf)
4723 FILE *inf;
4724 {
4725 linebuffer tline; /* mostly copied from C_entries */
4726 long save_lcno;
4727 int save_lineno, namelen, taglen;
4728 char c, *name;
4729
4730 bool /* each of these flags is TRUE iff: */
4731 incomment, /* point is inside a comment */
4732 inquote, /* point is inside '..' string */
4733 get_tagname, /* point is after PROCEDURE/FUNCTION
4734 keyword, so next item = potential tag */
4735 found_tag, /* point is after a potential tag */
4736 inparms, /* point is within parameter-list */
4737 verify_tag; /* point has passed the parm-list, so the
4738 next token will determine whether this
4739 is a FORWARD/EXTERN to be ignored, or
4740 whether it is a real tag */
4741
4742 save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4743 name = NULL; /* keep compiler quiet */
4744 dbp = lb.buffer;
4745 *dbp = '\0';
4746 linebuffer_init (&tline);
4747
4748 incomment = inquote = FALSE;
4749 found_tag = FALSE; /* have a proc name; check if extern */
4750 get_tagname = FALSE; /* found "procedure" keyword */
4751 inparms = FALSE; /* found '(' after "proc" */
4752 verify_tag = FALSE; /* check if "extern" is ahead */
4753
4754
4755 while (!feof (inf)) /* long main loop to get next char */
4756 {
4757 c = *dbp++;
4758 if (c == '\0') /* if end of line */
4759 {
4760 readline (&lb, inf);
4761 dbp = lb.buffer;
4762 if (*dbp == '\0')
4763 continue;
4764 if (!((found_tag && verify_tag)
4765 || get_tagname))
4766 c = *dbp++; /* only if don't need *dbp pointing
4767 to the beginning of the name of
4768 the procedure or function */
4769 }
4770 if (incomment)
4771 {
4772 if (c == '}') /* within { } comments */
4773 incomment = FALSE;
4774 else if (c == '*' && *dbp == ')') /* within (* *) comments */
4775 {
4776 dbp++;
4777 incomment = FALSE;
4778 }
4779 continue;
4780 }
4781 else if (inquote)
4782 {
4783 if (c == '\'')
4784 inquote = FALSE;
4785 continue;
4786 }
4787 else
4788 switch (c)
4789 {
4790 case '\'':
4791 inquote = TRUE; /* found first quote */
4792 continue;
4793 case '{': /* found open { comment */
4794 incomment = TRUE;
4795 continue;
4796 case '(':
4797 if (*dbp == '*') /* found open (* comment */
4798 {
4799 incomment = TRUE;
4800 dbp++;
4801 }
4802 else if (found_tag) /* found '(' after tag, i.e., parm-list */
4803 inparms = TRUE;
4804 continue;
4805 case ')': /* end of parms list */
4806 if (inparms)
4807 inparms = FALSE;
4808 continue;
4809 case ';':
4810 if (found_tag && !inparms) /* end of proc or fn stmt */
4811 {
4812 verify_tag = TRUE;
4813 break;
4814 }
4815 continue;
4816 }
4817 if (found_tag && verify_tag && (*dbp != ' '))
4818 {
4819 /* Check if this is an "extern" declaration. */
4820 if (*dbp == '\0')
4821 continue;
4822 if (lowcase (*dbp == 'e'))
4823 {
4824 if (nocase_tail ("extern")) /* superfluous, really! */
4825 {
4826 found_tag = FALSE;
4827 verify_tag = FALSE;
4828 }
4829 }
4830 else if (lowcase (*dbp) == 'f')
4831 {
4832 if (nocase_tail ("forward")) /* check for forward reference */
4833 {
4834 found_tag = FALSE;
4835 verify_tag = FALSE;
4836 }
4837 }
4838 if (found_tag && verify_tag) /* not external proc, so make tag */
4839 {
4840 found_tag = FALSE;
4841 verify_tag = FALSE;
4842 make_tag (name, namelen, TRUE,
4843 tline.buffer, taglen, save_lineno, save_lcno);
4844 continue;
4845 }
4846 }
4847 if (get_tagname) /* grab name of proc or fn */
4848 {
4849 char *cp;
4850
4851 if (*dbp == '\0')
4852 continue;
4853
4854 /* Find block name. */
4855 for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4856 continue;
4857
4858 /* Save all values for later tagging. */
4859 linebuffer_setlen (&tline, lb.len);
4860 strcpy (tline.buffer, lb.buffer);
4861 save_lineno = lineno;
4862 save_lcno = linecharno;
4863 name = tline.buffer + (dbp - lb.buffer);
4864 namelen = cp - dbp;
4865 taglen = cp - lb.buffer + 1;
4866
4867 dbp = cp; /* set dbp to e-o-token */
4868 get_tagname = FALSE;
4869 found_tag = TRUE;
4870 continue;
4871
4872 /* And proceed to check for "extern". */
4873 }
4874 else if (!incomment && !inquote && !found_tag)
4875 {
4876 /* Check for proc/fn keywords. */
4877 switch (lowcase (c))
4878 {
4879 case 'p':
4880 if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4881 get_tagname = TRUE;
4882 continue;
4883 case 'f':
4884 if (nocase_tail ("unction"))
4885 get_tagname = TRUE;
4886 continue;
4887 }
4888 }
4889 } /* while not eof */
4890
4891 free (tline.buffer);
4892 }
4893
4894 \f
4895 /*
4896 * Lisp tag functions
4897 * look for (def or (DEF, quote or QUOTE
4898 */
4899
4900 static void L_getit __P((void));
4901
4902 static void
4903 L_getit ()
4904 {
4905 if (*dbp == '\'') /* Skip prefix quote */
4906 dbp++;
4907 else if (*dbp == '(')
4908 {
4909 dbp++;
4910 /* Try to skip "(quote " */
4911 if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4912 /* Ok, then skip "(" before name in (defstruct (foo)) */
4913 dbp = skip_spaces (dbp);
4914 }
4915 get_tag (dbp, NULL);
4916 }
4917
4918 static void
4919 Lisp_functions (inf)
4920 FILE *inf;
4921 {
4922 LOOP_ON_INPUT_LINES (inf, lb, dbp)
4923 {
4924 if (dbp[0] != '(')
4925 continue;
4926
4927 if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4928 {
4929 dbp = skip_non_spaces (dbp);
4930 dbp = skip_spaces (dbp);
4931 L_getit ();
4932 }
4933 else
4934 {
4935 /* Check for (foo::defmumble name-defined ... */
4936 do
4937 dbp++;
4938 while (!notinname (*dbp) && *dbp != ':');
4939 if (*dbp == ':')
4940 {
4941 do
4942 dbp++;
4943 while (*dbp == ':');
4944
4945 if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4946 {
4947 dbp = skip_non_spaces (dbp);
4948 dbp = skip_spaces (dbp);
4949 L_getit ();
4950 }
4951 }
4952 }
4953 }
4954 }
4955
4956 \f
4957 /*
4958 * Lua script language parsing
4959 * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4960 *
4961 * "function" and "local function" are tags if they start at column 1.
4962 */
4963 static void
4964 Lua_functions (inf)
4965 FILE *inf;
4966 {
4967 register char *bp;
4968
4969 LOOP_ON_INPUT_LINES (inf, lb, bp)
4970 {
4971 if (bp[0] != 'f' && bp[0] != 'l')
4972 continue;
4973
4974 (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4975
4976 if (LOOKING_AT (bp, "function"))
4977 get_tag (bp, NULL);
4978 }
4979 }
4980
4981 \f
4982 /*
4983 * Postscript tags
4984 * Just look for lines where the first character is '/'
4985 * Also look at "defineps" for PSWrap
4986 * Ideas by:
4987 * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4988 * Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4989 */
4990 static void
4991 PS_functions (inf)
4992 FILE *inf;
4993 {
4994 register char *bp, *ep;
4995
4996 LOOP_ON_INPUT_LINES (inf, lb, bp)
4997 {
4998 if (bp[0] == '/')
4999 {
5000 for (ep = bp+1;
5001 *ep != '\0' && *ep != ' ' && *ep != '{';
5002 ep++)
5003 continue;
5004 make_tag (bp, ep - bp, TRUE,
5005 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5006 }
5007 else if (LOOKING_AT (bp, "defineps"))
5008 get_tag (bp, NULL);
5009 }
5010 }
5011
5012 \f
5013 /*
5014 * Forth tags
5015 * Ignore anything after \ followed by space or in ( )
5016 * Look for words defined by :
5017 * Look for constant, code, create, defer, value, and variable
5018 * OBP extensions: Look for buffer:, field,
5019 * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5020 */
5021 static void
5022 Forth_words (inf)
5023 FILE *inf;
5024 {
5025 register char *bp;
5026
5027 LOOP_ON_INPUT_LINES (inf, lb, bp)
5028 while ((bp = skip_spaces (bp))[0] != '\0')
5029 if (bp[0] == '\\' && iswhite(bp[1]))
5030 break; /* read next line */
5031 else if (bp[0] == '(' && iswhite(bp[1]))
5032 do /* skip to ) or eol */
5033 bp++;
5034 while (*bp != ')' && *bp != '\0');
5035 else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5036 || LOOKING_AT_NOCASE (bp, "constant")
5037 || LOOKING_AT_NOCASE (bp, "code")
5038 || LOOKING_AT_NOCASE (bp, "create")
5039 || LOOKING_AT_NOCASE (bp, "defer")
5040 || LOOKING_AT_NOCASE (bp, "value")
5041 || LOOKING_AT_NOCASE (bp, "variable")
5042 || LOOKING_AT_NOCASE (bp, "buffer:")
5043 || LOOKING_AT_NOCASE (bp, "field"))
5044 get_tag (skip_spaces (bp), NULL); /* Yay! A definition! */
5045 else
5046 bp = skip_non_spaces (bp);
5047 }
5048
5049 \f
5050 /*
5051 * Scheme tag functions
5052 * look for (def... xyzzy
5053 * (def... (xyzzy
5054 * (def ... ((...(xyzzy ....
5055 * (set! xyzzy
5056 * Original code by Ken Haase (1985?)
5057 */
5058 static void
5059 Scheme_functions (inf)
5060 FILE *inf;
5061 {
5062 register char *bp;
5063
5064 LOOP_ON_INPUT_LINES (inf, lb, bp)
5065 {
5066 if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5067 {
5068 bp = skip_non_spaces (bp+4);
5069 /* Skip over open parens and white space */
5070 while (notinname (*bp))
5071 bp++;
5072 get_tag (bp, NULL);
5073 }
5074 if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5075 get_tag (bp, NULL);
5076 }
5077 }
5078
5079 \f
5080 /* Find tags in TeX and LaTeX input files. */
5081
5082 /* TEX_toktab is a table of TeX control sequences that define tags.
5083 * Each entry records one such control sequence.
5084 *
5085 * Original code from who knows whom.
5086 * Ideas by:
5087 * Stefan Monnier (2002)
5088 */
5089
/* Built on first use by TEX_decode_env; TeX_commands walks it up to the
   entry whose buffer is NULL.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this. */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape and grouping characters in effect for the file being scanned;
   TEX_mode sets all three for each input file.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
5105
5106 /*
5107 * TeX/LaTeX scanning loop.
5108 */
5109 static void
5110 TeX_commands (inf)
5111 FILE *inf;
5112 {
5113 char *cp;
5114 linebuffer *key;
5115
5116 /* Select either \ or ! as escape character. */
5117 TEX_mode (inf);
5118
5119 /* Initialize token table once from environment. */
5120 if (TEX_toktab == NULL)
5121 TEX_decode_env ("TEXTAGS", TEX_defenv);
5122
5123 LOOP_ON_INPUT_LINES (inf, lb, cp)
5124 {
5125 /* Look at each TEX keyword in line. */
5126 for (;;)
5127 {
5128 /* Look for a TEX escape. */
5129 while (*cp++ != TEX_esc)
5130 if (cp[-1] == '\0' || cp[-1] == '%')
5131 goto tex_next_line;
5132
5133 for (key = TEX_toktab; key->buffer != NULL; key++)
5134 if (strneq (cp, key->buffer, key->len))
5135 {
5136 register char *p;
5137 int namelen, linelen;
5138 bool opgrp = FALSE;
5139
5140 cp = skip_spaces (cp + key->len);
5141 if (*cp == TEX_opgrp)
5142 {
5143 opgrp = TRUE;
5144 cp++;
5145 }
5146 for (p = cp;
5147 (!iswhite (*p) && *p != '#' &&
5148 *p != TEX_opgrp && *p != TEX_clgrp);
5149 p++)
5150 continue;
5151 namelen = p - cp;
5152 linelen = lb.len;
5153 if (!opgrp || *p == TEX_clgrp)
5154 {
5155 while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5156 p++;
5157 linelen = p - lb.buffer + 1;
5158 }
5159 make_tag (cp, namelen, TRUE,
5160 lb.buffer, linelen, lineno, linecharno);
5161 goto tex_next_line; /* We only tag a line once */
5162 }
5163 }
5164 tex_next_line:
5165 ;
5166 }
5167 }
5168
5169 #define TEX_LESC '\\'
5170 #define TEX_SESC '!'
5171
5172 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5173 chars accordingly. */
5174 static void
5175 TEX_mode (inf)
5176 FILE *inf;
5177 {
5178 int c;
5179
5180 while ((c = getc (inf)) != EOF)
5181 {
5182 /* Skip to next line if we hit the TeX comment char. */
5183 if (c == '%')
5184 while (c != '\n' && c != EOF)
5185 c = getc (inf);
5186 else if (c == TEX_LESC || c == TEX_SESC )
5187 break;
5188 }
5189
5190 if (c == TEX_LESC)
5191 {
5192 TEX_esc = TEX_LESC;
5193 TEX_opgrp = '{';
5194 TEX_clgrp = '}';
5195 }
5196 else
5197 {
5198 TEX_esc = TEX_SESC;
5199 TEX_opgrp = '<';
5200 TEX_clgrp = '>';
5201 }
5202 /* If the input file is compressed, inf is a pipe, and rewind may fail.
5203 No attempt is made to correct the situation. */
5204 rewind (inf);
5205 }
5206
5207 /* Read environment and prepend it to the default string.
5208 Build token table. */
5209 static void
5210 TEX_decode_env (evarname, defenv)
5211 char *evarname;
5212 char *defenv;
5213 {
5214 register char *env, *p;
5215 int i, len;
5216
5217 /* Append default string to environment. */
5218 env = getenv (evarname);
5219 if (!env)
5220 env = defenv;
5221 else
5222 {
5223 char *oldenv = env;
5224 env = concat (oldenv, defenv, "");
5225 }
5226
5227 /* Allocate a token table */
5228 for (len = 1, p = env; p;)
5229 if ((p = etags_strchr (p, ':')) && *++p != '\0')
5230 len++;
5231 TEX_toktab = xnew (len, linebuffer);
5232
5233 /* Unpack environment string into token table. Be careful about */
5234 /* zero-length strings (leading ':', "::" and trailing ':') */
5235 for (i = 0; *env != '\0';)
5236 {
5237 p = etags_strchr (env, ':');
5238 if (!p) /* End of environment string. */
5239 p = env + strlen (env);
5240 if (p - env > 0)
5241 { /* Only non-zero strings. */
5242 TEX_toktab[i].buffer = savenstr (env, p - env);
5243 TEX_toktab[i].len = p - env;
5244 i++;
5245 }
5246 if (*p)
5247 env = p + 1;
5248 else
5249 {
5250 TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5251 TEX_toktab[i].len = 0;
5252 break;
5253 }
5254 }
5255 }
5256
5257 \f
5258 /* Texinfo support. Dave Love, Mar. 2000. */
5259 static void
5260 Texinfo_nodes (inf)
5261 FILE * inf;
5262 {
5263 char *cp, *start;
5264 LOOP_ON_INPUT_LINES (inf, lb, cp)
5265 if (LOOKING_AT (cp, "@node"))
5266 {
5267 start = cp;
5268 while (*cp != '\0' && *cp != ',')
5269 cp++;
5270 make_tag (start, cp - start, TRUE,
5271 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5272 }
5273 }
5274
5275 \f
5276 /*
5277 * HTML support.
5278 * Contents of <title>, <h1>, <h2>, <h3> are tags.
5279 * Contents of <a name=xxx> are tags with name xxx.
5280 *
5281 * Francesco Potortì, 2002.
5282 */
5283 static void
5284 HTML_labels (inf)
5285 FILE * inf;
5286 {
5287 bool getnext = FALSE; /* next text outside of HTML tags is a tag */
5288 bool skiptag = FALSE; /* skip to the end of the current HTML tag */
5289 bool intag = FALSE; /* inside an html tag, looking for ID= */
5290 bool inanchor = FALSE; /* when INTAG, is an anchor, look for NAME= */
5291 char *end;
5292
5293
5294 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5295
5296 LOOP_ON_INPUT_LINES (inf, lb, dbp)
5297 for (;;) /* loop on the same line */
5298 {
5299 if (skiptag) /* skip HTML tag */
5300 {
5301 while (*dbp != '\0' && *dbp != '>')
5302 dbp++;
5303 if (*dbp == '>')
5304 {
5305 dbp += 1;
5306 skiptag = FALSE;
5307 continue; /* look on the same line */
5308 }
5309 break; /* go to next line */
5310 }
5311
5312 else if (intag) /* look for "name=" or "id=" */
5313 {
5314 while (*dbp != '\0' && *dbp != '>'
5315 && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5316 dbp++;
5317 if (*dbp == '\0')
5318 break; /* go to next line */
5319 if (*dbp == '>')
5320 {
5321 dbp += 1;
5322 intag = FALSE;
5323 continue; /* look on the same line */
5324 }
5325 if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5326 || LOOKING_AT_NOCASE (dbp, "id="))
5327 {
5328 bool quoted = (dbp[0] == '"');
5329
5330 if (quoted)
5331 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5332 continue;
5333 else
5334 for (end = dbp; *end != '\0' && intoken (*end); end++)
5335 continue;
5336 linebuffer_setlen (&token_name, end - dbp);
5337 strncpy (token_name.buffer, dbp, end - dbp);
5338 token_name.buffer[end - dbp] = '\0';
5339
5340 dbp = end;
5341 intag = FALSE; /* we found what we looked for */
5342 skiptag = TRUE; /* skip to the end of the tag */
5343 getnext = TRUE; /* then grab the text */
5344 continue; /* look on the same line */
5345 }
5346 dbp += 1;
5347 }
5348
5349 else if (getnext) /* grab next tokens and tag them */
5350 {
5351 dbp = skip_spaces (dbp);
5352 if (*dbp == '\0')
5353 break; /* go to next line */
5354 if (*dbp == '<')
5355 {
5356 intag = TRUE;
5357 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5358 continue; /* look on the same line */
5359 }
5360
5361 for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5362 continue;
5363 make_tag (token_name.buffer, token_name.len, TRUE,
5364 dbp, end - dbp, lineno, linecharno);
5365 linebuffer_setlen (&token_name, 0); /* no name in buffer */
5366 getnext = FALSE;
5367 break; /* go to next line */
5368 }
5369
5370 else /* look for an interesting HTML tag */
5371 {
5372 while (*dbp != '\0' && *dbp != '<')
5373 dbp++;
5374 if (*dbp == '\0')
5375 break; /* go to next line */
5376 intag = TRUE;
5377 if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5378 {
5379 inanchor = TRUE;
5380 continue; /* look on the same line */
5381 }
5382 else if (LOOKING_AT_NOCASE (dbp, "<title>")
5383 || LOOKING_AT_NOCASE (dbp, "<h1>")
5384 || LOOKING_AT_NOCASE (dbp, "<h2>")
5385 || LOOKING_AT_NOCASE (dbp, "<h3>"))
5386 {
5387 intag = FALSE;
5388 getnext = TRUE;
5389 continue; /* look on the same line */
5390 }
5391 dbp += 1;
5392 }
5393 }
5394 }
5395
5396 \f
5397 /*
5398 * Prolog support
5399 *
5400 * Assumes that the predicate or rule starts at column 0.
5401 * Only the first clause of a predicate or rule is added.
5402 * Original code by Sunichirou Sugou (1989)
5403 * Rewritten by Anders Lindgren (1996)
5404 */
5405 static int prolog_pr __P((char *, char *));
5406 static void prolog_skip_comment __P((linebuffer *, FILE *));
5407 static int prolog_atom __P((char *, int));
5408
5409 static void
5410 Prolog_functions (inf)
5411 FILE *inf;
5412 {
5413 char *cp, *last;
5414 int len;
5415 int allocated;
5416
5417 allocated = 0;
5418 len = 0;
5419 last = NULL;
5420
5421 LOOP_ON_INPUT_LINES (inf, lb, cp)
5422 {
5423 if (cp[0] == '\0') /* Empty line */
5424 continue;
5425 else if (iswhite (cp[0])) /* Not a predicate */
5426 continue;
5427 else if (cp[0] == '/' && cp[1] == '*') /* comment. */
5428 prolog_skip_comment (&lb, inf);
5429 else if ((len = prolog_pr (cp, last)) > 0)
5430 {
5431 /* Predicate or rule. Store the function name so that we
5432 only generate a tag for the first clause. */
5433 if (last == NULL)
5434 last = xnew(len + 1, char);
5435 else if (len + 1 > allocated)
5436 xrnew (last, len + 1, char);
5437 allocated = len + 1;
5438 strncpy (last, cp, len);
5439 last[len] = '\0';
5440 }
5441 }
5442 if (last != NULL)
5443 free (last);
5444 }
5445
5446
5447 static void
5448 prolog_skip_comment (plb, inf)
5449 linebuffer *plb;
5450 FILE *inf;
5451 {
5452 char *cp;
5453
5454 do
5455 {
5456 for (cp = plb->buffer; *cp != '\0'; cp++)
5457 if (cp[0] == '*' && cp[1] == '/')
5458 return;
5459 readline (plb, inf);
5460 }
5461 while (!feof(inf));
5462 }
5463
5464 /*
5465 * A predicate or rule definition is added if it matches:
5466 * <beginning of line><Prolog Atom><whitespace>(
5467 * or <beginning of line><Prolog Atom><whitespace>:-
5468 *
5469 * It is added to the tags database if it doesn't match the
5470 * name of the previous clause header.
5471 *
5472 * Return the size of the name of the predicate or rule, or 0 if no
5473 * header was found.
5474 */
5475 static int
5476 prolog_pr (s, last)
5477 char *s;
5478 char *last; /* Name of last clause. */
5479 {
5480 int pos;
5481 int len;
5482
5483 pos = prolog_atom (s, 0);
5484 if (pos < 1)
5485 return 0;
5486
5487 len = pos;
5488 pos = skip_spaces (s + pos) - s;
5489
5490 if ((s[pos] == '.'
5491 || (s[pos] == '(' && (pos += 1))
5492 || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5493 && (last == NULL /* save only the first clause */
5494 || len != (int)strlen (last)
5495 || !strneq (s, last, len)))
5496 {
5497 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5498 return len;
5499 }
5500 else
5501 return 0;
5502 }
5503
5504 /*
5505 * Consume a Prolog atom.
5506 * Return the number of bytes consumed, or -1 if there was an error.
5507 *
5508 * A prolog atom, in this context, could be one of:
5509 * - An alphanumeric sequence, starting with a lower case letter.
5510 * - A quoted arbitrary string. Single quotes can escape themselves.
5511 * Backslash quotes everything.
5512 */
/* Return the length of the Prolog atom starting at S[POS], or -1 if
   there is none.  An atom is either a lower case letter or `_' followed
   by alphanumerics and `_', or a single-quoted string in which a
   doubled quote stands for a quote and a backslash quotes the next
   character.  A quoted atom not closed on the line is an error.  */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || (*p == '_'))
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || (*p == '_'));
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return (int) (p - start); /* closing quote */
        p++;                    /* A double quote */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5562
5563 \f
5564 /*
5565 * Support for Erlang
5566 *
5567 * Generates tags for functions, defines, and records.
5568 * Assumes that Erlang functions start at column 0.
5569 * Original code by Anders Lindgren (1996)
5570 */
5571 static int erlang_func __P((char *, char *));
5572 static void erlang_attribute __P((char *));
5573 static int erlang_atom __P((char *));
5574
5575 static void
5576 Erlang_functions (inf)
5577 FILE *inf;
5578 {
5579 char *cp, *last;
5580 int len;
5581 int allocated;
5582
5583 allocated = 0;
5584 len = 0;
5585 last = NULL;
5586
5587 LOOP_ON_INPUT_LINES (inf, lb, cp)
5588 {
5589 if (cp[0] == '\0') /* Empty line */
5590 continue;
5591 else if (iswhite (cp[0])) /* Not function nor attribute */
5592 continue;
5593 else if (cp[0] == '%') /* comment */
5594 continue;
5595 else if (cp[0] == '"') /* Sometimes, strings start in column one */
5596 continue;
5597 else if (cp[0] == '-') /* attribute, e.g. "-define" */
5598 {
5599 erlang_attribute (cp);
5600 if (last != NULL)
5601 {
5602 free (last);
5603 last = NULL;
5604 }
5605 }
5606 else if ((len = erlang_func (cp, last)) > 0)
5607 {
5608 /*
5609 * Function. Store the function name so that we only
5610 * generates a tag for the first clause.
5611 */
5612 if (last == NULL)
5613 last = xnew (len + 1, char);
5614 else if (len + 1 > allocated)
5615 xrnew (last, len + 1, char);
5616 allocated = len + 1;
5617 strncpy (last, cp, len);
5618 last[len] = '\0';
5619 }
5620 }
5621 if (last != NULL)
5622 free (last);
5623 }
5624
5625
5626 /*
5627 * A function definition is added if it matches:
5628 * <beginning of line><Erlang Atom><whitespace>(
5629 *
5630 * It is added to the tags database if it doesn't match the
5631 * name of the previous clause header.
5632 *
5633 * Return the size of the name of the function, or 0 if no function
5634 * was found.
5635 */
5636 static int
5637 erlang_func (s, last)
5638 char *s;
5639 char *last; /* Name of last clause. */
5640 {
5641 int pos;
5642 int len;
5643
5644 pos = erlang_atom (s);
5645 if (pos < 1)
5646 return 0;
5647
5648 len = pos;
5649 pos = skip_spaces (s + pos) - s;
5650
5651 /* Save only the first clause. */
5652 if (s[pos++] == '('
5653 && (last == NULL
5654 || len != (int)strlen (last)
5655 || !strneq (s, last, len)))
5656 {
5657 make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5658 return len;
5659 }
5660
5661 return 0;
5662 }
5663
5664
5665 /*
5666 * Handle attributes. Currently, tags are generated for defines
5667 * and records.
5668 *
5669 * They are on the form:
5670 * -define(foo, bar).
5671 * -define(Foo(M, N), M+N).
5672 * -record(graph, {vtab = notable, cyclic = true}).
5673 */
5674 static void
5675 erlang_attribute (s)
5676 char *s;
5677 {
5678 char *cp = s;
5679
5680 if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5681 && *cp++ == '(')
5682 {
5683 int len = erlang_atom (skip_spaces (cp));
5684 if (len > 0)
5685 make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5686 }
5687 return;
5688 }
5689
5690
5691 /*
5692 * Consume an Erlang atom (or variable).
 * Return the number of bytes consumed, or 0 if no atom could be read.
5694 */
/* Return the length of the Erlang atom or variable at the start of S,
   or 0 if there is none.  It is either a letter or `_' followed by
   alphanumerics and `_', or a single-quoted string in which a backslash
   quotes the next character.  A quoted atom not closed on the line
   counts as none.  */
static int
erlang_atom (s)
     char *s;
{
  int n = 0;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (n = 1; ISALNUM (s[n]) || s[n] == '_'; n++)
        continue;
    }
  else if (s[0] == '\'')
    {
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          /* A backslash quotes the next character, but not the end of
             the line.  */
          if (s[n] == '\\' && s[++n] == '\0')
            return 0;
          n++;
        }
      n++;                      /* count the closing quote */
    }

  return n;
}
5719
5720 \f
5721 static char *scan_separators __P((char *));
5722 static void add_regex __P((char *, language *));
5723 static char *substitute __P((char *, char *, struct re_registers *));
5724
5725 /*
5726 * Take a string like "/blah/" and turn it into "blah", verifying
5727 * that the first and last characters are the same, and handling
5728 * quoted separator characters. Actually, stops on the occurrence of
5729 * an unquoted separator. Also process \t, \n, etc. and turn into
5730 * appropriate characters. Works in place. Null terminates name string.
5731 * Returns pointer to terminating separator, or NULL for
5732 * unterminated regexps.
5733 */
5734 static char *
5735 scan_separators (name)
5736 char *name;
5737 {
5738 char sep = name[0];
5739 char *copyto = name;
5740 bool quoted = FALSE;
5741
5742 for (++name; *name != '\0'; ++name)
5743 {
5744 if (quoted)
5745 {
5746 switch (*name)
5747 {
5748 case 'a': *copyto++ = '\007'; break; /* BEL (bell) */
5749 case 'b': *copyto++ = '\b'; break; /* BS (back space) */
5750 case 'd': *copyto++ = 0177; break; /* DEL (delete) */
5751 case 'e': *copyto++ = 033; break; /* ESC (delete) */
5752 case 'f': *copyto++ = '\f'; break; /* FF (form feed) */
5753 case 'n': *copyto++ = '\n'; break; /* NL (new line) */
5754 case 'r': *copyto++ = '\r'; break; /* CR (carriage return) */
5755 case 't': *copyto++ = '\t'; break; /* TAB (horizontal tab) */
5756 case 'v': *copyto++ = '\v'; break; /* VT (vertical tab) */
5757 default:
5758 if (*name == sep)
5759 *copyto++ = sep;
5760 else
5761 {
5762 /* Something else is quoted, so preserve the quote. */
5763 *copyto++ = '\\';
5764 *copyto++ = *name;
5765 }
5766 break;
5767 }
5768 quoted = FALSE;
5769 }
5770 else if (*name == '\\')
5771 quoted = TRUE;
5772 else if (*name == sep)
5773 break;
5774 else
5775 *copyto++ = *name;
5776 }
5777 if (*name != sep)
5778 name = NULL; /* signal unterminated regexp */
5779
5780 /* Terminate copied string. */
5781 *copyto = '\0';
5782 return name;
5783 }
5784
5785 /* Look at the argument of --regex or --no-regex and do the right
5786 thing. Same for each line of a regexp file. */
5787 static void
5788 analyse_regex (regex_arg)
5789 char *regex_arg;
5790 {
5791 if (regex_arg == NULL)
5792 {
5793 free_regexps (); /* --no-regex: remove existing regexps */
5794 return;
5795 }
5796
5797 /* A real --regexp option or a line in a regexp file. */
5798 switch (regex_arg[0])
5799 {
5800 /* Comments in regexp file or null arg to --regex. */
5801 case '\0':
5802 case ' ':
5803 case '\t':
5804 break;
5805
5806 /* Read a regex file. This is recursive and may result in a
5807 loop, which will stop when the file descriptors are exhausted. */
5808 case '@':
5809 {
5810 FILE *regexfp;
5811 linebuffer regexbuf;
5812 char *regexfile = regex_arg + 1;
5813
5814 /* regexfile is a file containing regexps, one per line. */
5815 regexfp = fopen (regexfile, "r");
5816 if (regexfp == NULL)
5817 {
5818 pfatal (regexfile);
5819 return;
5820 }
5821 linebuffer_init (&regexbuf);
5822 while (readline_internal (&regexbuf, regexfp) > 0)
5823 analyse_regex (regexbuf.buffer);
5824 free (regexbuf.buffer);
5825 fclose (regexfp);
5826 }
5827 break;
5828
5829 /* Regexp to be used for a specific language only. */
5830 case '{':
5831 {
5832 language *lang;
5833 char *lang_name = regex_arg + 1;
5834 char *cp;
5835
5836 for (cp = lang_name; *cp != '}'; cp++)
5837 if (*cp == '\0')
5838 {
5839 error ("unterminated language name in regex: %s", regex_arg);
5840 return;
5841 }
5842 *cp++ = '\0';
5843 lang = get_language_from_langname (lang_name);
5844 if (lang == NULL)
5845 return;
5846 add_regex (cp, lang);
5847 }
5848 break;
5849
5850 /* Regexp to be used for any language. */
5851 default:
5852 add_regex (regex_arg, NULL);
5853 break;
5854 }
5855 }
5856
5857 /* Separate the regexp pattern, compile it,
5858 and care for optional name and modifiers. */
5859 static void
5860 add_regex (regexp_pattern, lang)
5861 char *regexp_pattern;
5862 language *lang;
5863 {
5864 static struct re_pattern_buffer zeropattern;
5865 char sep, *pat, *name, *modifiers;
5866 const char *err;
5867 struct re_pattern_buffer *patbuf;
5868 regexp *rp;
5869 bool
5870 force_explicit_name = TRUE, /* do not use implicit tag names */
5871 ignore_case = FALSE, /* case is significant */
5872 multi_line = FALSE, /* matches are done one line at a time */
5873 single_line = FALSE; /* dot does not match newline */
5874
5875
5876 if (strlen(regexp_pattern) < 3)
5877 {
5878 error ("null regexp", (char *)NULL);
5879 return;
5880 }
5881 sep = regexp_pattern[0];
5882 name = scan_separators (regexp_pattern);
5883 if (name == NULL)
5884 {
5885 error ("%s: unterminated regexp", regexp_pattern);
5886 return;
5887 }
5888 if (name[1] == sep)
5889 {
5890 error ("null name for regexp \"%s\"", regexp_pattern);
5891 return;
5892 }
5893 modifiers = scan_separators (name);
5894 if (modifiers == NULL) /* no terminating separator --> no name */
5895 {
5896 modifiers = name;
5897 name = "";
5898 }
5899 else
5900 modifiers += 1; /* skip separator */
5901
5902 /* Parse regex modifiers. */
5903 for (; modifiers[0] != '\0'; modifiers++)
5904 switch (modifiers[0])
5905 {
5906 case 'N':
5907 if (modifiers == name)
5908 error ("forcing explicit tag name but no name, ignoring", NULL);
5909 force_explicit_name = TRUE;
5910 break;
5911 case 'i':
5912 ignore_case = TRUE;
5913 break;
5914 case 's':
5915 single_line = TRUE;
5916 /* FALLTHRU */
5917 case 'm':
5918 multi_line = TRUE;
5919 need_filebuf = TRUE;
5920 break;
5921 default:
5922 {
5923 char wrongmod [2];
5924 wrongmod[0] = modifiers[0];
5925 wrongmod[1] = '\0';
5926 error ("invalid regexp modifier `%s', ignoring", wrongmod);
5927 }
5928 break;
5929 }
5930
5931 patbuf = xnew (1, struct re_pattern_buffer);
5932 *patbuf = zeropattern;
5933 if (ignore_case)
5934 {
5935 static char lc_trans[CHARS];
5936 int i;
5937 for (i = 0; i < CHARS; i++)
5938 lc_trans[i] = lowcase (i);
5939 patbuf->translate = lc_trans; /* translation table to fold case */
5940 }
5941
5942 if (multi_line)
5943 pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5944 else
5945 pat = regexp_pattern;
5946
5947 if (single_line)
5948 re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5949 else
5950 re_set_syntax (RE_SYNTAX_EMACS);
5951
5952 err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5953 if (multi_line)
5954 free (pat);
5955 if (err != NULL)
5956 {
5957 error ("%s while compiling pattern", err);
5958 return;
5959 }
5960
5961 rp = p_head;
5962 p_head = xnew (1, regexp);
5963 p_head->pattern = savestr (regexp_pattern);
5964 p_head->p_next = rp;
5965 p_head->lang = lang;
5966 p_head->pat = patbuf;
5967 p_head->name = savestr (name);
5968 p_head->error_signaled = FALSE;
5969 p_head->force_explicit_name = force_explicit_name;
5970 p_head->ignore_case = ignore_case;
5971 p_head->multi_line = multi_line;
5972 }
5973
5974 /*
5975 * Do the substitutions indicated by the regular expression and
5976 * arguments.
5977 */
5978 static char *
5979 substitute (in, out, regs)
5980 char *in, *out;
5981 struct re_registers *regs;
5982 {
5983 char *result, *t;
5984 int size, dig, diglen;
5985
5986 result = NULL;
5987 size = strlen (out);
5988
5989 /* Pass 1: figure out how much to allocate by finding all \N strings. */
5990 if (out[size - 1] == '\\')
5991 fatal ("pattern error in \"%s\"", out);
5992 for (t = etags_strchr (out, '\\');
5993 t != NULL;
5994 t = etags_strchr (t + 2, '\\'))
5995 if (ISDIGIT (t[1]))
5996 {
5997 dig = t[1] - '0';
5998 diglen = regs->end[dig] - regs->start[dig];
5999 size += diglen - 2;
6000 }
6001 else
6002 size -= 1;
6003
6004 /* Allocate space and do the substitutions. */
6005 assert (size >= 0);
6006 result = xnew (size + 1, char);
6007
6008 for (t = result; *out != '\0'; out++)
6009 if (*out == '\\' && ISDIGIT (*++out))
6010 {
6011 dig = *out - '0';
6012 diglen = regs->end[dig] - regs->start[dig];
6013 strncpy (t, in + regs->start[dig], diglen);
6014 t += diglen;
6015 }
6016 else
6017 *t++ = *out;
6018 *t = '\0';
6019
6020 assert (t <= result + size);
6021 assert (t - result == (int)strlen (result));
6022
6023 return result;
6024 }
6025
6026 /* Deallocate all regexps. */
6027 static void
6028 free_regexps ()
6029 {
6030 regexp *rp;
6031 while (p_head != NULL)
6032 {
6033 rp = p_head->p_next;
6034 free (p_head->pattern);
6035 free (p_head->name);
6036 free (p_head);
6037 p_head = rp;
6038 }
6039 return;
6040 }
6041
6042 /*
6043 * Reads the whole file as a single string from `filebuf' and looks for
6044 * multi-line regular expressions, creating tags on matches.
6045 * readline already dealt with normal regexps.
6046 *
6047 * Idea by Ben Wing <ben@666.com> (2002).
6048 */
6049 static void
6050 regex_tag_multiline ()
6051 {
6052 char *buffer = filebuf.buffer;
6053 regexp *rp;
6054 char *name;
6055
6056 for (rp = p_head; rp != NULL; rp = rp->p_next)
6057 {
6058 int match = 0;
6059
6060 if (!rp->multi_line)
6061 continue; /* skip normal regexps */
6062
6063 /* Generic initialisations before parsing file from memory. */
6064 lineno = 1; /* reset global line number */
6065 charno = 0; /* reset global char number */
6066 linecharno = 0; /* reset global char number of line start */
6067
6068 /* Only use generic regexps or those for the current language. */
6069 if (rp->lang != NULL && rp->lang != curfdp->lang)
6070 continue;
6071
6072 while (match >= 0 && match < filebuf.len)
6073 {
6074 match = re_search (rp->pat, buffer, filebuf.len, charno,
6075 filebuf.len - match, &rp->regs);
6076 switch (match)
6077 {
6078 case -2:
6079 /* Some error. */
6080 if (!rp->error_signaled)
6081 {
6082 error ("regexp stack overflow while matching \"%s\"",
6083 rp->pattern);
6084 rp->error_signaled = TRUE;
6085 }
6086 break;
6087 case -1:
6088 /* No match. */
6089 break;
6090 default:
6091 if (match == rp->regs.end[0])
6092 {
6093 if (!rp->error_signaled)
6094 {
6095 error ("regexp matches the empty string: \"%s\"",
6096 rp->pattern);
6097 rp->error_signaled = TRUE;
6098 }
6099 match = -3; /* exit from while loop */
6100 break;
6101 }
6102
6103 /* Match occurred. Construct a tag. */
6104 while (charno < rp->regs.end[0])
6105 if (buffer[charno++] == '\n')
6106 lineno++, linecharno = charno;
6107 name = rp->name;
6108 if (name[0] == '\0')
6109 name = NULL;
6110 else /* make a named tag */
6111 name = substitute (buffer, rp->name, &rp->regs);
6112 if (rp->force_explicit_name)
6113 /* Force explicit tag name, if a name is there. */
6114 pfnote (name, TRUE, buffer + linecharno,
6115 charno - linecharno + 1, lineno, linecharno);
6116 else
6117 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6118 charno - linecharno + 1, lineno, linecharno);
6119 break;
6120 }
6121 }
6122 }
6123 }
6124
6125 \f
6126 static bool
6127 nocase_tail (cp)
6128 char *cp;
6129 {
6130 register int len = 0;
6131
6132 while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6133 cp++, len++;
6134 if (*cp == '\0' && !intoken (dbp[len]))
6135 {
6136 dbp += len;
6137 return TRUE;
6138 }
6139 return FALSE;
6140 }
6141
6142 static void
6143 get_tag (bp, namepp)
6144 register char *bp;
6145 char **namepp;
6146 {
6147 register char *cp = bp;
6148
6149 if (*bp != '\0')
6150 {
6151 /* Go till you get to white space or a syntactic break */
6152 for (cp = bp + 1; !notinname (*cp); cp++)
6153 continue;
6154 make_tag (bp, cp - bp, TRUE,
6155 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6156 }
6157
6158 if (namepp != NULL)
6159 *namepp = savenstr (bp, cp - bp);
6160 }
6161
6162 /*
6163 * Read a line of text from `stream' into `lbp', excluding the
6164 * newline or CR-NL, if any. Return the number of characters read from
6165 * `stream', which is the length of the line including the newline.
6166 *
6167 * On DOS or Windows we do not count the CR character, if any before the
6168 * NL, in the returned length; this mirrors the behavior of Emacs on those
6169 * platforms (for text files, it translates CR-NL to NL as it reads in the
6170 * file).
6171 *
6172 * If multi-line regular expressions are requested, each line read is
6173 * appended to `filebuf'.
6174 */
6175 static long
6176 readline_internal (lbp, stream)
6177 linebuffer *lbp;
6178 register FILE *stream;
6179 {
6180 char *buffer = lbp->buffer;
6181 register char *p = lbp->buffer;
6182 register char *pend;
6183 int chars_deleted;
6184
6185 pend = p + lbp->size; /* Separate to avoid 386/IX compiler bug. */
6186
6187 for (;;)
6188 {
6189 register int c = getc (stream);
6190 if (p == pend)
6191 {
6192 /* We're at the end of linebuffer: expand it. */
6193 lbp->size *= 2;
6194 xrnew (buffer, lbp->size, char);
6195 p += buffer - lbp->buffer;
6196 pend = buffer + lbp->size;
6197 lbp->buffer = buffer;
6198 }
6199 if (c == EOF)
6200 {
6201 *p = '\0';
6202 chars_deleted = 0;
6203 break;
6204 }
6205 if (c == '\n')
6206 {
6207 if (p > buffer && p[-1] == '\r')
6208 {
6209 p -= 1;
6210 #ifdef DOS_NT
6211 /* Assume CRLF->LF translation will be performed by Emacs
6212 when loading this file, so CRs won't appear in the buffer.
6213 It would be cleaner to compensate within Emacs;
6214 however, Emacs does not know how many CRs were deleted
6215 before any given point in the file. */
6216 chars_deleted = 1;
6217 #else
6218 chars_deleted = 2;
6219 #endif
6220 }
6221 else
6222 {
6223 chars_deleted = 1;
6224 }
6225 *p = '\0';
6226 break;
6227 }
6228 *p++ = c;
6229 }
6230 lbp->len = p - buffer;
6231
6232 if (need_filebuf /* we need filebuf for multi-line regexps */
6233 && chars_deleted > 0) /* not at EOF */
6234 {
6235 while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6236 {
6237 /* Expand filebuf. */
6238 filebuf.size *= 2;
6239 xrnew (filebuf.buffer, filebuf.size, char);
6240 }
6241 strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6242 filebuf.len += lbp->len;
6243 filebuf.buffer[filebuf.len++] = '\n';
6244 filebuf.buffer[filebuf.len] = '\0';
6245 }
6246
6247 return lbp->len + chars_deleted;
6248 }
6249
6250 /*
6251 * Like readline_internal, above, but in addition try to match the
6252 * input line against relevant regular expressions and manage #line
6253 * directives.
6254 */
6255 static void
6256 readline (lbp, stream)
6257 linebuffer *lbp;
6258 FILE *stream;
6259 {
6260 long result;
6261
6262 linecharno = charno; /* update global char number of line start */
6263 result = readline_internal (lbp, stream); /* read line */
6264 lineno += 1; /* increment global line number */
6265 charno += result; /* increment global char number */
6266
6267 /* Honour #line directives. */
6268 if (!no_line_directive)
6269 {
6270 static bool discard_until_line_directive;
6271
6272 /* Check whether this is a #line directive. */
6273 if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6274 {
6275 unsigned int lno;
6276 int start = 0;
6277
6278 if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6279 && start > 0) /* double quote character found */
6280 {
6281 char *endp = lbp->buffer + start;
6282
6283 while ((endp = etags_strchr (endp, '"')) != NULL
6284 && endp[-1] == '\\')
6285 endp++;
6286 if (endp != NULL)
6287 /* Ok, this is a real #line directive. Let's deal with it. */
6288 {
6289 char *taggedabsname; /* absolute name of original file */
6290 char *taggedfname; /* name of original file as given */
6291 char *name; /* temp var */
6292
6293 discard_until_line_directive = FALSE; /* found it */
6294 name = lbp->buffer + start;
6295 *endp = '\0';
6296 canonicalize_filename (name); /* for DOS */
6297 taggedabsname = absolute_filename (name, tagfiledir);
6298 if (filename_is_absolute (name)
6299 || filename_is_absolute (curfdp->infname))
6300 taggedfname = savestr (taggedabsname);
6301 else
6302 taggedfname = relative_filename (taggedabsname,tagfiledir);
6303
6304 if (streq (curfdp->taggedfname, taggedfname))
6305 /* The #line directive is only a line number change. We
6306 deal with this afterwards. */
6307 free (taggedfname);
6308 else
6309 /* The tags following this #line directive should be
6310 attributed to taggedfname. In order to do this, set
6311 curfdp accordingly. */
6312 {
6313 fdesc *fdp; /* file description pointer */
6314
6315 /* Go look for a file description already set up for the
6316 file indicated in the #line directive. If there is
6317 one, use it from now until the next #line
6318 directive. */
6319 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6320 if (streq (fdp->infname, curfdp->infname)
6321 && streq (fdp->taggedfname, taggedfname))
6322 /* If we remove the second test above (after the &&)
6323 then all entries pertaining to the same file are
6324 coalesced in the tags file. If we use it, then
6325 entries pertaining to the same file but generated
6326 from different files (via #line directives) will
6327 go into separate sections in the tags file. These
6328 alternatives look equivalent. The first one
6329 destroys some apparently useless information. */
6330 {
6331 curfdp = fdp;
6332 free (taggedfname);
6333 break;
6334 }
6335 /* Else, if we already tagged the real file, skip all
6336 input lines until the next #line directive. */
6337 if (fdp == NULL) /* not found */
6338 for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6339 if (streq (fdp->infabsname, taggedabsname))
6340 {
6341 discard_until_line_directive = TRUE;
6342 free (taggedfname);
6343 break;
6344 }
6345 /* Else create a new file description and use that from
6346 now on, until the next #line directive. */
6347 if (fdp == NULL) /* not found */
6348 {
6349 fdp = fdhead;
6350 fdhead = xnew (1, fdesc);
6351 *fdhead = *curfdp; /* copy curr. file description */
6352 fdhead->next = fdp;
6353 fdhead->infname = savestr (curfdp->infname);
6354 fdhead->infabsname = savestr (curfdp->infabsname);
6355 fdhead->infabsdir = savestr (curfdp->infabsdir);
6356 fdhead->taggedfname = taggedfname;
6357 fdhead->usecharno = FALSE;
6358 fdhead->prop = NULL;
6359 fdhead->written = FALSE;
6360 curfdp = fdhead;
6361 }
6362 }
6363 free (taggedabsname);
6364 lineno = lno - 1;
6365 readline (lbp, stream);
6366 return;
6367 } /* if a real #line directive */
6368 } /* if #line is followed by a a number */
6369 } /* if line begins with "#line " */
6370
6371 /* If we are here, no #line directive was found. */
6372 if (discard_until_line_directive)
6373 {
6374 if (result > 0)
6375 {
6376 /* Do a tail recursion on ourselves, thus discarding the contents
6377 of the line buffer. */
6378 readline (lbp, stream);
6379 return;
6380 }
6381 /* End of file. */
6382 discard_until_line_directive = FALSE;
6383 return;
6384 }
6385 } /* if #line directives should be considered */
6386
6387 {
6388 int match;
6389 regexp *rp;
6390 char *name;
6391
6392 /* Match against relevant regexps. */
6393 if (lbp->len > 0)
6394 for (rp = p_head; rp != NULL; rp = rp->p_next)
6395 {
6396 /* Only use generic regexps or those for the current language.
6397 Also do not use multiline regexps, which is the job of
6398 regex_tag_multiline. */
6399 if ((rp->lang != NULL && rp->lang != fdhead->lang)
6400 || rp->multi_line)
6401 continue;
6402
6403 match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6404 switch (match)
6405 {
6406 case -2:
6407 /* Some error. */
6408 if (!rp->error_signaled)
6409 {
6410 error ("regexp stack overflow while matching \"%s\"",
6411 rp->pattern);
6412 rp->error_signaled = TRUE;
6413 }
6414 break;
6415 case -1:
6416 /* No match. */
6417 break;
6418 case 0:
6419 /* Empty string matched. */
6420 if (!rp->error_signaled)
6421 {
6422 error ("regexp matches the empty string: \"%s\"", rp->pattern);
6423 rp->error_signaled = TRUE;
6424 }
6425 break;
6426 default:
6427 /* Match occurred. Construct a tag. */
6428 name = rp->name;
6429 if (name[0] == '\0')
6430 name = NULL;
6431 else /* make a named tag */
6432 name = substitute (lbp->buffer, rp->name, &rp->regs);
6433 if (rp->force_explicit_name)
6434 /* Force explicit tag name, if a name is there. */
6435 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6436 else
6437 make_tag (name, strlen (name), TRUE,
6438 lbp->buffer, match, lineno, linecharno);
6439 break;
6440 }
6441 }
6442 }
6443 }
6444
6445 \f
/*
 * Return a newly allocated copy of the string CP, made with savenstr:
 * the space is strlen(cp)+1 bytes long.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6456
6457 /*
6458 * Return a pointer to a space of size LEN+1 allocated with xnew where
6459 * the string CP has been copied for at most the first LEN characters.
6460 */
6461 static char *
6462 savenstr (cp, len)
6463 char *cp;
6464 int len;
6465 {
6466 register char *dp;
6467
6468 dp = xnew (len + 1, char);
6469 strncpy (dp, cp, len);
6470 dp[len] = '\0';
6471 return dp;
6472 }
6473
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.
 * As in strrchr, C is converted to a char before the comparison, so
 * that a character above 127 is found also when it is passed as a
 * positive int and the char type is signed.  The terminating null is
 * part of the string: looking for '\0' returns a pointer to it.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  register char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
	r = sp;
    } while (*sp++);
  return (char *)r;
}
6495
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.
 * As in strchr, C is converted to a char before the comparison, so
 * that a character above 127 is found also when it is passed as a
 * positive int and the char type is signed.  The terminating null is
 * part of the string: looking for '\0' returns a pointer to it.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  register char ch = (char) c;

  do
    {
      if (*sp == ch)
	return (char *)sp;
    } while (*sp++);
  return NULL;
}
6514
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero if they are equal, else the difference between the
 * first two characters that differ (after lowering their case if both
 * are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Step over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
	{
	  if (lowcase (*s1) != lowcase (*s2))
	    break;
	}
      else if (*s1 != *s2)
	break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6535
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * Return zero if the first N characters are equal (always, if N is
 * not positive) or if the strings are equal, else the difference
 * between the first two characters that differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N is tested before *S1, and is decremented only for the chars
     found equal: in this way it gets to zero also when S1 ends after
     exactly N characters, and S2[N] is not looked at. */
  while (n > 0 && *s1 != '\0'
	 && (ISALPHA (*s1) && ISALPHA (*s2)
	     ? lowcase (*s1) == lowcase (*s2)
	     : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;			/* N characters compared equal */
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
	    ? lowcase (*s1) - lowcase (*s2)
	    : *s1 - *s2);
}
6561
/* Return a pointer to the first character of CP which is not white
   according to iswhite.  The end of the string is not a space, so the
   scan stops there.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6571
/* Return a pointer to the first character of CP which is white
   according to iswhite, or to the end of the string if there is no
   such character.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
	break;
      cp++;
    }
  return cp;
}
6581
/* Print an error message with error, then exit with EXIT_FAILURE.
   S1 and S2 are handed over to error unchanged.  Does not return.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6590
/* Print with perror the message for the current value of errno,
   prefixed by S1, then exit with EXIT_FAILURE.  Does not return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6598
6599 static void
6600 suggest_asking_for_help ()
6601 {
6602 fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6603 progname, NO_LONG_OPTIONS ? "-h" : "--help");
6604 exit (EXIT_FAILURE);
6605 }
6606
6607 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
6608 static void
6609 error (s1, s2)
6610 const char *s1, *s2;
6611 {
6612 fprintf (stderr, "%s: ", progname);
6613 fprintf (stderr, s1, s2);
6614 fprintf (stderr, "\n");
6615 }
6616
6617 /* Return a newly-allocated string whose contents
6618 concatenate those of s1, s2, s3. */
6619 static char *
6620 concat (s1, s2, s3)
6621 char *s1, *s2, *s3;
6622 {
6623 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6624 char *result = xnew (len1 + len2 + len3 + 1, char);
6625
6626 strcpy (result, s1);
6627 strcpy (result + len1, s2);
6628 strcpy (result + len1 + len2, s3);
6629 result[len1 + len2 + len3] = '\0';
6630
6631 return result;
6632 }
6633
6634 \f
6635 /* Does the same work as the system V getcwd, but does not need to
6636 guess the buffer size in advance. */
6637 static char *
6638 etags_getcwd ()
6639 {
6640 #ifdef HAVE_GETCWD
6641 int bufsize = 200;
6642 char *path = xnew (bufsize, char);
6643
6644 while (getcwd (path, bufsize) == NULL)
6645 {
6646 if (errno != ERANGE)
6647 pfatal ("getcwd");
6648 bufsize *= 2;
6649 free (path);
6650 path = xnew (bufsize, char);
6651 }
6652
6653 canonicalize_filename (path);
6654 return path;
6655
6656 #else /* not HAVE_GETCWD */
6657 #if MSDOS
6658
6659 char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS. */
6660
6661 getwd (path);
6662
6663 for (p = path; *p != '\0'; p++)
6664 if (*p == '\\')
6665 *p = '/';
6666 else
6667 *p = lowcase (*p);
6668
6669 return strdup (path);
6670 #else /* not MSDOS */
6671 linebuffer path;
6672 FILE *pipe;
6673
6674 linebuffer_init (&path);
6675 pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6676 if (pipe == NULL || readline_internal (&path, pipe) == 0)
6677 pfatal ("pwd");
6678 pclose (pipe);
6679
6680 return path.buffer;
6681 #endif /* not MSDOS */
6682 #endif /* not HAVE_GETCWD */
6683 }
6684
6685 /* Return a newly allocated string containing the file name of FILE
6686 relative to the absolute directory DIR (which should end with a slash). */
6687 static char *
6688 relative_filename (file, dir)
6689 char *file, *dir;
6690 {
6691 char *fp, *dp, *afn, *res;
6692 int i;
6693
6694 /* Find the common root of file and dir (with a trailing slash). */
6695 afn = absolute_filename (file, cwd);
6696 fp = afn;
6697 dp = dir;
6698 while (*fp++ == *dp++)
6699 continue;
6700 fp--, dp--; /* back to the first differing char */
6701 #ifdef DOS_NT
6702 if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6703 return afn;
6704 #endif
6705 do /* look at the equal chars until '/' */
6706 fp--, dp--;
6707 while (*fp != '/');
6708
6709 /* Build a sequence of "../" strings for the resulting relative file name. */
6710 i = 0;
6711 while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6712 i += 1;
6713 res = xnew (3*i + strlen (fp + 1) + 1, char);
6714 res[0] = '\0';
6715 while (i-- > 0)
6716 strcat (res, "../");
6717
6718 /* Add the file name relative to the common root of file and dir. */
6719 strcat (res, fp + 1);
6720 free (afn);
6721
6722 return res;
6723 }
6724
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   absolute it is appended to DIR.  The "/dirname/.." and "/."
   substrings are then deleted from the name.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Go back to the beginning of "/dirname". */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Move the rest of the name down over "/dirname/..".
		 Source and destination overlap, so strcpy cannot be
		 used: copy one char at a time, going upwards. */
	      {
		char *to = cp;
		char *from = slashp + 3;

		while ((*to++ = *from++) != '\0')
		  continue;
	      }
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      /* Move the rest of the name down over "/.", same as
		 above. */
	      char *to = slashp;
	      char *from = slashp + 2;

	      while ((*to++ = *from++) != '\0')
		continue;
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')		/* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6788
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place.  If there is no
   slash in FILE the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *after_slash, *res;
  char saved;

  canonicalize_filename (file);
  after_slash = etags_strrchr (file, '/');
  if (after_slash == NULL)
    return savestr (dir);
  after_slash += 1;

  /* Cut FILE after its last slash while we compute the absolute name
     of the directory, then put the character back. */
  saved = *after_slash;
  *after_slash = '\0';
  res = absolute_filename (file, dir);
  *after_slash = saved;

  return res;
}
6810
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.
   A name is absolute if it begins with a slash or, under DOS_NT, with
   a drive letter followed by a colon and a slash.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return TRUE;
#endif
  return fn[0] == '/';
}
6823
/* Translate backslashes into slashes.  Works in place.
   This is done under DOS_NT only, where a lower case drive letter is
   made upper case as well.  Elsewhere the name is left alone.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  p = fn;
  while (*p != '\0')
    {
      if (*p == '\\')
	*p = '/';
      p++;
    }
#else
  /* No action. */
  fn = NULL;			/* shut up the compiler */
#endif
}
6842
6843 \f
6844 /* Initialize a linebuffer for use */
6845 static void
6846 linebuffer_init (lbp)
6847 linebuffer *lbp;
6848 {
6849 lbp->size = (DEBUG) ? 3 : 200;
6850 lbp->buffer = xnew (lbp->size, char);
6851 lbp->buffer[0] = '\0';
6852 lbp->len = 0;
6853 }
6854
6855 /* Set the minimum size of a string contained in a linebuffer. */
6856 static void
6857 linebuffer_setlen (lbp, toksize)
6858 linebuffer *lbp;
6859 int toksize;
6860 {
6861 while (lbp->size <= toksize)
6862 {
6863 lbp->size *= 2;
6864 xrnew (lbp->buffer, lbp->size, char);
6865 }
6866 lbp->len = toksize;
6867 }
6868
6869 /* Like malloc but get fatal error if memory is exhausted. */
6870 static PTR
6871 xmalloc (size)
6872 unsigned int size;
6873 {
6874 PTR result = (PTR) malloc (size);
6875 if (result == NULL)
6876 fatal ("virtual memory exhausted", (char *)NULL);
6877 return result;
6878 }
6879
6880 static PTR
6881 xrealloc (ptr, size)
6882 char *ptr;
6883 unsigned int size;
6884 {
6885 PTR result = (PTR) realloc (ptr, size);
6886 if (result == NULL)
6887 fatal ("virtual memory exhausted", (char *)NULL);
6888 return result;
6889 }
6890
6891 /*
6892 * Local Variables:
6893 * indent-tabs-mode: t
6894 * tab-width: 8
6895 * fill-column: 79
6896 * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6897 * End:
6898 */
6899
6900 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6901 (do not change this comment) */
6902
6903 /* etags.c ends here */