lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2012
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #define TRUE    1
  84 #define FALSE   0
  85 /* Give DEBUG a definite TRUE/FALSE value so it can be tested with #if.  */
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* non-debug build: assert becomes a no-op */
  92 #endif
  93
  94 #include <config.h>
  95
  96 #ifndef _GNU_SOURCE
  97 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  98 #endif
  99
 100 /* WIN32_NATIVE is for XEmacs; it is handled as WINDOWSNT without MSDOS.
 101    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 102 #ifdef WIN32_NATIVE
 103 # undef MSDOS
 104 # undef  WINDOWSNT
 105 # define WINDOWSNT
 106 #endif /* WIN32_NATIVE */
 107 /* Give MSDOS a definite TRUE/FALSE value.  */
 108 #ifdef MSDOS
 109 # undef MSDOS
 110 # define MSDOS TRUE
 111 # include <fcntl.h>
 112 # include <sys/param.h>
 113 # include <io.h>
 114 #else
 115 # define MSDOS FALSE
 116 #endif /* MSDOS */
 117 /* WINDOWSNT implies DOS_NT and takes MAXPATHLEN from _MAX_PATH.  */
 118 #ifdef WINDOWSNT
 119 # include <fcntl.h>
 120 # include <direct.h>
 121 # include <io.h>
 122 # define MAXPATHLEN _MAX_PATH
 123 # undef HAVE_NTGUI
 124 # undef  DOS_NT
 125 # define DOS_NT
 126 #endif /* WINDOWSNT */
 127
 128 #include <unistd.h>
 129 #include <stdarg.h>
 130 #include <stdlib.h>
 131 #include <string.h>
 132 #include <stdio.h>
 133 #include <ctype.h>
 134 #include <errno.h>
 135 #include <sys/types.h>
 136 #include <sys/stat.h>
 137 #include <c-strcase.h>
 138
 139 #include <assert.h>
 140 #ifdef NDEBUG
 141 # undef  assert                 /* some systems have a buggy assert.h */
 142 # define assert(x) ((void) 0)
 143 #endif
 144
 145 #include <getopt.h>
 146 #include <regex.h>
 147
 148 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 149  Leave it undefined to make the program "etags", which makes emacs-style
 150  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 151 #ifdef CTAGS
 152 # undef  CTAGS
 153 # define CTAGS TRUE
 154 #else
 155 # define CTAGS FALSE
 156 #endif
 157 /* String equality predicates; every string argument must be non-NULL.  */
 158 #define streq(s,t)      (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 159 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 160 #define strneq(s,t,n)   (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 161 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 162
 163 #define CHARS 256               /* number of distinct values of an 8-bit char */
 164 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* index in 0..CHARS-1 */
 165 #define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
 166 #define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
 167 #define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
 168 #define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
 169 #define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
 170
 171 #define ISALNUM(c)      isalnum (CHAR (c))
 172 #define ISALPHA(c)      isalpha (CHAR (c))
 173 #define ISDIGIT(c)      isdigit (CHAR (c))
 174 #define ISLOWER(c)      islower (CHAR (c))
 175
 176 #define lowcase(c)      tolower (CHAR (c))
 177
 178
 179 /*
 180  *      xnew, xrnew -- allocate, reallocate storage for N objects of Type
 181  *
 182  * SYNOPSIS:    Type *xnew (int n, Type);
 183  *              void xrnew (OldPointer, int n, Type);
 184  * xrnew assigns the result to OldPointer; N * sizeof (Type) is unchecked.  */
 185 #if DEBUG                       /* traced variants receive the call site */
 186 # include "chkmalloc.h"
 187 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 188                                                   (n) * sizeof (Type)))
 189 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 190                                         (char *) (op), (n) * sizeof (Type)))
 191 #else
 192 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 193 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 194                                         (char *) (op), (n) * sizeof (Type)))
 195 #endif
 196
 197 #define bool int                /* must stay int: longopts stores through &flag as int * */
 198
 199 typedef void Lang_function (FILE *);
 200
 201 typedef struct
 202 {
 203   const char *suffix;           /* file name suffix for this compressor */
 204   const char *command;          /* takes one arg and decompresses to stdout */
 205 } compressor;
 206
 207 typedef struct
 208 {
 209   const char *name;             /* language name */
 210   const char *help;             /* detailed help for the language */
 211   Lang_function *function;      /* parse function */
 212   const char **suffixes;        /* name suffixes of this language's files */
 213   const char **filenames;       /* names of this language's files */
 214   const char **interpreters;    /* interpreters for this language */
 215   bool metasource;              /* source used to generate other sources */
 216 } language;
 217
 218 typedef struct fdesc
 219 {
 220   struct fdesc *next;           /* for the linked list */
 221   char *infname;                /* uncompressed input file name */
 222   char *infabsname;             /* absolute uncompressed input file name */
 223   char *infabsdir;              /* absolute dir of input file */
 224   char *taggedfname;            /* file name to write in tagfile */
 225   language *lang;               /* language of file */
 226   char *prop;                   /* file properties to write in tagfile */
 227   bool usecharno;               /* etags tags shall contain char number */
 228   bool written;                 /* entry written in the tags file */
 229 } fdesc;
 230 /* One tag; nodes are linked through LEFT and RIGHT into a binary tree.  */
 231 typedef struct node_st
 232 {                               /* sorting structure */
 233   struct node_st *left, *right; /* left and right sons */
 234   fdesc *fdp;                   /* description of file to which tag belongs */
 235   char *name;                   /* tag name */
 236   char *regex;                  /* search regexp */
 237   bool valid;                   /* write this tag on the tag file */
 238   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 239   bool been_warned;             /* warning already given for duplicated tag */
 240   int lno;                      /* line number tag is on */
 241   long cno;                     /* character number line starts on */
 242 } node;
 243
 244 /*
 245  * A `linebuffer' is a structure which holds a line of text.
 246  * `readline_internal' reads a line from a stream into a linebuffer
 247  * and works regardless of the length of the line.
 248  * SIZE is the size of BUFFER, LEN is the length of the string in
 249  * BUFFER after readline reads it.
 250  */
 251 typedef struct
 252 {
 253   long size;
 254   int len;
 255   char *buffer;
 256 } linebuffer;
 257
 258 /* Used to support mixing of --lang and file names. */
 259 typedef struct
 260 {
 261   enum {
 262     at_language,                /* a language specification */
 263     at_regexp,                  /* a regular expression */
 264     at_filename,                /* a file name */
 265     at_stdin,                   /* read from stdin here */
 266     at_end                      /* stop parsing the list */
 267   } arg_type;                   /* argument type */
 268   language *lang;               /* language associated with the argument */
 269   char *what;                   /* the argument itself */
 270 } argument;
 271
 272 /* Structure defining a regular expression. */
 273 typedef struct regexp
 274 {
 275   struct regexp *p_next;        /* pointer to next in list */
 276   language *lang;               /* if set, use only for this language */
 277   char *pattern;                /* the regexp pattern */
 278   char *name;                   /* tag name */
 279   struct re_pattern_buffer *pat; /* the compiled pattern */
 280   struct re_registers regs;     /* re registers */
 281   bool error_signaled;          /* already signaled for this regexp */
 282   bool force_explicit_name;     /* do not allow implicit tag name */
 283   bool ignore_case;             /* ignore case when matching */
 284   bool multi_line;              /* do a multi-line match on the whole file */
 285 } regexp;
 286
 287
 288 /* Language parsers.  Many compilers reject a declaration made through
 289    the typedef, as in `Lang_function Ada_funcs;', so every prototype
 290    is spelled out in full.  */
 291 static void Ada_funcs (FILE *);
 292 static void Asm_labels (FILE *);
 293 static void C_entries (int c_ext, FILE *); /* extra arg: not a Lang_function */
 294 static void default_C_entries (FILE *);
 295 static void plain_C_entries (FILE *);
 296 static void Cjava_entries (FILE *);
 297 static void Cobol_paragraphs (FILE *);
 298 static void Cplusplus_entries (FILE *);
 299 static void Cstar_entries (FILE *);
 300 static void Erlang_functions (FILE *);
 301 static void Forth_words (FILE *);
 302 static void Fortran_functions (FILE *);
 303 static void HTML_labels (FILE *);
 304 static void Lisp_functions (FILE *);
 305 static void Lua_functions (FILE *);
 306 static void Makefile_targets (FILE *);
 307 static void Pascal_functions (FILE *);
 308 static void Perl_functions (FILE *);
 309 static void PHP_functions (FILE *);
 310 static void PS_functions (FILE *);
 311 static void Prolog_functions (FILE *);
 312 static void Python_functions (FILE *);
 313 static void Scheme_functions (FILE *);
 314 static void TeX_commands (FILE *);
 315 static void Texinfo_nodes (FILE *);
 316 static void Yacc_entries (FILE *);
 317 static void just_read_file (FILE *);
 318
 319 static void print_language_names (void);
 320 static void print_version (void);
 321 static void print_help (argument *);
 322 int main (int, char **);
 323
 324 static compressor *get_compressor_from_suffix (char *, char **);
 325 static language *get_language_from_langname (const char *);
 326 static language *get_language_from_interpreter (char *);
 327 static language *get_language_from_filename (char *, bool);
 328 static void readline (linebuffer *, FILE *);
 329 static long readline_internal (linebuffer *, FILE *);
 330 static bool nocase_tail (const char *);
 331 static void get_tag (char *, char **);
 332
 333 static void analyse_regex (char *);
 334 static void free_regexps (void);
 335 static void regex_tag_multiline (void);
 336 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 337 static _Noreturn void suggest_asking_for_help (void);
 338 _Noreturn void fatal (const char *, const char *);
 339 static _Noreturn void pfatal (const char *);
 340 static void add_node (node *, node **);
 341
 342 static void init (void);
 343 static void process_file_name (char *, language *);
 344 static void process_file (FILE *, char *, language *);
 345 static void find_entries (FILE *);
 346 static void free_tree (node *);
 347 static void free_fdesc (fdesc *);
 348 static void pfnote (char *, bool, char *, int, int, long);
 349 static void make_tag (const char *, int, bool, char *, int, int, long);
 350 static void invalidate_nodes (fdesc *, node **);
 351 static void put_entries (node *);
 352
 353 static char *concat (const char *, const char *, const char *);
 354 static char *skip_spaces (char *);
 355 static char *skip_non_spaces (char *);
 356 static char *savenstr (const char *, int);
 357 static char *savestr (const char *);
 358 static char *etags_strchr (const char *, int);
 359 static char *etags_strrchr (const char *, int);
 360 static char *etags_getcwd (void);
 361 static char *relative_filename (char *, char *);
 362 static char *absolute_filename (char *, char *);
 363 static char *absolute_dirname (char *, char *);
 364 static bool filename_is_absolute (char *f);
 365 static void canonicalize_filename (char *);
 366 static void linebuffer_init (linebuffer *);
 367 static void linebuffer_setlen (linebuffer *, int);
 368 static void *xmalloc (size_t);
 369 static void *xrealloc (char *, size_t);
 370
 371 \f
 372 static char searchar = '/';     /* use /.../ searches */
 373
 374 static char *tagfile;           /* output file */
 375 static char *progname;          /* name this program was invoked with */
 376 static char *cwd;               /* current working directory */
 377 static char *tagfiledir;        /* directory of tagfile */
 378 static FILE *tagf;              /* ioptr for tags file */
 379 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 380
 381 static fdesc *fdhead;           /* head of file description list */
 382 static fdesc *curfdp;           /* current file description */
 383 static int lineno;              /* line number of current line */
 384 static long charno;             /* current character number */
 385 static long linecharno;         /* charno of start of current line */
 386 static char *dbp;               /* pointer to start of current tag */
 387
 388 static const int invalidcharno = -1;
 389
 390 static node *nodehead;          /* the head of the binary tree of tags */
 391 static node *last_node;         /* the last node created */
 392
 393 static linebuffer lb;           /* the current line */
 394 static linebuffer filebuf;      /* a buffer containing the whole file */
 395 static linebuffer token_name;   /* a buffer containing a tag name */
 396
 397 /* boolean "functions" (see init)       */
 398 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 399 static const char
 400   /* white chars */
 401   *white = " \f\t\n\r\v",
 402   /* not in a name */
 403   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 404   /* token ending chars */
 405   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 406   /* token starting chars */
 407   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 408   /* valid in-token chars */
 409   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 410
 411 static bool append_to_tagfile;  /* -a: append to tags */
 412 /* The next five default to TRUE in C and derived languages.  */
 413 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 414 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 415                                 /* 0 struct/enum/union decls, and C++ */
 416                                 /* member functions. */
 417 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 418                                 /* constants and variables. */
 419                                 /* -D: opposite of -d.  Default under ctags. */
 420 static bool globals;            /* create tags for global variables */
 421 static bool members;            /* create tags for C member variables */
 422 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 423 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 424 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 425 static bool update;             /* -u: update tags */
 426 static bool vgrind_style;       /* -v: create vgrind style index output */
 427 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 428 static bool cxref_style;        /* -x: create cxref style output */
 429 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 430 static bool ignoreindent;       /* -I: ignore indentation in C */
 431 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 432
 433 /* STDIN is defined in LynxOS system headers */
 434 #ifdef STDIN
 435 # undef STDIN
 436 #endif
 437
 438 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 439 static bool parsing_stdin;      /* --parse-stdin used */
 440
 441 static regexp *p_head;          /* list of all regexps */
 442 static bool need_filebuf;       /* some regexes are multi-line */
 443 /* Long options.  A non-NULL third field is a flag that receives the fourth.  */
 444 static struct option longopts[] =
 445 {
 446   { "append",             no_argument,       NULL,               'a'   },
 447   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 448   { "c++",                no_argument,       NULL,               'C'   },
 449   { "declarations",       no_argument,       &declarations,      TRUE  },
 450   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 451   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 452   { "help",               no_argument,       NULL,               'h'   },
 453   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): duplicate name, likely shadowed by the entry above -- confirm */
 454   { "ignore-indentation", no_argument,       NULL,               'I'   },
 455   { "language",           required_argument, NULL,               'l'   },
 456   { "members",            no_argument,       &members,           TRUE  },
 457   { "no-members",         no_argument,       &members,           FALSE },
 458   { "output",             required_argument, NULL,               'o'   },
 459   { "regex",              required_argument, NULL,               'r'   },
 460   { "no-regex",           no_argument,       NULL,               'R'   },
 461   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 462   { "parse-stdin",        required_argument, NULL,               STDIN },
 463   { "version",            no_argument,       NULL,               'V'   },
 464
 465 #if CTAGS /* Ctags options */
 466   { "backward-search",    no_argument,       NULL,               'B'   },
 467   { "cxref",              no_argument,       NULL,               'x'   },
 468   { "defines",            no_argument,       NULL,               'd'   },
 469   { "globals",            no_argument,       &globals,           TRUE  },
 470   { "typedefs",           no_argument,       NULL,               't'   },
 471   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 472   { "update",             no_argument,       NULL,               'u'   },
 473   { "vgrind",             no_argument,       NULL,               'v'   },
 474   { "no-warn",            no_argument,       NULL,               'w'   },
 475
 476 #else /* Etags options */
 477   { "no-defines",         no_argument,       NULL,               'D'   },
 478   { "no-globals",         no_argument,       &globals,           FALSE },
 479   { "include",            required_argument, NULL,               'i'   },
 480 #endif
 481   { NULL }                      /* end-of-table marker */
 482 };
 483
 484 static compressor compressors[] =
 485 {
 486   { "z", "gzip -d -c"},
 487   { "Z", "gzip -d -c"},
 488   { "gz", "gzip -d -c"},
 489   { "GZ", "gzip -d -c"},
 490   { "bz2", "bzip2 -d -c" },
 491   { "xz", "xz -d -c" },
 492   { NULL }
 493 };
 494
 495 /*
 496  * Language stuff.
 497  */
 498
 499 /* Ada code */
 500 static const char *Ada_suffixes [] =
 501   { "ads", "adb", "ada", NULL };
 502 static const char Ada_help [] =
 503 "In Ada code, functions, procedures, packages, tasks and types are\n\
 504 tags.  Use the `--packages-only' option to create tags for\n\
 505 packages only.\n\
 506 Ada tag names have suffixes indicating the type of entity:\n\
 507         Entity type:    Qualifier:\n\
 508         ------------    ----------\n\
 509         function        /f\n\
 510         procedure       /p\n\
 511         package spec    /s\n\
 512         package body    /b\n\
 513         type            /t\n\
 514         task            /k\n\
 515 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 516 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 517 will just search for any tag `bidule'.";
 518
 519 /* Assembly code */
 520 static const char *Asm_suffixes [] =
 521   { "a",        /* Unix assembler */
 522     "asm", /* Microcontroller assembly */
 523     "def", /* BSO/Tasking definition includes  */
 524     "inc", /* Microcontroller include files */
 525     "ins", /* Microcontroller include files */
 526     "s", "sa", /* Unix assembler */
 527     "S",   /* cpp-processed Unix assembler */
 528     "src", /* BSO/Tasking C compiler output */
 529     NULL
 530   };
 531 static const char Asm_help [] =
 532 "In assembler code, labels appearing at the beginning of a line,\n\
 533 followed by a colon, are tags.";
 534
 535
 536 /* Note that .c and .h can be considered C++, if the --c++ flag was
 537    given, or if the `class' or `template' keywords are met inside the file.
 538    That is why default_C_entries is called for these. */
 539 static const char *default_C_suffixes [] =
 540   { "c", "h", NULL };
 541 #if CTAGS                               /* C help for Ctags */
 542 static const char default_C_help [] =
 543 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 544 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 545 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 546 Use --globals to tag global variables.\n\
 547 You can tag function declarations and external variables by\n\
 548 using `--declarations', and struct members by using `--members'.";
 549 #else                                   /* C help for Etags */
 550 static const char default_C_help [] =
 551 "In C code, any C function or typedef is a tag, and so are\n\
 552 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 553 definitions and `enum' constants are tags unless you specify\n\
 554 `--no-defines'.  Global variables are tags unless you specify\n\
 555 `--no-globals' and so are struct members unless you specify\n\
 556 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 557 `--no-members' can make the tags table file much smaller.\n\
 558 You can tag function declarations and external variables by\n\
 559 using `--declarations'.";
 560 #endif  /* C help for Ctags and Etags */
 561
 562 static const char *Cplusplus_suffixes [] =
 563   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 564     "M",                        /* Objective C++ */
 565     "pdb",                      /* PostScript with C syntax */
 566     NULL };
 567 static const char Cplusplus_help [] =
 568 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 569 --help --lang=c --lang=c++ for full help.)\n\
 570 In addition to C tags, member functions are also recognized.  Member\n\
 571 variables are recognized unless you use the `--no-members' option.\n\
 572 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 573 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 574 `operator+'.";
 575 /* Java code; the help text is read-only like the other *_help tables.  */
 576 static const char *Cjava_suffixes [] =
 577   { "java", NULL };
 578 static const char Cjava_help [] =
 579 "In Java code, all the tags constructs of C and C++ code are\n\
 580 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 581
 582 /* Cobol code; the help text is read-only like the other *_help tables.  */
 583 static const char *Cobol_suffixes [] =
 584   { "COB", "cob", NULL };
 585 static const char Cobol_help [] =
 586 "In Cobol code, tags are paragraph names; that is, any word\n\
 587 starting in column 8 and followed by a period.";
 588
 589 static const char *Cstar_suffixes [] =
 590   { "cs", "hs", NULL };
 591
 592 static const char *Erlang_suffixes [] =
 593   { "erl", "hrl", NULL };
 594 static const char Erlang_help [] =
 595 "In Erlang code, the tags are the functions, records and macros\n\
 596 defined in the file.";
 597 /* Forth code; file-local like every other suffix table.  */
 598 static const char *Forth_suffixes [] =
 599   { "fth", "tok", NULL };
 600 static const char Forth_help [] =
 601 "In Forth code, tags are words defined by `:',\n\
 602 constant, code, create, defer, value, variable, buffer:, field.";
 603
 604 static const char *Fortran_suffixes [] =
 605   { "F", "f", "f90", "for", NULL };
 606 static const char Fortran_help [] =
 607 "In Fortran code, functions, subroutines and block data are tags.";
 608
 609 static const char *HTML_suffixes [] =
 610   { "htm", "html", "shtml", NULL };
 611 static const char HTML_help [] =
 612 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 613 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 614 occurrences of `id='.";
 615
 616 static const char *Lisp_suffixes [] =
 617   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 618 static const char Lisp_help [] =
 619 "In Lisp code, any function defined with `defun', any variable\n\
 620 defined with `defvar' or `defconst', and in general the first\n\
 621 argument of any expression that starts with `(def' in column zero\n\
 622 is a tag.";
 623
 624 static const char *Lua_suffixes [] =
 625   { "lua", "LUA", NULL };
 626 static const char Lua_help [] =
 627 "In Lua scripts, all functions are tags.";
 628
 629 static const char *Makefile_filenames [] =
 630   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 631 static const char Makefile_help [] =
 632 "In makefiles, targets are tags; additionally, variables are tags\n\
 633 unless you specify `--no-globals'.";
 634
 635 static const char *Objc_suffixes [] =
 636   { "lm",                       /* Objective lex file */
 637     "m",                        /* Objective C file */
 638      NULL };
 639 static const char Objc_help [] =
 640 "In Objective C code, tags include Objective C definitions for classes,\n\
 641 class categories, methods and protocols.  Tags for variables and\n\
 642 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 643 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 644
 645 static const char *Pascal_suffixes [] =
 646   { "p", "pas", NULL };
 647 static const char Pascal_help [] =
 648 "In Pascal code, the tags are the functions and procedures defined\n\
 649 in the file.";
 650 /* " // this is for working around an Emacs highlighting bug... */
 651
 652 static const char *Perl_suffixes [] =
 653   { "pl", "pm", NULL };
 654 static const char *Perl_interpreters [] =
 655   { "perl", "@PERL@", NULL };
 656 static const char Perl_help [] =
 657 "In Perl code, the tags are the packages, subroutines and variables\n\
 658 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 659 `--globals' if you want to tag global variables.  Tags for\n\
 660 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 661 defined in the default package is `main::SUB'.";
 662
 663 static const char *PHP_suffixes [] =
 664   { "php", "php3", "php4", NULL };
 665 static const char PHP_help [] =
 666 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 667 the `--no-members' option, vars are tags too.";
 668
 669 static const char *plain_C_suffixes [] =
 670   { "pc",                       /* Pro*C file */
 671      NULL };
 672
 673 static const char *PS_suffixes [] =
 674   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 675 static const char PS_help [] =
 676 "In PostScript code, the tags are the functions.";
 677
 678 static const char *Prolog_suffixes [] =
 679   { "prolog", NULL };
 680 static const char Prolog_help [] =
 681 "In Prolog code, tags are predicates and rules at the beginning of\n\
 682 line.";
 683
 684 static const char *Python_suffixes [] =
 685   { "py", NULL };
 686 static const char Python_help [] =
 687 "In Python code, `def' or `class' at the beginning of a line\n\
 688 generate a tag.";
 689
 690 /* Can't do the `SCM' or `scm' prefix with a version number. */
 691 static const char *Scheme_suffixes [] =
 692   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 693 static const char Scheme_help [] =
 694 "In Scheme code, tags include anything defined with `def' or with a\n\
 695 construct whose name starts with `def'.  They also include\n\
 696 variables set with `set!' at top level in the file.";
 697
 698 static const char *TeX_suffixes [] =
 699   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 700 static const char TeX_help [] =
 701 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 702 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 703 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 704 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 705 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 706 \n\
 707 Other commands can be specified by setting the environment variable\n\
 708 `TEXTAGS' to a colon-separated list like, for example,\n\
 709      TEXTAGS=\"mycommand:myothercommand\".";
 710
 711
 712 static const char *Texinfo_suffixes [] =
 713   { "texi", "texinfo", "txi", NULL };
 714 static const char Texinfo_help [] =
 715 "for texinfo files, lines starting with @node are tagged.";
 716
 717 static const char *Yacc_suffixes [] =
 718   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 719 static const char Yacc_help [] =
 720 "In Bison or Yacc input files, each rule defines as a tag the\n\
 721 nonterminal it constructs.  The portions of the file that contain\n\
 722 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 723 for full help).";
 724
 725 static const char auto_help [] =
 726 "`auto' is not a real language, it indicates to use\n\
 727 a default language for files base on file name suffix and file contents.";
 728
 729 static const char none_help [] =
 730 "`none' is not a real language, it indicates to only do\n\
 731 regexp processing on files.";
 732
 733 static const char no_lang_help [] =
 734 "No detailed help available for this language.";
 735
 736
 737 /*
 738  * Table of languages.
 739  *
 740  * It is ok for a given function to be listed under more than one
 741  * name.  I just didn't.
 742  */
 743
 744 static language lang_names [] =
 745 {
 746   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 747   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 748   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 749   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 750   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 751   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 752   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 753   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 754   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 755   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 756   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 757   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 758   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 759   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 760   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 761   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 762   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 763   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 764   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 765   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 766   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 767   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 768   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 769   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 770   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 771   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 772   { "auto",      auto_help },                      /* default guessing scheme */
 773   { "none",      none_help,      just_read_file }, /* regexp matching only */
 774   { NULL }                /* end of list */
 775 };
 776
 777 \f
 778 static void
 779 print_language_names (void)
 780 {
 781   language *lang;
 782   const char **name, **ext;
 783
 784   puts ("\nThese are the currently supported languages, along with the\n\
 785 default file names and dot suffixes:");
 786   for (lang = lang_names; lang->name != NULL; lang++)
 787     {
 788       printf ("  %-*s", 10, lang->name);
 789       if (lang->filenames != NULL)
 790         for (name = lang->filenames; *name != NULL; name++)
 791           printf (" %s", *name);
 792       if (lang->suffixes != NULL)
 793         for (ext = lang->suffixes; *ext != NULL; ext++)
 794           printf (" .%s", *ext);
 795       puts ("");
 796     }
 797   puts ("where `auto' means use default language for files based on file\n\
 798 name suffix, and `none' means only do regexp processing on files.\n\
 799 If no language is specified and no matching suffix is found,\n\
 800 the first line of the file is read for a sharp-bang (#!) sequence\n\
 801 followed by the name of an interpreter.  If no such sequence is found,\n\
 802 Fortran is tried first; if no tags are found, C is tried next.\n\
 803 When parsing any C file, a \"class\" or \"template\" keyword\n\
 804 switches to C++.");
 805   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 806 \n\
 807 For detailed help on a given language use, for example,\n\
 808 etags --help --lang=ada.");
 809 }
 810
 811 #ifndef EMACS_NAME
 812 # define EMACS_NAME "standalone"
 813 #endif
 814 #ifndef VERSION
 815 # define VERSION "17.38.1.4"
 816 #endif
 817 static void
 818 print_version (void)
 819 {
 820   char emacs_copyright[] = COPYRIGHT;
 821
 822   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 823   puts (emacs_copyright);
 824   puts ("This program is distributed under the terms in ETAGS.README");
 825
 826   exit (EXIT_SUCCESS);
 827 }
 828
/* Whether print_help also describes the undocumented options.  May be
   predefined by the build; defaults to FALSE.  */
#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
#endif

/*
 * Print help on standard output and exit successfully; does not return.
 *
 * ARGBUFFER is the list of parsed arguments, terminated by an entry of
 * type at_end.  If it contains at_language entries, only the help text
 * of each of those languages is printed, separated by blank lines.
 * Otherwise the general usage message is printed, followed by the list
 * of supported languages.  Which options are described depends on
 * whether the program runs as ctags or as etags (CTAGS).
 */
static void
print_help (argument *argbuffer)
{
  bool help_for_lang = FALSE;

  /* Language-specific help: one text per language argument.  */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* If any language help was printed, that is all that was asked for.  */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  puts ("You may use unambiguous abbreviations for the long option names.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  /* The second half of the --declarations entry differs between ctags
     and etags.  */
  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");

  if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
    puts ("--no-line-directive\n\
        Ignore #line preprocessor directives in C and derived languages.");

  if (CTAGS)
    puts ("--members\n\
        Create tag entries for members of structures in some languages.");
  else
    puts ("--no-members\n\
        Do not create tag entries for members of structures\n\
        in some languages.");

  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: `i' means to ignore case,\n\
        `m' means to allow multi-line matches, `s' implies `m' and\n\
        causes dot to match any character, including newline.");

  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");

  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");

  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");

  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions, up to --version, are printed
     only when running as ctags.  */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Print on the standard output an index of items intended for\n\
        human consumption, similar to the output of vgrind.  The index\n\
        is sorted, and gives the page number of each item.");

      /* NOTE(review): both undocumented entries below are labelled
         `-w'; confirm which long option the short form belongs to.  */
      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-duplicates\n\
        Do not create duplicate tag entries, for compatibility with\n\
        traditional ctags.");

      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-warn\n\
        Suppress warning messages about duplicate tag entries.");

      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more `--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1004
1005 \f
1006 int
1007 main (int argc, char **argv)
1008 {
1009   int i;
1010   unsigned int nincluded_files;
1011   char **included_files;
1012   argument *argbuffer;
1013   int current_arg, file_count;
1014   linebuffer filename_lb;
1015   bool help_asked = FALSE;
1016   ptrdiff_t len;
1017  char *optstring;
1018  int opt;
1019
1020
1021 #ifdef DOS_NT
1022   _fmode = O_BINARY;   /* all of files are treated as binary files */
1023 #endif /* DOS_NT */
1024
1025   progname = argv[0];
1026   nincluded_files = 0;
1027   included_files = xnew (argc, char *);
1028   current_arg = 0;
1029   file_count = 0;
1030
1031   /* Allocate enough no matter what happens.  Overkill, but each one
1032      is small. */
1033   argbuffer = xnew (argc, argument);
1034
1035   /*
1036    * Always find typedefs and structure tags.
1037    * Also default to find macro constants, enum constants, struct
1038    * members and global variables.  Do it for both etags and ctags.
1039    */
1040   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1041   globals = members = TRUE;
1042
1043   /* When the optstring begins with a '-' getopt_long does not rearrange the
1044      non-options arguments to be at the end, but leaves them alone. */
1045   optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1046                       (CTAGS) ? "BxdtTuvw" : "Di:",
1047                       "");
1048
1049   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1050     switch (opt)
1051       {
1052       case 0:
1053         /* If getopt returns 0, then it has already processed a
1054            long-named option.  We should do nothing.  */
1055         break;
1056
1057       case 1:
1058         /* This means that a file name has been seen.  Record it. */
1059         argbuffer[current_arg].arg_type = at_filename;
1060         argbuffer[current_arg].what     = optarg;
1061         len = strlen (optarg);
1062         if (whatlen_max < len)
1063           whatlen_max = len;
1064         ++current_arg;
1065         ++file_count;
1066         break;
1067
1068       case STDIN:
1069         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1070         argbuffer[current_arg].arg_type = at_stdin;
1071         argbuffer[current_arg].what     = optarg;
1072         len = strlen (optarg);
1073         if (whatlen_max < len)
1074           whatlen_max = len;
1075         ++current_arg;
1076         ++file_count;
1077         if (parsing_stdin)
1078           fatal ("cannot parse standard input more than once", (char *)NULL);
1079         parsing_stdin = TRUE;
1080         break;
1081
1082         /* Common options. */
1083       case 'a': append_to_tagfile = TRUE;       break;
1084       case 'C': cplusplus = TRUE;               break;
1085       case 'f':         /* for compatibility with old makefiles */
1086       case 'o':
1087         if (tagfile)
1088           {
1089             error ("-o option may only be given once.");
1090             suggest_asking_for_help ();
1091             /* NOTREACHED */
1092           }
1093         tagfile = optarg;
1094         break;
1095       case 'I':
1096       case 'S':         /* for backward compatibility */
1097         ignoreindent = TRUE;
1098         break;
1099       case 'l':
1100         {
1101           language *lang = get_language_from_langname (optarg);
1102           if (lang != NULL)
1103             {
1104               argbuffer[current_arg].lang = lang;
1105               argbuffer[current_arg].arg_type = at_language;
1106               ++current_arg;
1107             }
1108         }
1109         break;
1110       case 'c':
1111         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1112         optarg = concat (optarg, "i", ""); /* memory leak here */
1113         /* FALLTHRU */
1114       case 'r':
1115         argbuffer[current_arg].arg_type = at_regexp;
1116         argbuffer[current_arg].what = optarg;
1117         len = strlen (optarg);
1118         if (whatlen_max < len)
1119           whatlen_max = len;
1120         ++current_arg;
1121         break;
1122       case 'R':
1123         argbuffer[current_arg].arg_type = at_regexp;
1124         argbuffer[current_arg].what = NULL;
1125         ++current_arg;
1126         break;
1127       case 'V':
1128         print_version ();
1129         break;
1130       case 'h':
1131       case 'H':
1132         help_asked = TRUE;
1133         break;
1134
1135         /* Etags options */
1136       case 'D': constantypedefs = FALSE;                        break;
1137       case 'i': included_files[nincluded_files++] = optarg;     break;
1138
1139         /* Ctags options. */
1140       case 'B': searchar = '?';                                 break;
1141       case 'd': constantypedefs = TRUE;                         break;
1142       case 't': typedefs = TRUE;                                break;
1143       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1144       case 'u': update = TRUE;                                  break;
1145       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1146       case 'x': cxref_style = TRUE;                             break;
1147       case 'w': no_warnings = TRUE;                             break;
1148       default:
1149         suggest_asking_for_help ();
1150         /* NOTREACHED */
1151       }
1152
1153   /* No more options.  Store the rest of arguments. */
1154   for (; optind < argc; optind++)
1155     {
1156       argbuffer[current_arg].arg_type = at_filename;
1157       argbuffer[current_arg].what = argv[optind];
1158       len = strlen (argv[optind]);
1159       if (whatlen_max < len)
1160         whatlen_max = len;
1161       ++current_arg;
1162       ++file_count;
1163     }
1164
1165   argbuffer[current_arg].arg_type = at_end;
1166
1167   if (help_asked)
1168     print_help (argbuffer);
1169     /* NOTREACHED */
1170
1171   if (nincluded_files == 0 && file_count == 0)
1172     {
1173       error ("no input files specified.");
1174       suggest_asking_for_help ();
1175       /* NOTREACHED */
1176     }
1177
1178   if (tagfile == NULL)
1179     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1180   cwd = etags_getcwd ();        /* the current working directory */
1181   if (cwd[strlen (cwd) - 1] != '/')
1182     {
1183       char *oldcwd = cwd;
1184       cwd = concat (oldcwd, "/", "");
1185       free (oldcwd);
1186     }
1187
1188   /* Compute base directory for relative file names. */
1189   if (streq (tagfile, "-")
1190       || strneq (tagfile, "/dev/", 5))
1191     tagfiledir = cwd;            /* relative file names are relative to cwd */
1192   else
1193     {
1194       canonicalize_filename (tagfile);
1195       tagfiledir = absolute_dirname (tagfile, cwd);
1196     }
1197
1198   init ();                      /* set up boolean "functions" */
1199
1200   linebuffer_init (&lb);
1201   linebuffer_init (&filename_lb);
1202   linebuffer_init (&filebuf);
1203   linebuffer_init (&token_name);
1204
1205   if (!CTAGS)
1206     {
1207       if (streq (tagfile, "-"))
1208         {
1209           tagf = stdout;
1210 #ifdef DOS_NT
1211           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1212              doesn't take effect until after `stdout' is already open). */
1213           if (!isatty (fileno (stdout)))
1214             setmode (fileno (stdout), O_BINARY);
1215 #endif /* DOS_NT */
1216         }
1217       else
1218         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1219       if (tagf == NULL)
1220         pfatal (tagfile);
1221     }
1222
1223   /*
1224    * Loop through files finding functions.
1225    */
1226   for (i = 0; i < current_arg; i++)
1227     {
1228       static language *lang;    /* non-NULL if language is forced */
1229       char *this_file;
1230
1231       switch (argbuffer[i].arg_type)
1232         {
1233         case at_language:
1234           lang = argbuffer[i].lang;
1235           break;
1236         case at_regexp:
1237           analyse_regex (argbuffer[i].what);
1238           break;
1239         case at_filename:
1240               this_file = argbuffer[i].what;
1241               /* Input file named "-" means read file names from stdin
1242                  (one per line) and use them. */
1243               if (streq (this_file, "-"))
1244                 {
1245                   if (parsing_stdin)
1246                     fatal ("cannot parse standard input AND read file names from it",
1247                            (char *)NULL);
1248                   while (readline_internal (&filename_lb, stdin) > 0)
1249                     process_file_name (filename_lb.buffer, lang);
1250                 }
1251               else
1252                 process_file_name (this_file, lang);
1253           break;
1254         case at_stdin:
1255           this_file = argbuffer[i].what;
1256           process_file (stdin, this_file, lang);
1257           break;
1258         }
1259     }
1260
1261   free_regexps ();
1262   free (lb.buffer);
1263   free (filebuf.buffer);
1264   free (token_name.buffer);
1265
1266   if (!CTAGS || cxref_style)
1267     {
1268       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1269       put_entries (nodehead);
1270       free_tree (nodehead);
1271       nodehead = NULL;
1272       if (!CTAGS)
1273         {
1274           fdesc *fdp;
1275
1276           /* Output file entries that have no tags. */
1277           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1278             if (!fdp->written)
1279               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1280
1281           while (nincluded_files-- > 0)
1282             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1283
1284           if (fclose (tagf) == EOF)
1285             pfatal (tagfile);
1286         }
1287
1288       exit (EXIT_SUCCESS);
1289     }
1290
1291   /* From here on, we are in (CTAGS && !cxref_style) */
1292   if (update)
1293     {
1294       char *cmd =
1295         xmalloc (strlen (tagfile) + whatlen_max +
1296                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1297       for (i = 0; i < current_arg; ++i)
1298         {
1299           switch (argbuffer[i].arg_type)
1300             {
1301             case at_filename:
1302             case at_stdin:
1303               break;
1304             default:
1305               continue;         /* the for loop */
1306             }
1307           strcpy (cmd, "mv ");
1308           strcat (cmd, tagfile);
1309           strcat (cmd, " OTAGS;fgrep -v '\t");
1310           strcat (cmd, argbuffer[i].what);
1311           strcat (cmd, "\t' OTAGS >");
1312           strcat (cmd, tagfile);
1313           strcat (cmd, ";rm OTAGS");
1314           if (system (cmd) != EXIT_SUCCESS)
1315             fatal ("failed to execute shell command", (char *)NULL);
1316         }
1317       free (cmd);
1318       append_to_tagfile = TRUE;
1319     }
1320
1321   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1322   if (tagf == NULL)
1323     pfatal (tagfile);
1324   put_entries (nodehead);       /* write all the tags (CTAGS) */
1325   free_tree (nodehead);
1326   nodehead = NULL;
1327   if (fclose (tagf) == EOF)
1328     pfatal (tagfile);
1329
1330   if (CTAGS)
1331     if (append_to_tagfile || update)
1332       {
1333         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1334         /* Maybe these should be used:
1335            setenv ("LC_COLLATE", "C", 1);
1336            setenv ("LC_ALL", "C", 1); */
1337         strcpy (cmd, "sort -u -o ");
1338         strcat (cmd, tagfile);
1339         strcat (cmd, " ");
1340         strcat (cmd, tagfile);
1341         exit (system (cmd));
1342       }
1343   return EXIT_SUCCESS;
1344 }
1345
1346
1347 /*
1348  * Return a compressor given the file name.  If EXTPTR is non-zero,
1349  * return a pointer into FILE where the compressor-specific
1350  * extension begins.  If no compressor is found, NULL is returned
1351  * and EXTPTR is not significant.
1352  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1353  */
1354 static compressor *
1355 get_compressor_from_suffix (char *file, char **extptr)
1356 {
1357   compressor *compr;
1358   char *slash, *suffix;
1359
1360   /* File has been processed by canonicalize_filename,
1361      so we don't need to consider backslashes on DOS_NT.  */
1362   slash = etags_strrchr (file, '/');
1363   suffix = etags_strrchr (file, '.');
1364   if (suffix == NULL || suffix < slash)
1365     return NULL;
1366   if (extptr != NULL)
1367     *extptr = suffix;
1368   suffix += 1;
1369   /* Let those poor souls who live with DOS 8+3 file name limits get
1370      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1371      Only the first do loop is run if not MSDOS */
1372   do
1373     {
1374       for (compr = compressors; compr->suffix != NULL; compr++)
1375         if (streq (compr->suffix, suffix))
1376           return compr;
1377       if (!MSDOS)
1378         break;                  /* do it only once: not really a loop */
1379       if (extptr != NULL)
1380         *extptr = ++suffix;
1381     } while (*suffix != '\0');
1382   return NULL;
1383 }
1384
1385
1386
1387 /*
1388  * Return a language given the name.
1389  */
1390 static language *
1391 get_language_from_langname (const char *name)
1392 {
1393   language *lang;
1394
1395   if (name == NULL)
1396     error ("empty language name");
1397   else
1398     {
1399       for (lang = lang_names; lang->name != NULL; lang++)
1400         if (streq (name, lang->name))
1401           return lang;
1402       error ("unknown language \"%s\"", name);
1403     }
1404
1405   return NULL;
1406 }
1407
1408
1409 /*
1410  * Return a language given the interpreter name.
1411  */
1412 static language *
1413 get_language_from_interpreter (char *interpreter)
1414 {
1415   language *lang;
1416   const char **iname;
1417
1418   if (interpreter == NULL)
1419     return NULL;
1420   for (lang = lang_names; lang->name != NULL; lang++)
1421     if (lang->interpreters != NULL)
1422       for (iname = lang->interpreters; *iname != NULL; iname++)
1423         if (streq (*iname, interpreter))
1424             return lang;
1425
1426   return NULL;
1427 }
1428
1429
1430
1431 /*
1432  * Return a language given the file name.
1433  */
1434 static language *
1435 get_language_from_filename (char *file, int case_sensitive)
1436 {
1437   language *lang;
1438   const char **name, **ext, *suffix;
1439
1440   /* Try whole file name first. */
1441   for (lang = lang_names; lang->name != NULL; lang++)
1442     if (lang->filenames != NULL)
1443       for (name = lang->filenames; *name != NULL; name++)
1444         if ((case_sensitive)
1445             ? streq (*name, file)
1446             : strcaseeq (*name, file))
1447           return lang;
1448
1449   /* If not found, try suffix after last dot. */
1450   suffix = etags_strrchr (file, '.');
1451   if (suffix == NULL)
1452     return NULL;
1453   suffix += 1;
1454   for (lang = lang_names; lang->name != NULL; lang++)
1455     if (lang->suffixes != NULL)
1456       for (ext = lang->suffixes; *ext != NULL; ext++)
1457         if ((case_sensitive)
1458             ? streq (*ext, suffix)
1459             : strcaseeq (*ext, suffix))
1460           return lang;
1461   return NULL;
1462 }
1463
1464 \f
1465 /*
1466  * This routine is called on each file argument.
1467  */
1468 static void
1469 process_file_name (char *file, language *lang)
1470 {
1471   struct stat stat_buf;
1472   FILE *inf;
1473   fdesc *fdp;
1474   compressor *compr;
1475   char *compressed_name, *uncompressed_name;
1476   char *ext, *real_name;
1477   int retval;
1478
1479   canonicalize_filename (file);
1480   if (streq (file, tagfile) && !streq (tagfile, "-"))
1481     {
1482       error ("skipping inclusion of %s in self.", file);
1483       return;
1484     }
1485   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1486     {
1487       compressed_name = NULL;
1488       real_name = uncompressed_name = savestr (file);
1489     }
1490   else
1491     {
1492       real_name = compressed_name = savestr (file);
1493       uncompressed_name = savenstr (file, ext - file);
1494     }
1495
1496   /* If the canonicalized uncompressed name
1497      has already been dealt with, skip it silently. */
1498   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1499     {
1500       assert (fdp->infname != NULL);
1501       if (streq (uncompressed_name, fdp->infname))
1502         goto cleanup;
1503     }
1504
1505   if (stat (real_name, &stat_buf) != 0)
1506     {
1507       /* Reset real_name and try with a different name. */
1508       real_name = NULL;
1509       if (compressed_name != NULL) /* try with the given suffix */
1510         {
1511           if (stat (uncompressed_name, &stat_buf) == 0)
1512             real_name = uncompressed_name;
1513         }
1514       else                      /* try all possible suffixes */
1515         {
1516           for (compr = compressors; compr->suffix != NULL; compr++)
1517             {
1518               compressed_name = concat (file, ".", compr->suffix);
1519               if (stat (compressed_name, &stat_buf) != 0)
1520                 {
1521                   if (MSDOS)
1522                     {
1523                       char *suf = compressed_name + strlen (file);
1524                       size_t suflen = strlen (compr->suffix) + 1;
1525                       for ( ; suf[1]; suf++, suflen--)
1526                         {
1527                           memmove (suf, suf + 1, suflen);
1528                           if (stat (compressed_name, &stat_buf) == 0)
1529                             {
1530                               real_name = compressed_name;
1531                               break;
1532                             }
1533                         }
1534                       if (real_name != NULL)
1535                         break;
1536                     } /* MSDOS */
1537                   free (compressed_name);
1538                   compressed_name = NULL;
1539                 }
1540               else
1541                 {
1542                   real_name = compressed_name;
1543                   break;
1544                 }
1545             }
1546         }
1547       if (real_name == NULL)
1548         {
1549           perror (file);
1550           goto cleanup;
1551         }
1552     } /* try with a different name */
1553
1554   if (!S_ISREG (stat_buf.st_mode))
1555     {
1556       error ("skipping %s: it is not a regular file.", real_name);
1557       goto cleanup;
1558     }
1559   if (real_name == compressed_name)
1560     {
1561       char *cmd = concat (compr->command, " ", real_name);
1562       inf = (FILE *) popen (cmd, "r");
1563       free (cmd);
1564     }
1565   else
1566     inf = fopen (real_name, "r");
1567   if (inf == NULL)
1568     {
1569       perror (real_name);
1570       goto cleanup;
1571     }
1572
1573   process_file (inf, uncompressed_name, lang);
1574
1575   if (real_name == compressed_name)
1576     retval = pclose (inf);
1577   else
1578     retval = fclose (inf);
1579   if (retval < 0)
1580     pfatal (file);
1581
1582  cleanup:
1583   free (compressed_name);
1584   free (uncompressed_name);
1585   last_node = NULL;
1586   curfdp = NULL;
1587   return;
1588 }
1589
1590 static void
1591 process_file (FILE *fh, char *fn, language *lang)
1592 {
1593   static const fdesc emptyfdesc;
1594   fdesc *fdp;
1595
1596   /* Create a new input file description entry. */
1597   fdp = xnew (1, fdesc);
1598   *fdp = emptyfdesc;
1599   fdp->next = fdhead;
1600   fdp->infname = savestr (fn);
1601   fdp->lang = lang;
1602   fdp->infabsname = absolute_filename (fn, cwd);
1603   fdp->infabsdir = absolute_dirname (fn, cwd);
1604   if (filename_is_absolute (fn))
1605     {
1606       /* An absolute file name.  Canonicalize it. */
1607       fdp->taggedfname = absolute_filename (fn, NULL);
1608     }
1609   else
1610     {
1611       /* A file name relative to cwd.  Make it relative
1612          to the directory of the tags file. */
1613       fdp->taggedfname = relative_filename (fn, tagfiledir);
1614     }
1615   fdp->usecharno = TRUE;        /* use char position when making tags */
1616   fdp->prop = NULL;
1617   fdp->written = FALSE;         /* not written on tags file yet */
1618
1619   fdhead = fdp;
1620   curfdp = fdhead;              /* the current file description */
1621
1622   find_entries (fh);
1623
1624   /* If not Ctags, and if this is not metasource and if it contained no #line
1625      directives, we can write the tags and free all nodes pointing to
1626      curfdp. */
1627   if (!CTAGS
1628       && curfdp->usecharno      /* no #line directives in this file */
1629       && !curfdp->lang->metasource)
1630     {
1631       node *np, *prev;
1632
1633       /* Look for the head of the sublist relative to this file.  See add_node
1634          for the structure of the node tree. */
1635       prev = NULL;
1636       for (np = nodehead; np != NULL; prev = np, np = np->left)
1637         if (np->fdp == curfdp)
1638           break;
1639
1640       /* If we generated tags for this file, write and delete them. */
1641       if (np != NULL)
1642         {
1643           /* This is the head of the last sublist, if any.  The following
1644              instructions depend on this being true. */
1645           assert (np->left == NULL);
1646
1647           assert (fdhead == curfdp);
1648           assert (last_node->fdp == curfdp);
1649           put_entries (np);     /* write tags for file curfdp->taggedfname */
1650           free_tree (np);       /* remove the written nodes */
1651           if (prev == NULL)
1652             nodehead = NULL;    /* no nodes left */
1653           else
1654             prev->left = NULL;  /* delete the pointer to the sublist */
1655         }
1656     }
1657 }
1658
1659 /*
1660  * This routine sets up the boolean pseudo-functions which work
1661  * by setting boolean flags dependent upon the corresponding character.
1662  * Every char which is NOT in that string is not a white char.  Therefore,
1663  * all of the array "_wht" is set to FALSE, and then the elements
1664  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1665  * of a char is TRUE if it is the string "white", else FALSE.
1666  */
1667 static void
1668 init (void)
1669 {
1670   register const char *sp;
1671   register int i;
1672
1673   for (i = 0; i < CHARS; i++)
1674     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1675   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1676   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1677   notinname ('\0') = notinname ('\n');
1678   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1679   begtoken ('\0') = begtoken ('\n');
1680   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1681   intoken ('\0') = intoken ('\n');
1682   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1683   endtoken ('\0') = endtoken ('\n');
1684 }
1685
1686 /*
1687  * This routine opens the specified file and calls the function
1688  * which finds the function and type definitions.
1689  */
1690 static void
1691 find_entries (FILE *inf)
1692 {
1693   char *cp;
1694   language *lang = curfdp->lang;
1695   Lang_function *parser = NULL;
1696
1697   /* If user specified a language, use it. */
1698   if (lang != NULL && lang->function != NULL)
1699     {
1700       parser = lang->function;
1701     }
1702
1703   /* Else try to guess the language given the file name. */
1704   if (parser == NULL)
1705     {
1706       lang = get_language_from_filename (curfdp->infname, TRUE);
1707       if (lang != NULL && lang->function != NULL)
1708         {
1709           curfdp->lang = lang;
1710           parser = lang->function;
1711         }
1712     }
1713
1714   /* Else look for sharp-bang as the first two characters. */
1715   if (parser == NULL
1716       && readline_internal (&lb, inf) > 0
1717       && lb.len >= 2
1718       && lb.buffer[0] == '#'
1719       && lb.buffer[1] == '!')
1720     {
1721       char *lp;
1722
1723       /* Set lp to point at the first char after the last slash in the
1724          line or, if no slashes, at the first nonblank.  Then set cp to
1725          the first successive blank and terminate the string. */
1726       lp = etags_strrchr (lb.buffer+2, '/');
1727       if (lp != NULL)
1728         lp += 1;
1729       else
1730         lp = skip_spaces (lb.buffer + 2);
1731       cp = skip_non_spaces (lp);
1732       *cp = '\0';
1733
1734       if (strlen (lp) > 0)
1735         {
1736           lang = get_language_from_interpreter (lp);
1737           if (lang != NULL && lang->function != NULL)
1738             {
1739               curfdp->lang = lang;
1740               parser = lang->function;
1741             }
1742         }
1743     }
1744
1745   /* We rewind here, even if inf may be a pipe.  We fail if the
1746      length of the first line is longer than the pipe block size,
1747      which is unlikely. */
1748   rewind (inf);
1749
1750   /* Else try to guess the language given the case insensitive file name. */
1751   if (parser == NULL)
1752     {
1753       lang = get_language_from_filename (curfdp->infname, FALSE);
1754       if (lang != NULL && lang->function != NULL)
1755         {
1756           curfdp->lang = lang;
1757           parser = lang->function;
1758         }
1759     }
1760
1761   /* Else try Fortran or C. */
1762   if (parser == NULL)
1763     {
1764       node *old_last_node = last_node;
1765
1766       curfdp->lang = get_language_from_langname ("fortran");
1767       find_entries (inf);
1768
1769       if (old_last_node == last_node)
1770         /* No Fortran entries found.  Try C. */
1771         {
1772           /* We do not tag if rewind fails.
1773              Only the file name will be recorded in the tags file. */
1774           rewind (inf);
1775           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1776           find_entries (inf);
1777         }
1778       return;
1779     }
1780
1781   if (!no_line_directive
1782       && curfdp->lang != NULL && curfdp->lang->metasource)
1783     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1784        file, or anyway we parsed a file that is automatically generated from
1785        this one.  If this is the case, the bingo.c file contained #line
1786        directives that generated tags pointing to this file.  Let's delete
1787        them all before parsing this file, which is the real source. */
1788     {
1789       fdesc **fdpp = &fdhead;
1790       while (*fdpp != NULL)
1791         if (*fdpp != curfdp
1792             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1793           /* We found one of those!  We must delete both the file description
1794              and all tags referring to it. */
1795           {
1796             fdesc *badfdp = *fdpp;
1797
1798             /* Delete the tags referring to badfdp->taggedfname
1799                that were obtained from badfdp->infname. */
1800             invalidate_nodes (badfdp, &nodehead);
1801
1802             *fdpp = badfdp->next; /* remove the bad description from the list */
1803             free_fdesc (badfdp);
1804           }
1805         else
1806           fdpp = &(*fdpp)->next; /* advance the list pointer */
1807     }
1808
1809   assert (parser != NULL);
1810
1811   /* Generic initializations before reading from file. */
1812   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1813
1814   /* Generic initializations before parsing file with readline. */
1815   lineno = 0;                  /* reset global line number */
1816   charno = 0;                  /* reset global char number */
1817   linecharno = 0;              /* reset global char number of line start */
1818
1819   parser (inf);
1820
1821   regex_tag_multiline ();
1822 }
1823
1824 \f
1825 /*
1826  * Check whether an implicitly named tag should be created,
1827  * then call `pfnote'.
1828  * NAME is a string that is internally copied by this function.
1829  *
1830  * TAGS format specification
1831  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1832  * The following is explained in some more detail in etc/ETAGS.EBNF.
1833  *
1834  * make_tag creates tags with "implicit tag names" (unnamed tags)
1835  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1836  *  1. NAME does not contain any of the characters in NONAM;
1837  *  2. LINESTART contains name as either a rightmost, or rightmost but
1838  *     one character, substring;
1839  *  3. the character, if any, immediately before NAME in LINESTART must
1840  *     be a character in NONAM;
1841  *  4. the character, if any, immediately after NAME in LINESTART must
1842  *     also be a character in NONAM.
1843  *
1844  * The implementation uses the notinname() macro, which recognizes the
1845  * characters stored in the string `nonam'.
1846  * etags.el needs to use the same characters that are in NONAM.
1847  */
1848 static void
1849 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1850           int namelen,          /* tag length */
1851           int is_func,          /* tag is a function */
1852           char *linestart,      /* start of the line where tag is */
1853           int linelen,          /* length of the line where tag is */
1854           int lno,              /* line number */
1855           long int cno)         /* character number */
1856 {
1857   bool named = (name != NULL && namelen > 0);
1858   char *nname = NULL;
1859
1860   if (!CTAGS && named)          /* maybe set named to false */
1861     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1862        such that etags.el can guess a name from it. */
1863     {
1864       int i;
1865       register const char *cp = name;
1866
1867       for (i = 0; i < namelen; i++)
1868         if (notinname (*cp++))
1869           break;
1870       if (i == namelen)                         /* rule #1 */
1871         {
1872           cp = linestart + linelen - namelen;
1873           if (notinname (linestart[linelen-1]))
1874             cp -= 1;                            /* rule #4 */
1875           if (cp >= linestart                   /* rule #2 */
1876               && (cp == linestart
1877                   || notinname (cp[-1]))        /* rule #3 */
1878               && strneq (name, cp, namelen))    /* rule #2 */
1879             named = FALSE;      /* use implicit tag name */
1880         }
1881     }
1882
1883   if (named)
1884     nname = savenstr (name, namelen);
1885
1886   pfnote (nname, is_func, linestart, linelen, lno, cno);
1887 }
1888
1889 /* Record a tag. */
1890 static void
1891 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1892                                 /* tag name, or NULL if unnamed */
1893                                 /* tag is a function */
1894                                 /* start of the line where tag is */
1895                                 /* length of the line where tag is */
1896                                 /* line number */
1897                                 /* character number */
1898 {
1899   register node *np;
1900
1901   assert (name == NULL || name[0] != '\0');
1902   if (CTAGS && name == NULL)
1903     return;
1904
1905   np = xnew (1, node);
1906
1907   /* If ctags mode, change name "main" to M<thisfilename>. */
1908   if (CTAGS && !cxref_style && streq (name, "main"))
1909     {
1910       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1911       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1912       fp = etags_strrchr (np->name, '.');
1913       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1914         fp[0] = '\0';
1915     }
1916   else
1917     np->name = name;
1918   np->valid = TRUE;
1919   np->been_warned = FALSE;
1920   np->fdp = curfdp;
1921   np->is_func = is_func;
1922   np->lno = lno;
1923   if (np->fdp->usecharno)
1924     /* Our char numbers are 0-base, because of C language tradition?
1925        ctags compatibility?  old versions compatibility?   I don't know.
1926        Anyway, since emacs's are 1-base we expect etags.el to take care
1927        of the difference.  If we wanted to have 1-based numbers, we would
1928        uncomment the +1 below. */
1929     np->cno = cno /* + 1 */ ;
1930   else
1931     np->cno = invalidcharno;
1932   np->left = np->right = NULL;
1933   if (CTAGS && !cxref_style)
1934     {
1935       if (strlen (linestart) < 50)
1936         np->regex = concat (linestart, "$", "");
1937       else
1938         np->regex = savenstr (linestart, 50);
1939     }
1940   else
1941     np->regex = savenstr (linestart, linelen);
1942
1943   add_node (np, &nodehead);
1944 }
1945
1946 /*
1947  * free_tree ()
1948  *      recurse on left children, iterate on right children.
1949  */
1950 static void
1951 free_tree (register node *np)
1952 {
1953   while (np)
1954     {
1955       register node *node_right = np->right;
1956       free_tree (np->left);
1957       free (np->name);
1958       free (np->regex);
1959       free (np);
1960       np = node_right;
1961     }
1962 }
1963
1964 /*
1965  * free_fdesc ()
1966  *      delete a file description
1967  */
1968 static void
1969 free_fdesc (register fdesc *fdp)
1970 {
1971   free (fdp->infname);
1972   free (fdp->infabsname);
1973   free (fdp->infabsdir);
1974   free (fdp->taggedfname);
1975   free (fdp->prop);
1976   free (fdp);
1977 }
1978
1979 /*
1980  * add_node ()
1981  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1982  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1983  *      balancing.
1984  *
1985  *      add_node is the only function allowed to add nodes, so it can
1986  *      maintain state.
1987  */
1988 static void
1989 add_node (node *np, node **cur_node_p)
1990 {
1991   register int dif;
1992   register node *cur_node = *cur_node_p;
1993
1994   if (cur_node == NULL)
1995     {
1996       *cur_node_p = np;
1997       last_node = np;
1998       return;
1999     }
2000
2001   if (!CTAGS)
2002     /* Etags Mode */
2003     {
2004       /* For each file name, tags are in a linked sublist on the right
2005          pointer.  The first tags of different files are a linked list
2006          on the left pointer.  last_node points to the end of the last
2007          used sublist. */
2008       if (last_node != NULL && last_node->fdp == np->fdp)
2009         {
2010           /* Let's use the same sublist as the last added node. */
2011           assert (last_node->right == NULL);
2012           last_node->right = np;
2013           last_node = np;
2014         }
2015       else if (cur_node->fdp == np->fdp)
2016         {
2017           /* Scanning the list we found the head of a sublist which is
2018              good for us.  Let's scan this sublist. */
2019           add_node (np, &cur_node->right);
2020         }
2021       else
2022         /* The head of this sublist is not good for us.  Let's try the
2023            next one. */
2024         add_node (np, &cur_node->left);
2025     } /* if ETAGS mode */
2026
2027   else
2028     {
2029       /* Ctags Mode */
2030       dif = strcmp (np->name, cur_node->name);
2031
2032       /*
2033        * If this tag name matches an existing one, then
2034        * do not add the node, but maybe print a warning.
2035        */
2036       if (no_duplicates && !dif)
2037         {
2038           if (np->fdp == cur_node->fdp)
2039             {
2040               if (!no_warnings)
2041                 {
2042                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2043                            np->fdp->infname, lineno, np->name);
2044                   fprintf (stderr, "Second entry ignored\n");
2045                 }
2046             }
2047           else if (!cur_node->been_warned && !no_warnings)
2048             {
2049               fprintf
2050                 (stderr,
2051                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2052                  np->fdp->infname, cur_node->fdp->infname, np->name);
2053               cur_node->been_warned = TRUE;
2054             }
2055           return;
2056         }
2057
2058       /* Actually add the node */
2059       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2060     } /* if CTAGS mode */
2061 }
2062
2063 /*
2064  * invalidate_nodes ()
2065  *      Scan the node tree and invalidate all nodes pointing to the
2066  *      given file description (CTAGS case) or free them (ETAGS case).
2067  */
2068 static void
2069 invalidate_nodes (fdesc *badfdp, node **npp)
2070 {
2071   node *np = *npp;
2072
2073   if (np == NULL)
2074     return;
2075
2076   if (CTAGS)
2077     {
2078       if (np->left != NULL)
2079         invalidate_nodes (badfdp, &np->left);
2080       if (np->fdp == badfdp)
2081         np->valid = FALSE;
2082       if (np->right != NULL)
2083         invalidate_nodes (badfdp, &np->right);
2084     }
2085   else
2086     {
2087       assert (np->fdp != NULL);
2088       if (np->fdp == badfdp)
2089         {
2090           *npp = np->left;      /* detach the sublist from the list */
2091           np->left = NULL;      /* isolate it */
2092           free_tree (np);       /* free it */
2093           invalidate_nodes (badfdp, npp);
2094         }
2095       else
2096         invalidate_nodes (badfdp, &np->left);
2097     }
2098 }
2099
2100 \f
2101 static int total_size_of_entries (node *);
2102 static int number_len (long) ATTRIBUTE_CONST;
2103
/* Return how many decimal digits are needed to print the
   non-negative number NUM. */
static int
number_len (long int num)
{
  int digits;

  /* One digit to begin with, one more for every division by ten
     that leaves something positive. */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2113
2114 /*
2115  * Return total number of characters that put_entries will output for
2116  * the nodes in the linked list at the right of the specified node.
2117  * This count is irrelevant with etags.el since emacs 19.34 at least,
2118  * but is still supplied for backward compatibility.
2119  */
2120 static int
2121 total_size_of_entries (register node *np)
2122 {
2123   register int total = 0;
2124
2125   for (; np != NULL; np = np->right)
2126     if (np->valid)
2127       {
2128         total += strlen (np->regex) + 1;                /* pat\177 */
2129         if (np->name != NULL)
2130           total += strlen (np->name) + 1;               /* name\001 */
2131         total += number_len ((long) np->lno) + 1;       /* lno, */
2132         if (np->cno != invalidcharno)                   /* cno */
2133           total += number_len (np->cno);
2134         total += 1;                                     /* newline */
2135       }
2136
2137   return total;
2138 }
2139
2140 static void
2141 put_entries (register node *np)
2142 {
2143   register char *sp;
2144   static fdesc *fdp = NULL;
2145
2146   if (np == NULL)
2147     return;
2148
2149   /* Output subentries that precede this one */
2150   if (CTAGS)
2151     put_entries (np->left);
2152
2153   /* Output this entry */
2154   if (np->valid)
2155     {
2156       if (!CTAGS)
2157         {
2158           /* Etags mode */
2159           if (fdp != np->fdp)
2160             {
2161               fdp = np->fdp;
2162               fprintf (tagf, "\f\n%s,%d\n",
2163                        fdp->taggedfname, total_size_of_entries (np));
2164               fdp->written = TRUE;
2165             }
2166           fputs (np->regex, tagf);
2167           fputc ('\177', tagf);
2168           if (np->name != NULL)
2169             {
2170               fputs (np->name, tagf);
2171               fputc ('\001', tagf);
2172             }
2173           fprintf (tagf, "%d,", np->lno);
2174           if (np->cno != invalidcharno)
2175             fprintf (tagf, "%ld", np->cno);
2176           fputs ("\n", tagf);
2177         }
2178       else
2179         {
2180           /* Ctags mode */
2181           if (np->name == NULL)
2182             error ("internal error: NULL name in ctags mode.");
2183
2184           if (cxref_style)
2185             {
2186               if (vgrind_style)
2187                 fprintf (stdout, "%s %s %d\n",
2188                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2189               else
2190                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2191                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2192             }
2193           else
2194             {
2195               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2196
2197               if (np->is_func)
2198                 {               /* function or #define macro with args */
2199                   putc (searchar, tagf);
2200                   putc ('^', tagf);
2201
2202                   for (sp = np->regex; *sp; sp++)
2203                     {
2204                       if (*sp == '\\' || *sp == searchar)
2205                         putc ('\\', tagf);
2206                       putc (*sp, tagf);
2207                     }
2208                   putc (searchar, tagf);
2209                 }
2210               else
2211                 {               /* anything else; text pattern inadequate */
2212                   fprintf (tagf, "%d", np->lno);
2213                 }
2214               putc ('\n', tagf);
2215             }
2216         }
2217     } /* if this node contains a valid tag */
2218
2219   /* Output subentries that follow this one */
2220   put_entries (np->right);
2221   if (!CTAGS)
2222     put_entries (np->left);
2223 }
2224
2225 \f
/* C extensions: flags telling which C-like language is being parsed.
   Note that C_STAR and C_JAVA both include the C_PLPL bit. */
#define C_EXT   0x00fff         /* C extensions; covers the three below */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2234
/*
 * The C symbol tables.
 *
 * Kinds of symbols told apart by the keyword table that follows; the
 * keywords named in the comments are the ones that table maps to
 * each kind.
 */
enum sym_type
{
  st_none,              /* not a keyword applicable to the dialect */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, import,
                           package, friend */
  st_C_attribute,       /* __attribute__, GTY */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* struct, union, namespace, domain, interface */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2249
2250 static unsigned int hash (const char *, unsigned int);
2251 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2252 static enum sym_type C_symtype (char *, int, int);
2253
2254 /* Feed stuff between (but not including) %[ and %] lines to:
2255      gperf -m 5
2256 %[
2257 %compare-strncmp
2258 %enum
2259 %struct-type
2260 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2261 %%
2262 if,             0,                      st_C_ignore
2263 for,            0,                      st_C_ignore
2264 while,          0,                      st_C_ignore
2265 switch,         0,                      st_C_ignore
2266 return,         0,                      st_C_ignore
2267 __attribute__,  0,                      st_C_attribute
2268 GTY,            0,                      st_C_attribute
2269 @interface,     0,                      st_C_objprot
2270 @protocol,      0,                      st_C_objprot
2271 @implementation,0,                      st_C_objimpl
2272 @end,           0,                      st_C_objend
2273 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2274 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2275 friend,         C_PLPL,                 st_C_ignore
2276 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2277 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2278 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2279 class,          0,                      st_C_class
2280 namespace,      C_PLPL,                 st_C_struct
2281 domain,         C_STAR,                 st_C_struct
2282 union,          0,                      st_C_struct
2283 struct,         0,                      st_C_struct
2284 extern,         0,                      st_C_extern
2285 enum,           0,                      st_C_enum
2286 typedef,        0,                      st_C_typedef
2287 define,         0,                      st_C_define
2288 undef,          0,                      st_C_define
2289 operator,       C_PLPL,                 st_C_operator
2290 template,       0,                      st_C_template
2291 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2292 DEFUN,          0,                      st_C_gnumacro
2293 SYSCALL,        0,                      st_C_gnumacro
2294 ENTRY,          0,                      st_C_gnumacro
2295 PSEUDO,         0,                      st_C_gnumacro
2296 # These are defined inside C functions, so currently they are not met.
2297 # EXFUN used in glibc, DEFVAR_* in emacs.
2298 #EXFUN,         0,                      st_C_gnumacro
2299 #DEFVAR_,       0,                      st_C_gnumacro
2300 %]
2301 and replace lines between %< and %> with its output, then:
2302  - remove the #if characterset check
2303  - make in_word_set static and not inline. */
2304 /*%<*/
2305 /* C code produced by gperf version 3.0.1 */
2306 /* Command-line: gperf -m 5  */
2307 /* Computed positions: -k'2-3' */
2308
2309 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2310 /* maximum key range = 33, duplicates = 0 */
2311
/* Hash the word STR of length LEN for the lookup done by in_word_set.
   The value is LEN plus the association value of the second
   character plus, unless LEN is exactly 2, that of the third
   character.  STR must therefore hold at least two characters, or
   three when LEN is not 2. */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* A two-character word has no third character to look at. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2357
2358 static struct C_stab_entry *
2359 in_word_set (register const char *str, register unsigned int len)
2360 {
2361   enum
2362     {
2363       TOTAL_KEYWORDS = 33,
2364       MIN_WORD_LENGTH = 2,
2365       MAX_WORD_LENGTH = 15,
2366       MIN_HASH_VALUE = 2,
2367       MAX_HASH_VALUE = 34
2368     };
2369
2370   static struct C_stab_entry wordlist[] =
2371     {
2372       {""}, {""},
2373       {"if",            0,                      st_C_ignore},
2374       {"GTY",           0,                      st_C_attribute},
2375       {"@end",          0,                      st_C_objend},
2376       {"union",         0,                      st_C_struct},
2377       {"define",                0,                      st_C_define},
2378       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2379       {"template",      0,                      st_C_template},
2380       {"operator",      C_PLPL,                 st_C_operator},
2381       {"@interface",    0,                      st_C_objprot},
2382       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2383       {"friend",                C_PLPL,                 st_C_ignore},
2384       {"typedef",       0,                      st_C_typedef},
2385       {"return",                0,                      st_C_ignore},
2386       {"@implementation",0,                     st_C_objimpl},
2387       {"@protocol",     0,                      st_C_objprot},
2388       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2389       {"extern",                0,                      st_C_extern},
2390       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2391       {"struct",                0,                      st_C_struct},
2392       {"domain",                C_STAR,                 st_C_struct},
2393       {"switch",                0,                      st_C_ignore},
2394       {"enum",          0,                      st_C_enum},
2395       {"for",           0,                      st_C_ignore},
2396       {"namespace",     C_PLPL,                 st_C_struct},
2397       {"class",         0,                      st_C_class},
2398       {"while",         0,                      st_C_ignore},
2399       {"undef",         0,                      st_C_define},
2400       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2401       {"__attribute__", 0,                      st_C_attribute},
2402       {"SYSCALL",       0,                      st_C_gnumacro},
2403       {"ENTRY",         0,                      st_C_gnumacro},
2404       {"PSEUDO",                0,                      st_C_gnumacro},
2405       {"DEFUN",         0,                      st_C_gnumacro}
2406     };
2407
2408   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2409     {
2410       register int key = hash (str, len);
2411
2412       if (key <= MAX_HASH_VALUE && key >= 0)
2413         {
2414           register const char *s = wordlist[key].name;
2415
2416           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2417             return &wordlist[key];
2418         }
2419     }
2420   return 0;
2421 }
2422 /*%>*/
2423
2424 static enum sym_type
2425 C_symtype (char *str, int len, int c_ext)
2426 {
2427   register struct C_stab_entry *se = in_word_set (str, len);
2428
2429   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2430     return st_none;
2431   return se->type;
2432 }
2433
2434 \f
/*
 * Support for ignoring __attribute__ ((list)).
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * A simple finite automaton recognizes C functions and variables.
 * Its state is kept in fvdef.
 */
static enum
{
  /* nothing seen */
  fvnone,
  /* Emacs DEFUN keyword seen */
  fdefunkey,
  /* Emacs DEFUN name seen */
  fdefunname,
  /* func: operator keyword seen (cplpl) */
  foperator,
  /* function or variable name seen */
  fvnameseen,
  /* func: just after open parenthesis */
  fstartlist,
  /* func: in parameter list */
  finlist,
  /* func: after parameter list */
  flistseen,
  /* func: before open brace */
  fignore,
  /* var-like: ignore until ';' */
  vignore
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen; */
2459
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 *
 * consider_token only leaves tnone (for tkeyseen) when the `typedefs'
 * option is set; the tkeyseen -> ttypeseen step is taken on an
 * ordinary identifier or on a class/struct/enum keyword.
 */
static enum
{
  tnone,			/* nothing seen */
  tkeyseen,			/* typedef keyword seen */
  ttypeseen,			/* defined type seen */
  tinbody,			/* inside typedef body */
  tend,				/* just before typedef tag */
  tignore			/* junk after typedef tag */
} typdef;
2473
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 *
 * In consider_token: a class/struct/enum keyword moves it to skeyseen,
 * the next token consumed in that state moves it to stagseen (and is
 * reported as a tag), and a keyword classified as st_C_javastruct
 * moves stagseen to scolonseen.
 */
static enum
{
  snone,			/* nothing seen yet,
				   or in struct body if bracelev > 0 */
  skeyseen,			/* struct-like keyword seen */
  stagseen,			/* struct-like tag seen */
  scolonseen			/* colon seen after struct-like tag */
} structdef;
2487
/*
 * When objdef is different from onone, objtag is the name of the class.
 *
 * consider_token stores a fresh savenstr copy here each time a class
 * name is read in the oprotocol or oimplementation state.  The previous
 * value is never freed (see the "Memory leakage" note in
 * consider_token), which also keeps the literal initializer below safe.
 */
static const char *objtag = "<uninited>";
2492
/*
 * Yet another little state machine to deal with preprocessor lines.
 *
 * C_entries enters dsharpseen on a '#' that is preceded on its line
 * only by blanks or by a closed comment.  consider_token then goes to
 * ddefineseen if the next token is classified as st_C_define, or to
 * dignorerest otherwise.  The CNL macro resets the state to dnone at
 * the start of a new line; CNL_SAVE_DEFINEDEF leaves it unchanged.
 */
static enum
{
  dnone,			/* nothing seen */
  dsharpseen,			/* '#' seen as first char on line */
  ddefineseen,			/* '#' and 'define' seen */
  dignorerest			/* ignore rest of line */
} definedef;
2503
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 *
 * In the omethodsign and omethodparm states consider_token builds the
 * method name in token_name, starting it at the first name and
 * appending each later one.
 */
static enum
{
  onone,			/* nothing seen */
  oprotocol,			/* @interface or @protocol seen */
  oimplementation,		/* @implementation seen */
  otagseen,			/* class name seen */
  oparenseen,			/* parenthesis before category seen */
  ocatseen,			/* category name seen */
  oinbody,			/* in @implementation body */
  omethodsign,			/* in @implementation body, after +/- */
  omethodtag,			/* after method name */
  omethodcolon,			/* after method colon */
  omethodparm,			/* after method parameter */
  oignore			/* wait for @end */
} objdef;
2523
2524
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;			/* a tag may be created from this token;
				   make_C_tag emits nothing (outside DEBUG)
				   when this is false, and clears it after
				   every call.  The token should be
				   invalidated whenever a state machine is
				   reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2547
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
/* Push a name at a brace level, after popping entries at that level
   or deeper.  */
static void pushclass_above (int, char *, int);
/* Pop (and free) every entry whose brace level is >= the argument.  */
static void popclass_above (int);
/* Write the qualified name of the current nesting into a linebuffer.  */
static void write_classname (linebuffer *, const char *qualifier);

/* The two arrays are parallel and are grown together by
   pushclass_above.  Entries 0 .. nl-1 are in use, with strictly
   increasing brace levels (asserted on push).  A cname entry is NULL
   when the level was pushed without a name; non-NULL entries are owned
   by the stack and released by popclass_above.  */
static struct {
  char **cname;			/* nested class names */
  int *bracelev;		/* nested class brace level */
  int nl;			/* class nesting level (elements used) */
  int size;			/* length of the array */
} cstack;			/* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev		(cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   This expands to an expression that reads a variable named `bracelev',
   so it can only be used where such a variable is in scope: true when
   the current brace level is exactly one deeper than the level recorded
   for the innermost stack entry.  */
#define instruct	(structdef == snone && nestlev > 0			\
			 && bracelev == cstack.bracelev[nestlev-1] + 1)
2567
2568 static void
2569 pushclass_above (int bracelev, char *str, int len)
2570 {
2571   int nl;
2572
2573   popclass_above (bracelev);
2574   nl = cstack.nl;
2575   if (nl >= cstack.size)
2576     {
2577       int size = cstack.size *= 2;
2578       xrnew (cstack.cname, size, char *);
2579       xrnew (cstack.bracelev, size, int);
2580     }
2581   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2582   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2583   cstack.bracelev[nl] = bracelev;
2584   cstack.nl = nl + 1;
2585 }
2586
2587 static void
2588 popclass_above (int bracelev)
2589 {
2590   int nl;
2591
2592   for (nl = cstack.nl - 1;
2593        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2594        nl--)
2595     {
2596       free (cstack.cname[nl]);
2597       cstack.nl = nl;
2598     }
2599 }
2600
2601 static void
2602 write_classname (linebuffer *cn, const char *qualifier)
2603 {
2604   int i, len;
2605   int qlen = strlen (qualifier);
2606
2607   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2608     {
2609       len = 0;
2610       cn->len = 0;
2611       cn->buffer[0] = '\0';
2612     }
2613   else
2614     {
2615       len = strlen (cstack.cname[0]);
2616       linebuffer_setlen (cn, len);
2617       strcpy (cn->buffer, cstack.cname[0]);
2618     }
2619   for (i = 1; i < cstack.nl; i++)
2620     {
2621       char *s = cstack.cname[i];
2622       if (s == NULL)
2623         continue;
2624       linebuffer_setlen (cn, len + qlen + strlen (s));
2625       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2626     }
2627 }
2628
2629 \f
/* Forward declarations for the C token classifier and the tag emitter
   used by C_entries.  */
static bool consider_token (char *, int, int, int *, int, int, bool *);
static void make_C_tag (bool);
2632
2633 /*
2634  * consider_token ()
2635  *      checks to see if the current token is at the start of a
2636  *      function or variable, or corresponds to a typedef, or
2637  *      is a struct/union/enum tag, or #define, or an enum constant.
2638  *
2639  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2640  *      with args.  C_EXTP points to which language we are looking at.
2641  *
2642  * Globals
2643  *      fvdef                   IN OUT
2644  *      structdef               IN OUT
2645  *      definedef               IN OUT
2646  *      typdef                  IN OUT
2647  *      objdef                  IN OUT
2648  */
2649
2650 static bool
2651 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2652                                 /* IN: token pointer */
2653                                 /* IN: token length */
2654                                 /* IN: first char after the token */
2655                                 /* IN, OUT: C extensions mask */
2656                                 /* IN: brace level */
2657                                 /* IN: parenthesis level */
2658                                 /* OUT: function or variable found */
2659 {
2660   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2661      structtype is the type of the preceding struct-like keyword, and
2662      structbracelev is the brace level where it has been seen. */
2663   static enum sym_type structtype;
2664   static int structbracelev;
2665   static enum sym_type toktype;
2666
2667
2668   toktype = C_symtype (str, len, *c_extp);
2669
2670   /*
2671    * Skip __attribute__
2672    */
2673   if (toktype == st_C_attribute)
2674     {
2675       inattribute = TRUE;
2676       return FALSE;
2677      }
2678
2679    /*
2680     * Advance the definedef state machine.
2681     */
2682    switch (definedef)
2683      {
2684      case dnone:
2685        /* We're not on a preprocessor line. */
2686        if (toktype == st_C_gnumacro)
2687          {
2688            fvdef = fdefunkey;
2689            return FALSE;
2690          }
2691        break;
2692      case dsharpseen:
2693        if (toktype == st_C_define)
2694          {
2695            definedef = ddefineseen;
2696          }
2697        else
2698          {
2699            definedef = dignorerest;
2700          }
2701        return FALSE;
2702      case ddefineseen:
2703        /*
2704         * Make a tag for any macro, unless it is a constant
2705         * and constantypedefs is FALSE.
2706         */
2707        definedef = dignorerest;
2708        *is_func_or_var = (c == '(');
2709        if (!*is_func_or_var && !constantypedefs)
2710          return FALSE;
2711        else
2712          return TRUE;
2713      case dignorerest:
2714        return FALSE;
2715      default:
2716        error ("internal error: definedef value.");
2717      }
2718
2719    /*
2720     * Now typedefs
2721     */
2722    switch (typdef)
2723      {
2724      case tnone:
2725        if (toktype == st_C_typedef)
2726          {
2727            if (typedefs)
2728              typdef = tkeyseen;
2729            fvextern = FALSE;
2730            fvdef = fvnone;
2731            return FALSE;
2732          }
2733        break;
2734      case tkeyseen:
2735        switch (toktype)
2736          {
2737          case st_none:
2738          case st_C_class:
2739          case st_C_struct:
2740          case st_C_enum:
2741            typdef = ttypeseen;
2742          }
2743        break;
2744      case ttypeseen:
2745        if (structdef == snone && fvdef == fvnone)
2746          {
2747            fvdef = fvnameseen;
2748            return TRUE;
2749          }
2750        break;
2751      case tend:
2752        switch (toktype)
2753          {
2754          case st_C_class:
2755          case st_C_struct:
2756          case st_C_enum:
2757            return FALSE;
2758          }
2759        return TRUE;
2760      }
2761
2762    switch (toktype)
2763      {
2764      case st_C_javastruct:
2765        if (structdef == stagseen)
2766          structdef = scolonseen;
2767        return FALSE;
2768      case st_C_template:
2769      case st_C_class:
2770        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2771            && bracelev == 0
2772            && definedef == dnone && structdef == snone
2773            && typdef == tnone && fvdef == fvnone)
2774          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2775        if (toktype == st_C_template)
2776          break;
2777        /* FALLTHRU */
2778      case st_C_struct:
2779      case st_C_enum:
2780        if (parlev == 0
2781            && fvdef != vignore
2782            && (typdef == tkeyseen
2783                || (typedefs_or_cplusplus && structdef == snone)))
2784          {
2785            structdef = skeyseen;
2786            structtype = toktype;
2787            structbracelev = bracelev;
2788            if (fvdef == fvnameseen)
2789              fvdef = fvnone;
2790          }
2791        return FALSE;
2792      }
2793
2794    if (structdef == skeyseen)
2795      {
2796        structdef = stagseen;
2797        return TRUE;
2798      }
2799
2800    if (typdef != tnone)
2801      definedef = dnone;
2802
2803    /* Detect Objective C constructs. */
2804    switch (objdef)
2805      {
2806      case onone:
2807        switch (toktype)
2808          {
2809          case st_C_objprot:
2810            objdef = oprotocol;
2811            return FALSE;
2812          case st_C_objimpl:
2813            objdef = oimplementation;
2814            return FALSE;
2815          }
2816        break;
2817      case oimplementation:
2818        /* Save the class tag for functions or variables defined inside. */
2819        objtag = savenstr (str, len);
2820        objdef = oinbody;
2821        return FALSE;
2822      case oprotocol:
2823        /* Save the class tag for categories. */
2824        objtag = savenstr (str, len);
2825        objdef = otagseen;
2826        *is_func_or_var = TRUE;
2827        return TRUE;
2828      case oparenseen:
2829        objdef = ocatseen;
2830        *is_func_or_var = TRUE;
2831        return TRUE;
2832      case oinbody:
2833        break;
2834      case omethodsign:
2835        if (parlev == 0)
2836          {
2837            fvdef = fvnone;
2838            objdef = omethodtag;
2839            linebuffer_setlen (&token_name, len);
2840            memcpy (token_name.buffer, str, len);
2841            token_name.buffer[len] = '\0';
2842            return TRUE;
2843          }
2844        return FALSE;
2845      case omethodcolon:
2846        if (parlev == 0)
2847          objdef = omethodparm;
2848        return FALSE;
2849      case omethodparm:
2850        if (parlev == 0)
2851          {
2852            int oldlen = token_name.len;
2853            fvdef = fvnone;
2854            objdef = omethodtag;
2855            linebuffer_setlen (&token_name, oldlen + len);
2856            memcpy (token_name.buffer + oldlen, str, len);
2857            token_name.buffer[oldlen + len] = '\0';
2858            return TRUE;
2859          }
2860        return FALSE;
2861      case oignore:
2862        if (toktype == st_C_objend)
2863          {
2864            /* Memory leakage here: the string pointed by objtag is
2865               never released, because many tests would be needed to
2866               avoid breaking on incorrect input code.  The amount of
2867               memory leaked here is the sum of the lengths of the
2868               class tags.
2869            free (objtag); */
2870            objdef = onone;
2871          }
2872        return FALSE;
2873      }
2874
2875    /* A function, variable or enum constant? */
2876    switch (toktype)
2877      {
2878      case st_C_extern:
2879        fvextern = TRUE;
2880        switch  (fvdef)
2881          {
2882          case finlist:
2883          case flistseen:
2884          case fignore:
2885          case vignore:
2886            break;
2887          default:
2888            fvdef = fvnone;
2889          }
2890        return FALSE;
2891      case st_C_ignore:
2892        fvextern = FALSE;
2893        fvdef = vignore;
2894        return FALSE;
2895      case st_C_operator:
2896        fvdef = foperator;
2897        *is_func_or_var = TRUE;
2898        return TRUE;
2899      case st_none:
2900        if (constantypedefs
2901            && structdef == snone
2902            && structtype == st_C_enum && bracelev > structbracelev)
2903          return TRUE;           /* enum constant */
2904        switch (fvdef)
2905          {
2906          case fdefunkey:
2907            if (bracelev > 0)
2908              break;
2909            fvdef = fdefunname;  /* GNU macro */
2910            *is_func_or_var = TRUE;
2911            return TRUE;
2912          case fvnone:
2913            switch (typdef)
2914              {
2915              case ttypeseen:
2916                return FALSE;
2917              case tnone:
2918                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2919                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2920                  {
2921                    fvdef = vignore;
2922                    return FALSE;
2923                  }
2924                break;
2925              }
2926           /* FALLTHRU */
2927           case fvnameseen:
2928           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2929             {
2930               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2931                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2932               fvdef = foperator;
2933               *is_func_or_var = TRUE;
2934               return TRUE;
2935             }
2936           if (bracelev > 0 && !instruct)
2937             break;
2938           fvdef = fvnameseen;   /* function or variable */
2939           *is_func_or_var = TRUE;
2940           return TRUE;
2941         }
2942       break;
2943     }
2944
2945   return FALSE;
2946 }
2947
2948 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
static struct
{
  long linepos;
  linebuffer lb;
} lbs[2];

/* The macros below expand to expressions and statements that use local
   variables of the function they appear in (curndx, newndx, c_ext, lp,
   quotednl, charno, inf, savetoken), so they are usable only where
   those names are in scope.  */

#define current_lb_is_new (newndx == curndx)
/* Flip curndx between the two entries of lbs.  */
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Language tests on the c_ext mask.  */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Start a new input line without touching definedef: record charno as
   the position of the line, refill curlb with readline, point lp at the
   start of the buffer, clear quotednl and mark the current buffer as
   the new one.  */
#define CNL_SAVE_DEFINEDEF()						\
do {									\
  curlinepos = charno;							\
  readline (&curlb, inf);						\
  lp = curlb.buffer;							\
  quotednl = FALSE;							\
  newndx = curndx;							\
} while (0)

/* Same as CNL_SAVE_DEFINEDEF, then restore a token parked in savetoken
   (if it is valid) into token, and reset definedef to dnone.  */
#define CNL()								\
do {									\
  CNL_SAVE_DEFINEDEF();							\
  if (savetoken.valid)							\
    {									\
      token = savetoken;						\
      savetoken.valid = FALSE;						\
    }									\
  definedef = dnone;							\
} while (0)
2991
2992
2993 static void
2994 make_C_tag (int isfun)
2995 {
2996   /* This function is never called when token.valid is FALSE, but
2997      we must protect against invalid input or internal errors. */
2998   if (token.valid)
2999     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3000               token.offset+token.length+1, token.lineno, token.linepos);
3001   else if (DEBUG)
3002     {                             /* this branch is optimized away if !DEBUG */
3003       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3004                 token_name.len + 17, isfun, token.line,
3005                 token.offset+token.length+1, token.lineno, token.linepos);
3006       error ("INVALID TOKEN");
3007     }
3008
3009   token.valid = FALSE;
3010 }
3011
3012
3013 /*
3014  * C_entries ()
3015  *      This routine finds functions, variables, typedefs,
3016  *      #define's, enum constants and struct/union/enum definitions in
3017  *      C syntax and adds them to the list.
3018  */
3019 static void
3020 C_entries (int c_ext, FILE *inf)
3021                                 /* extension of C */
3022                                 /* input file */
3023 {
3024   register char c;              /* latest char read; '\0' for end of line */
3025   register char *lp;            /* pointer one beyond the character `c' */
3026   int curndx, newndx;           /* indices for current and new lb */
3027   register int tokoff;          /* offset in line of start of current token */
3028   register int toklen;          /* length of current token */
3029   const char *qualifier;        /* string used to qualify names */
3030   int qlen;                     /* length of qualifier */
3031   int bracelev;                 /* current brace level */
3032   int bracketlev;               /* current bracket level */
3033   int parlev;                   /* current parenthesis level */
3034   int attrparlev;               /* __attribute__ parenthesis level */
3035   int templatelev;              /* current template level */
3036   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3037   bool incomm, inquote, inchar, quotednl, midtoken;
3038   bool yacc_rules;              /* in the rules part of a yacc file */
3039   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3040
3041
3042   linebuffer_init (&lbs[0].lb);
3043   linebuffer_init (&lbs[1].lb);
3044   if (cstack.size == 0)
3045     {
3046       cstack.size = (DEBUG) ? 1 : 4;
3047       cstack.nl = 0;
3048       cstack.cname = xnew (cstack.size, char *);
3049       cstack.bracelev = xnew (cstack.size, int);
3050     }
3051
3052   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3053   curndx = newndx = 0;
3054   lp = curlb.buffer;
3055   *lp = 0;
3056
3057   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3058   structdef = snone; definedef = dnone; objdef = onone;
3059   yacc_rules = FALSE;
3060   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3061   token.valid = savetoken.valid = FALSE;
3062   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3063   if (cjava)
3064     { qualifier = "."; qlen = 1; }
3065   else
3066     { qualifier = "::"; qlen = 2; }
3067
3068
3069   while (!feof (inf))
3070     {
3071       c = *lp++;
3072       if (c == '\\')
3073         {
3074           /* If we are at the end of the line, the next character is a
3075              '\0'; do not skip it, because it is what tells us
3076              to read the next line.  */
3077           if (*lp == '\0')
3078             {
3079               quotednl = TRUE;
3080               continue;
3081             }
3082           lp++;
3083           c = ' ';
3084         }
3085       else if (incomm)
3086         {
3087           switch (c)
3088             {
3089             case '*':
3090               if (*lp == '/')
3091                 {
3092                   c = *lp++;
3093                   incomm = FALSE;
3094                 }
3095               break;
3096             case '\0':
3097               /* Newlines inside comments do not end macro definitions in
3098                  traditional cpp. */
3099               CNL_SAVE_DEFINEDEF ();
3100               break;
3101             }
3102           continue;
3103         }
3104       else if (inquote)
3105         {
3106           switch (c)
3107             {
3108             case '"':
3109               inquote = FALSE;
3110               break;
3111             case '\0':
3112               /* Newlines inside strings do not end macro definitions
3113                  in traditional cpp, even though compilers don't
3114                  usually accept them. */
3115               CNL_SAVE_DEFINEDEF ();
3116               break;
3117             }
3118           continue;
3119         }
3120       else if (inchar)
3121         {
3122           switch (c)
3123             {
3124             case '\0':
3125               /* Hmmm, something went wrong. */
3126               CNL ();
3127               /* FALLTHRU */
3128             case '\'':
3129               inchar = FALSE;
3130               break;
3131             }
3132           continue;
3133         }
3134       else switch (c)
3135         {
3136         case '"':
3137           inquote = TRUE;
3138           if (bracketlev > 0)
3139             continue;
3140           if (inattribute)
3141             break;
3142           switch (fvdef)
3143             {
3144             case fdefunkey:
3145             case fstartlist:
3146             case finlist:
3147             case fignore:
3148             case vignore:
3149               break;
3150             default:
3151               fvextern = FALSE;
3152               fvdef = fvnone;
3153             }
3154           continue;
3155         case '\'':
3156           inchar = TRUE;
3157           if (bracketlev > 0)
3158             continue;
3159           if (inattribute)
3160             break;
3161           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3162             {
3163               fvextern = FALSE;
3164               fvdef = fvnone;
3165             }
3166           continue;
3167         case '/':
3168           if (*lp == '*')
3169             {
3170               incomm = TRUE;
3171               lp++;
3172               c = ' ';
3173               if (bracketlev > 0)
3174                 continue;
3175             }
3176           else if (/* cplpl && */ *lp == '/')
3177             {
3178               c = '\0';
3179             }
3180           break;
3181         case '%':
3182           if ((c_ext & YACC) && *lp == '%')
3183             {
3184               /* Entering or exiting rules section in yacc file. */
3185               lp++;
3186               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3187               typdef = tnone; structdef = snone;
3188               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3189               bracelev = 0;
3190               yacc_rules = !yacc_rules;
3191               continue;
3192             }
3193           else
3194             break;
3195         case '#':
3196           if (definedef == dnone)
3197             {
3198               char *cp;
3199               bool cpptoken = TRUE;
3200
3201               /* Look back on this line.  If all blanks, or nonblanks
3202                  followed by an end of comment, this is a preprocessor
3203                  token. */
3204               for (cp = newlb.buffer; cp < lp-1; cp++)
3205                 if (!iswhite (*cp))
3206                   {
3207                     if (*cp == '*' && cp[1] == '/')
3208                       {
3209                         cp++;
3210                         cpptoken = TRUE;
3211                       }
3212                     else
3213                       cpptoken = FALSE;
3214                   }
3215               if (cpptoken)
3216                 definedef = dsharpseen;
3217             } /* if (definedef == dnone) */
3218           continue;
3219         case '[':
3220           bracketlev++;
3221           continue;
3222         default:
3223           if (bracketlev > 0)
3224             {
3225               if (c == ']')
3226                 --bracketlev;
3227               else if (c == '\0')
3228                 CNL_SAVE_DEFINEDEF ();
3229               continue;
3230             }
3231           break;
3232         } /* switch (c) */
3233
3234
3235       /* Consider token only if some involved conditions are satisfied. */
3236       if (typdef != tignore
3237           && definedef != dignorerest
3238           && fvdef != finlist
3239           && templatelev == 0
3240           && (definedef != dnone
3241               || structdef != scolonseen)
3242           && !inattribute)
3243         {
3244           if (midtoken)
3245             {
3246               if (endtoken (c))
3247                 {
3248                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3249                     /* This handles :: in the middle,
3250                        but not at the beginning of an identifier.
3251                        Also, space-separated :: is not recognized. */
3252                     {
3253                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3254                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3255                       lp += 2;
3256                       toklen += 2;
3257                       c = lp[-1];
3258                       goto still_in_token;
3259                     }
3260                   else
3261                     {
3262                       bool funorvar = FALSE;
3263
3264                       if (yacc_rules
3265                           || consider_token (newlb.buffer + tokoff, toklen, c,
3266                                              &c_ext, bracelev, parlev,
3267                                              &funorvar))
3268                         {
3269                           if (fvdef == foperator)
3270                             {
3271                               char *oldlp = lp;
3272                               lp = skip_spaces (lp-1);
3273                               if (*lp != '\0')
3274                                 lp += 1;
3275                               while (*lp != '\0'
3276                                      && !iswhite (*lp) && *lp != '(')
3277                                 lp += 1;
3278                               c = *lp++;
3279                               toklen += lp - oldlp;
3280                             }
3281                           token.named = FALSE;
3282                           if (!plainc
3283                               && nestlev > 0 && definedef == dnone)
3284                             /* in struct body */
3285                             {
3286                               int len;
3287                               write_classname (&token_name, qualifier);
3288                               len = token_name.len;
3289                               linebuffer_setlen (&token_name, len+qlen+toklen);
3290                               sprintf (token_name.buffer + len, "%s%.*s",
3291                                        qualifier, toklen, newlb.buffer + tokoff);
3292                               token.named = TRUE;
3293                             }
3294                           else if (objdef == ocatseen)
3295                             /* Objective C category */
3296                             {
3297                               int len = strlen (objtag) + 2 + toklen;
3298                               linebuffer_setlen (&token_name, len);
3299                               sprintf (token_name.buffer, "%s(%.*s)",
3300                                        objtag, toklen, newlb.buffer + tokoff);
3301                               token.named = TRUE;
3302                             }
3303                           else if (objdef == omethodtag
3304                                    || objdef == omethodparm)
3305                             /* Objective C method */
3306                             {
3307                               token.named = TRUE;
3308                             }
3309                           else if (fvdef == fdefunname)
3310                             /* GNU DEFUN and similar macros */
3311                             {
3312                               bool defun = (newlb.buffer[tokoff] == 'F');
3313                               int off = tokoff;
3314                               int len = toklen;
3315
3316                               /* Rewrite the tag so that emacs lisp DEFUNs
3317                                  can be found by their elisp name */
3318                               if (defun)
3319                                 {
3320                                   off += 1;
3321                                   len -= 1;
3322                                 }
3323                               linebuffer_setlen (&token_name, len);
3324                               memcpy (token_name.buffer,
3325                                       newlb.buffer + off, len);
3326                               token_name.buffer[len] = '\0';
3327                               if (defun)
3328                                 while (--len >= 0)
3329                                   if (token_name.buffer[len] == '_')
3330                                     token_name.buffer[len] = '-';
3331                               token.named = defun;
3332                             }
3333                           else
3334                             {
3335                               linebuffer_setlen (&token_name, toklen);
3336                               memcpy (token_name.buffer,
3337                                       newlb.buffer + tokoff, toklen);
3338                               token_name.buffer[toklen] = '\0';
3339                               /* Name macros and members. */
3340                               token.named = (structdef == stagseen
3341                                              || typdef == ttypeseen
3342                                              || typdef == tend
3343                                              || (funorvar
3344                                                  && definedef == dignorerest)
3345                                              || (funorvar
3346                                                  && definedef == dnone
3347                                                  && structdef == snone
3348                                                  && bracelev > 0));
3349                             }
3350                           token.lineno = lineno;
3351                           token.offset = tokoff;
3352                           token.length = toklen;
3353                           token.line = newlb.buffer;
3354                           token.linepos = newlinepos;
3355                           token.valid = TRUE;
3356
3357                           if (definedef == dnone
3358                               && (fvdef == fvnameseen
3359                                   || fvdef == foperator
3360                                   || structdef == stagseen
3361                                   || typdef == tend
3362                                   || typdef == ttypeseen
3363                                   || objdef != onone))
3364                             {
3365                               if (current_lb_is_new)
3366                                 switch_line_buffers ();
3367                             }
3368                           else if (definedef != dnone
3369                                    || fvdef == fdefunname
3370                                    || instruct)
3371                             make_C_tag (funorvar);
3372                         }
3373                       else /* not yacc and consider_token failed */
3374                         {
3375                           if (inattribute && fvdef == fignore)
3376                             {
3377                               /* We have just met __attribute__ after a
3378                                  function parameter list: do not tag the
3379                                  function again. */
3380                               fvdef = fvnone;
3381                             }
3382                         }
3383                       midtoken = FALSE;
3384                     }
3385                 } /* if (endtoken (c)) */
3386               else if (intoken (c))
3387                 still_in_token:
3388                 {
3389                   toklen++;
3390                   continue;
3391                 }
3392             } /* if (midtoken) */
3393           else if (begtoken (c))
3394             {
3395               switch (definedef)
3396                 {
3397                 case dnone:
3398                   switch (fvdef)
3399                     {
3400                     case fstartlist:
3401                       /* This prevents tagging fb in
3402                          void (__attribute__((noreturn)) *fb) (void);
3403                          Fixing this is not easy and not very important. */
3404                       fvdef = finlist;
3405                       continue;
3406                     case flistseen:
3407                       if (plainc || declarations)
3408                         {
3409                           make_C_tag (TRUE); /* a function */
3410                           fvdef = fignore;
3411                         }
3412                       break;
3413                     }
3414                   if (structdef == stagseen && !cjava)
3415                     {
3416                       popclass_above (bracelev);
3417                       structdef = snone;
3418                     }
3419                   break;
3420                 case dsharpseen:
3421                   savetoken = token;
3422                   break;
3423                 }
3424               if (!yacc_rules || lp == newlb.buffer + 1)
3425                 {
3426                   tokoff = lp - 1 - newlb.buffer;
3427                   toklen = 1;
3428                   midtoken = TRUE;
3429                 }
3430               continue;
3431             } /* if (begtoken) */
3432         } /* if must look at token */
3433
3434
3435       /* Detect end of line, colon, comma, semicolon and various braces
3436          after having handled a token.*/
3437       switch (c)
3438         {
3439         case ':':
3440           if (inattribute)
3441             break;
3442           if (yacc_rules && token.offset == 0 && token.valid)
3443             {
3444               make_C_tag (FALSE); /* a yacc function */
3445               break;
3446             }
3447           if (definedef != dnone)
3448             break;
3449           switch (objdef)
3450             {
3451             case  otagseen:
3452               objdef = oignore;
3453               make_C_tag (TRUE); /* an Objective C class */
3454               break;
3455             case omethodtag:
3456             case omethodparm:
3457               objdef = omethodcolon;
3458               linebuffer_setlen (&token_name, token_name.len + 1);
3459               strcat (token_name.buffer, ":");
3460               break;
3461             }
3462           if (structdef == stagseen)
3463             {
3464               structdef = scolonseen;
3465               break;
3466             }
3467           /* Should be useless, but may be work as a safety net. */
3468           if (cplpl && fvdef == flistseen)
3469             {
3470               make_C_tag (TRUE); /* a function */
3471               fvdef = fignore;
3472               break;
3473             }
3474           break;
3475         case ';':
3476           if (definedef != dnone || inattribute)
3477             break;
3478           switch (typdef)
3479             {
3480             case tend:
3481             case ttypeseen:
3482               make_C_tag (FALSE); /* a typedef */
3483               typdef = tnone;
3484               fvdef = fvnone;
3485               break;
3486             case tnone:
3487             case tinbody:
3488             case tignore:
3489               switch (fvdef)
3490                 {
3491                 case fignore:
3492                   if (typdef == tignore || cplpl)
3493                     fvdef = fvnone;
3494                   break;
3495                 case fvnameseen:
3496                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3497                       || (members && instruct))
3498                     make_C_tag (FALSE); /* a variable */
3499                   fvextern = FALSE;
3500                   fvdef = fvnone;
3501                   token.valid = FALSE;
3502                   break;
3503                 case flistseen:
3504                   if ((declarations
3505                        && (cplpl || !instruct)
3506                        && (typdef == tnone || (typdef != tignore && instruct)))
3507                       || (members
3508                           && plainc && instruct))
3509                     make_C_tag (TRUE);  /* a function */
3510                   /* FALLTHRU */
3511                 default:
3512                   fvextern = FALSE;
3513                   fvdef = fvnone;
3514                   if (declarations
3515                        && cplpl && structdef == stagseen)
3516                     make_C_tag (FALSE); /* forward declaration */
3517                   else
3518                     token.valid = FALSE;
3519                 } /* switch (fvdef) */
3520               /* FALLTHRU */
3521             default:
3522               if (!instruct)
3523                 typdef = tnone;
3524             }
3525           if (structdef == stagseen)
3526             structdef = snone;
3527           break;
3528         case ',':
3529           if (definedef != dnone || inattribute)
3530             break;
3531           switch (objdef)
3532             {
3533             case omethodtag:
3534             case omethodparm:
3535               make_C_tag (TRUE); /* an Objective C method */
3536               objdef = oinbody;
3537               break;
3538             }
3539           switch (fvdef)
3540             {
3541             case fdefunkey:
3542             case foperator:
3543             case fstartlist:
3544             case finlist:
3545             case fignore:
3546             case vignore:
3547               break;
3548             case fdefunname:
3549               fvdef = fignore;
3550               break;
3551             case fvnameseen:
3552               if (parlev == 0
3553                   && ((globals
3554                        && bracelev == 0
3555                        && templatelev == 0
3556                        && (!fvextern || declarations))
3557                       || (members && instruct)))
3558                   make_C_tag (FALSE); /* a variable */
3559               break;
3560             case flistseen:
3561               if ((declarations && typdef == tnone && !instruct)
3562                   || (members && typdef != tignore && instruct))
3563                 {
3564                   make_C_tag (TRUE); /* a function */
3565                   fvdef = fvnameseen;
3566                 }
3567               else if (!declarations)
3568                 fvdef = fvnone;
3569               token.valid = FALSE;
3570               break;
3571             default:
3572               fvdef = fvnone;
3573             }
3574           if (structdef == stagseen)
3575             structdef = snone;
3576           break;
3577         case ']':
3578           if (definedef != dnone || inattribute)
3579             break;
3580           if (structdef == stagseen)
3581             structdef = snone;
3582           switch (typdef)
3583             {
3584             case ttypeseen:
3585             case tend:
3586               typdef = tignore;
3587               make_C_tag (FALSE);       /* a typedef */
3588               break;
3589             case tnone:
3590             case tinbody:
3591               switch (fvdef)
3592                 {
3593                 case foperator:
3594                 case finlist:
3595                 case fignore:
3596                 case vignore:
3597                   break;
3598                 case fvnameseen:
3599                   if ((members && bracelev == 1)
3600                       || (globals && bracelev == 0
3601                           && (!fvextern || declarations)))
3602                     make_C_tag (FALSE); /* a variable */
3603                   /* FALLTHRU */
3604                 default:
3605                   fvdef = fvnone;
3606                 }
3607               break;
3608             }
3609           break;
3610         case '(':
3611           if (inattribute)
3612             {
3613               attrparlev++;
3614               break;
3615             }
3616           if (definedef != dnone)
3617             break;
3618           if (objdef == otagseen && parlev == 0)
3619             objdef = oparenseen;
3620           switch (fvdef)
3621             {
3622             case fvnameseen:
3623               if (typdef == ttypeseen
3624                   && *lp != '*'
3625                   && !instruct)
3626                 {
3627                   /* This handles constructs like:
3628                      typedef void OperatorFun (int fun); */
3629                   make_C_tag (FALSE);
3630                   typdef = tignore;
3631                   fvdef = fignore;
3632                   break;
3633                 }
3634               /* FALLTHRU */
3635             case foperator:
3636               fvdef = fstartlist;
3637               break;
3638             case flistseen:
3639               fvdef = finlist;
3640               break;
3641             }
3642           parlev++;
3643           break;
3644         case ')':
3645           if (inattribute)
3646             {
3647               if (--attrparlev == 0)
3648                 inattribute = FALSE;
3649               break;
3650             }
3651           if (definedef != dnone)
3652             break;
3653           if (objdef == ocatseen && parlev == 1)
3654             {
3655               make_C_tag (TRUE); /* an Objective C category */
3656               objdef = oignore;
3657             }
3658           if (--parlev == 0)
3659             {
3660               switch (fvdef)
3661                 {
3662                 case fstartlist:
3663                 case finlist:
3664                   fvdef = flistseen;
3665                   break;
3666                 }
3667               if (!instruct
3668                   && (typdef == tend
3669                       || typdef == ttypeseen))
3670                 {
3671                   typdef = tignore;
3672                   make_C_tag (FALSE); /* a typedef */
3673                 }
3674             }
3675           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3676             parlev = 0;
3677           break;
3678         case '{':
3679           if (definedef != dnone)
3680             break;
3681           if (typdef == ttypeseen)
3682             {
3683               /* Whenever typdef is set to tinbody (currently only
3684                  here), typdefbracelev should be set to bracelev. */
3685               typdef = tinbody;
3686               typdefbracelev = bracelev;
3687             }
3688           switch (fvdef)
3689             {
3690             case flistseen:
3691               make_C_tag (TRUE);    /* a function */
3692               /* FALLTHRU */
3693             case fignore:
3694               fvdef = fvnone;
3695               break;
3696             case fvnone:
3697               switch (objdef)
3698                 {
3699                 case otagseen:
3700                   make_C_tag (TRUE); /* an Objective C class */
3701                   objdef = oignore;
3702                   break;
3703                 case omethodtag:
3704                 case omethodparm:
3705                   make_C_tag (TRUE); /* an Objective C method */
3706                   objdef = oinbody;
3707                   break;
3708                 default:
3709                   /* Neutralize `extern "C" {' grot. */
3710                   if (bracelev == 0 && structdef == snone && nestlev == 0
3711                       && typdef == tnone)
3712                     bracelev = -1;
3713                 }
3714               break;
3715             }
3716           switch (structdef)
3717             {
3718             case skeyseen:         /* unnamed struct */
3719               pushclass_above (bracelev, NULL, 0);
3720               structdef = snone;
3721               break;
3722             case stagseen:         /* named struct or enum */
3723             case scolonseen:       /* a class */
3724               pushclass_above (bracelev,token.line+token.offset, token.length);
3725               structdef = snone;
3726               make_C_tag (FALSE);  /* a struct or enum */
3727               break;
3728             }
3729           bracelev += 1;
3730           break;
3731         case '*':
3732           if (definedef != dnone)
3733             break;
3734           if (fvdef == fstartlist)
3735             {
3736               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3737               token.valid = FALSE;
3738             }
3739           break;
3740         case '}':
3741           if (definedef != dnone)
3742             break;
3743           bracelev -= 1;
3744           if (!ignoreindent && lp == newlb.buffer + 1)
3745             {
3746               if (bracelev != 0)
3747                 token.valid = FALSE; /* unexpected value, token unreliable */
3748               bracelev = 0;     /* reset brace level if first column */
3749               parlev = 0;       /* also reset paren level, just in case... */
3750             }
3751           else if (bracelev < 0)
3752             {
3753               token.valid = FALSE; /* something gone amiss, token unreliable */
3754               bracelev = 0;
3755             }
3756           if (bracelev == 0 && fvdef == vignore)
3757             fvdef = fvnone;             /* end of function */
3758           popclass_above (bracelev);
3759           structdef = snone;
3760           /* Only if typdef == tinbody is typdefbracelev significant. */
3761           if (typdef == tinbody && bracelev <= typdefbracelev)
3762             {
3763               assert (bracelev == typdefbracelev);
3764               typdef = tend;
3765             }
3766           break;
3767         case '=':
3768           if (definedef != dnone)
3769             break;
3770           switch (fvdef)
3771             {
3772             case foperator:
3773             case finlist:
3774             case fignore:
3775             case vignore:
3776               break;
3777             case fvnameseen:
3778               if ((members && bracelev == 1)
3779                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3780                 make_C_tag (FALSE); /* a variable */
3781               /* FALLTHRU */
3782             default:
3783               fvdef = vignore;
3784             }
3785           break;
3786         case '<':
3787           if (cplpl
3788               && (structdef == stagseen || fvdef == fvnameseen))
3789             {
3790               templatelev++;
3791               break;
3792             }
3793           goto resetfvdef;
3794         case '>':
3795           if (templatelev > 0)
3796             {
3797               templatelev--;
3798               break;
3799             }
3800           goto resetfvdef;
3801         case '+':
3802         case '-':
3803           if (objdef == oinbody && bracelev == 0)
3804             {
3805               objdef = omethodsign;
3806               break;
3807             }
3808           /* FALLTHRU */
3809         resetfvdef:
3810         case '#': case '~': case '&': case '%': case '/':
3811         case '|': case '^': case '!': case '.': case '?':
3812           if (definedef != dnone)
3813             break;
3814           /* These surely cannot follow a function tag in C. */
3815           switch (fvdef)
3816             {
3817             case foperator:
3818             case finlist:
3819             case fignore:
3820             case vignore:
3821               break;
3822             default:
3823               fvdef = fvnone;
3824             }
3825           break;
3826         case '\0':
3827           if (objdef == otagseen)
3828             {
3829               make_C_tag (TRUE); /* an Objective C class */
3830               objdef = oignore;
3831             }
3832           /* If a macro spans multiple lines don't reset its state. */
3833           if (quotednl)
3834             CNL_SAVE_DEFINEDEF ();
3835           else
3836             CNL ();
3837           break;
3838         } /* switch (c) */
3839
3840     } /* while not eof */
3841
3842   free (lbs[0].lb.buffer);
3843   free (lbs[1].lb.buffer);
3844 }
3845
3846 /*
3847  * Process either a C++ file or a C file depending on the setting
3848  * of a global flag.
3849  */
3850 static void
3851 default_C_entries (FILE *inf)
3852 {
3853   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3854 }
3855
/* Always do plain C.
   INF is passed straight through to C_entries together with 0 as the
   language selector, i.e. none of the C_PLPL / C_JAVA / C_STAR / YACC
   values that the sibling wrappers pass.  */
static void
plain_C_entries (FILE *inf)
{
  C_entries (0, inf);
}
3862
/* Always do C++.
   INF is passed straight through to C_entries with the C_PLPL
   selector, regardless of the global `cplusplus' flag.  */
static void
Cplusplus_entries (FILE *inf)
{
  C_entries (C_PLPL, inf);
}
3869
/* Always do Java.
   INF is passed straight through to C_entries with the C_JAVA
   selector.  */
static void
Cjava_entries (FILE *inf)
{
  C_entries (C_JAVA, inf);
}
3876
/* Always do C*.
   INF is passed straight through to C_entries with the C_STAR
   selector.  */
static void
Cstar_entries (FILE *inf)
{
  C_entries (C_STAR, inf);
}
3883
/* Always do Yacc.
   INF is passed straight through to C_entries with the YACC
   selector.  */
static void
Yacc_entries (FILE *inf)
{
  C_entries (YACC, inf);
}
3890
3891 \f
/* Useful macros. */

/* Expands to the header of a `for' loop; the statement that follows the
   macro invocation is the loop body.  Before each pass, and only while
   feof (file_pointer) is false, readline is called to fill line_buffer
   and char_pointer is set to the start of line_buffer.buffer.  There is
   no initialization and no increment clause, so a `continue' in the
   body simply proceeds to the next line.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )

/* True when the text at CP starts with the keyword KW and the character
   right after it satisfies notinname.  On success CP is advanced past
   the keyword and past whatever skip_spaces skips; on failure the `&&'
   chain stops early and CP is left untouched.  CP must be an lvalue and
   is evaluated several times.  The `"" kw' concatenation only compiles
   when KW is a string literal, which is what makes sizeof (kw)-1 the
   keyword length.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert ("" kw), TRUE)   /* syntax error if not a literal string */  \
   && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
   && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */

/* Similar to LOOKING_AT, but the comparison goes through strncaseeq,
   the character following the keyword is not examined (no notinname
   test), and on success CP is advanced past the keyword only: skipping
   any spaces after it is left to the caller.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert ("" kw), TRUE) /* syntax error if not a literal string */    \
   && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
   && ((cp) += sizeof (kw)-1))                  /* step over kw only */
3913
3914 /*
3915  * Read a file, but do no processing.  This is used to do regexp
3916  * matching on files that have no language defined.
3917  */
3918 static void
3919 just_read_file (FILE *inf)
3920 {
3921   while (!feof (inf))
3922     readline (&lb, inf);
3923 }
3924
3925 \f
3926 /* Fortran parsing */
3927
3928 static void F_takeprec (void);
3929 static void F_getit (FILE *);
3930
3931 static void
3932 F_takeprec (void)
3933 {
3934   dbp = skip_spaces (dbp);
3935   if (*dbp != '*')
3936     return;
3937   dbp++;
3938   dbp = skip_spaces (dbp);
3939   if (strneq (dbp, "(*)", 3))
3940     {
3941       dbp += 3;
3942       return;
3943     }
3944   if (!ISDIGIT (*dbp))
3945     {
3946       --dbp;                    /* force failure */
3947       return;
3948     }
3949   do
3950     dbp++;
3951   while (ISDIGIT (*dbp));
3952 }
3953
3954 static void
3955 F_getit (FILE *inf)
3956 {
3957   register char *cp;
3958
3959   dbp = skip_spaces (dbp);
3960   if (*dbp == '\0')
3961     {
3962       readline (&lb, inf);
3963       dbp = lb.buffer;
3964       if (dbp[5] != '&')
3965         return;
3966       dbp += 6;
3967       dbp = skip_spaces (dbp);
3968     }
3969   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3970     return;
3971   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3972     continue;
3973   make_tag (dbp, cp-dbp, TRUE,
3974             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3975 }
3976
3977
3978 static void
3979 Fortran_functions (FILE *inf)
3980 {
3981   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3982     {
3983       if (*dbp == '%')
3984         dbp++;                  /* Ratfor escape to fortran */
3985       dbp = skip_spaces (dbp);
3986       if (*dbp == '\0')
3987         continue;
3988
3989       if (LOOKING_AT_NOCASE (dbp, "recursive"))
3990         dbp = skip_spaces (dbp);
3991
3992       if (LOOKING_AT_NOCASE (dbp, "pure"))
3993         dbp = skip_spaces (dbp);
3994
3995       if (LOOKING_AT_NOCASE (dbp, "elemental"))
3996         dbp = skip_spaces (dbp);
3997
3998       switch (lowcase (*dbp))
3999         {
4000         case 'i':
4001           if (nocase_tail ("integer"))
4002             F_takeprec ();
4003           break;
4004         case 'r':
4005           if (nocase_tail ("real"))
4006             F_takeprec ();
4007           break;
4008         case 'l':
4009           if (nocase_tail ("logical"))
4010             F_takeprec ();
4011           break;
4012         case 'c':
4013           if (nocase_tail ("complex") || nocase_tail ("character"))
4014             F_takeprec ();
4015           break;
4016         case 'd':
4017           if (nocase_tail ("double"))
4018             {
4019               dbp = skip_spaces (dbp);
4020               if (*dbp == '\0')
4021                 continue;
4022               if (nocase_tail ("precision"))
4023                 break;
4024               continue;
4025             }
4026           break;
4027         }
4028       dbp = skip_spaces (dbp);
4029       if (*dbp == '\0')
4030         continue;
4031       switch (lowcase (*dbp))
4032         {
4033         case 'f':
4034           if (nocase_tail ("function"))
4035             F_getit (inf);
4036           continue;
4037         case 's':
4038           if (nocase_tail ("subroutine"))
4039             F_getit (inf);
4040           continue;
4041         case 'e':
4042           if (nocase_tail ("entry"))
4043             F_getit (inf);
4044           continue;
4045         case 'b':
4046           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4047             {
4048               dbp = skip_spaces (dbp);
4049               if (*dbp == '\0') /* assume un-named */
4050                 make_tag ("blockdata", 9, TRUE,
4051                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4052               else
4053                 F_getit (inf);  /* look for name */
4054             }
4055           continue;
4056         }
4057     }
4058 }
4059
4060 \f
4061 /*
4062  * Ada parsing
4063  * Original code by
4064  * Philippe Waroquiers (1998)
4065  */
4066
4067 /* Once we are positioned after an "interesting" keyword, let's get
4068    the real tag value necessary. */
4069 static void
4070 Ada_getit (FILE *inf, const char *name_qualifier)
4071 {
4072   register char *cp;
4073   char *name;
4074   char c;
4075
4076   while (!feof (inf))
4077     {
4078       dbp = skip_spaces (dbp);
4079       if (*dbp == '\0'
4080           || (dbp[0] == '-' && dbp[1] == '-'))
4081         {
4082           readline (&lb, inf);
4083           dbp = lb.buffer;
4084         }
4085       switch (lowcase (*dbp))
4086         {
4087         case 'b':
4088           if (nocase_tail ("body"))
4089             {
4090               /* Skipping body of   procedure body   or   package body or ....
4091                  resetting qualifier to body instead of spec. */
4092               name_qualifier = "/b";
4093               continue;
4094             }
4095           break;
4096         case 't':
4097           /* Skipping type of   task type   or   protected type ... */
4098           if (nocase_tail ("type"))
4099             continue;
4100           break;
4101         }
4102       if (*dbp == '"')
4103         {
4104           dbp += 1;
4105           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4106             continue;
4107         }
4108       else
4109         {
4110           dbp = skip_spaces (dbp);
4111           for (cp = dbp;
4112                (*cp != '\0'
4113                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4114                cp++)
4115             continue;
4116           if (cp == dbp)
4117             return;
4118         }
4119       c = *cp;
4120       *cp = '\0';
4121       name = concat (dbp, name_qualifier, "");
4122       *cp = c;
4123       make_tag (name, strlen (name), TRUE,
4124                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4125       free (name);
4126       if (c == '"')
4127         dbp = cp + 1;
4128       return;
4129     }
4130 }
4131
4132 static void
4133 Ada_funcs (FILE *inf)
4134 {
4135   bool inquote = FALSE;
4136   bool skip_till_semicolumn = FALSE;
4137
4138   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4139     {
4140       while (*dbp != '\0')
4141         {
4142           /* Skip a string i.e. "abcd". */
4143           if (inquote || (*dbp == '"'))
4144             {
4145               dbp = etags_strchr (dbp + !inquote, '"');
4146               if (dbp != NULL)
4147                 {
4148                   inquote = FALSE;
4149                   dbp += 1;
4150                   continue;     /* advance char */
4151                 }
4152               else
4153                 {
4154                   inquote = TRUE;
4155                   break;        /* advance line */
4156                 }
4157             }
4158
4159           /* Skip comments. */
4160           if (dbp[0] == '-' && dbp[1] == '-')
4161             break;              /* advance line */
4162
4163           /* Skip character enclosed in single quote i.e. 'a'
4164              and skip single quote starting an attribute i.e. 'Image. */
4165           if (*dbp == '\'')
4166             {
4167               dbp++ ;
4168               if (*dbp != '\0')
4169                 dbp++;
4170               continue;
4171             }
4172
4173           if (skip_till_semicolumn)
4174             {
4175               if (*dbp == ';')
4176                 skip_till_semicolumn = FALSE;
4177               dbp++;
4178               continue;         /* advance char */
4179             }
4180
4181           /* Search for beginning of a token.  */
4182           if (!begtoken (*dbp))
4183             {
4184               dbp++;
4185               continue;         /* advance char */
4186             }
4187
4188           /* We are at the beginning of a token. */
4189           switch (lowcase (*dbp))
4190             {
4191             case 'f':
4192               if (!packages_only && nocase_tail ("function"))
4193                 Ada_getit (inf, "/f");
4194               else
4195                 break;          /* from switch */
4196               continue;         /* advance char */
4197             case 'p':
4198               if (!packages_only && nocase_tail ("procedure"))
4199                 Ada_getit (inf, "/p");
4200               else if (nocase_tail ("package"))
4201                 Ada_getit (inf, "/s");
4202               else if (nocase_tail ("protected")) /* protected type */
4203                 Ada_getit (inf, "/t");
4204               else
4205                 break;          /* from switch */
4206               continue;         /* advance char */
4207
4208             case 'u':
4209               if (typedefs && !packages_only && nocase_tail ("use"))
4210                 {
4211                   /* when tagging types, avoid tagging  use type Pack.Typename;
4212                      for this, we will skip everything till a ; */
4213                   skip_till_semicolumn = TRUE;
4214                   continue;     /* advance char */
4215                 }
4216
4217             case 't':
4218               if (!packages_only && nocase_tail ("task"))
4219                 Ada_getit (inf, "/k");
4220               else if (typedefs && !packages_only && nocase_tail ("type"))
4221                 {
4222                   Ada_getit (inf, "/t");
4223                   while (*dbp != '\0')
4224                     dbp += 1;
4225                 }
4226               else
4227                 break;          /* from switch */
4228               continue;         /* advance char */
4229             }
4230
4231           /* Look for the end of the token. */
4232           while (!endtoken (*dbp))
4233             dbp++;
4234
4235         } /* advance char */
4236     } /* advance line */
4237 }
4238
4239 \f
4240 /*
4241  * Unix and microcontroller assembly tag handling
4242  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4243  * Idea by Bob Weiner, Motorola Inc. (1994)
4244  */
4245 static void
4246 Asm_labels (FILE *inf)
4247 {
4248   register char *cp;
4249
4250   LOOP_ON_INPUT_LINES (inf, lb, cp)
4251     {
4252       /* If first char is alphabetic or one of [_.$], test for colon
4253          following identifier. */
4254       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4255         {
4256           /* Read past label. */
4257           cp++;
4258           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4259             cp++;
4260           if (*cp == ':' || iswhite (*cp))
4261             /* Found end of label, so copy it and add it to the table. */
4262             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4263                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4264         }
4265     }
4266 }
4267
4268 \f
4269 /*
4270  * Perl support
4271  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4272  * Perl variable names: /^(my|local).../
4273  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4274  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4275  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4276  */
4277 static void
4278 Perl_functions (FILE *inf)
4279 {
4280   char *package = savestr ("main"); /* current package name */
4281   register char *cp;
4282
4283   LOOP_ON_INPUT_LINES (inf, lb, cp)
4284     {
4285       cp = skip_spaces (cp);
4286
4287       if (LOOKING_AT (cp, "package"))
4288         {
4289           free (package);
4290           get_tag (cp, &package);
4291         }
4292       else if (LOOKING_AT (cp, "sub"))
4293         {
4294           char *pos;
4295           char *sp = cp;
4296
4297           while (!notinname (*cp))
4298             cp++;
4299           if (cp == sp)
4300             continue;           /* nothing found */
4301           if ((pos = etags_strchr (sp, ':')) != NULL
4302               && pos < cp && pos[1] == ':')
4303             /* The name is already qualified. */
4304             make_tag (sp, cp - sp, TRUE,
4305                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4306           else
4307             /* Qualify it. */
4308             {
4309               char savechar, *name;
4310
4311               savechar = *cp;
4312               *cp = '\0';
4313               name = concat (package, "::", sp);
4314               *cp = savechar;
4315               make_tag (name, strlen (name), TRUE,
4316                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4317               free (name);
4318             }
4319         }
4320        else if (globals)        /* only if we are tagging global vars */
4321         {
4322           /* Skip a qualifier, if any. */
4323           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4324           /* After "my" or "local", but before any following paren or space. */
4325           char *varstart = cp;
4326
4327           if (qual              /* should this be removed?  If yes, how? */
4328               && (*cp == '$' || *cp == '@' || *cp == '%'))
4329             {
4330               varstart += 1;
4331               do
4332                 cp++;
4333               while (ISALNUM (*cp) || *cp == '_');
4334             }
4335           else if (qual)
4336             {
4337               /* Should be examining a variable list at this point;
4338                  could insist on seeing an open parenthesis. */
4339               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4340                 cp++;
4341             }
4342           else
4343             continue;
4344
4345           make_tag (varstart, cp - varstart, FALSE,
4346                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4347         }
4348     }
4349   free (package);
4350 }
4351
4352
4353 /*
4354  * Python support
4355  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4356  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4357  * More ideas by seb bacon <seb@jamkit.com> (2002)
4358  */
4359 static void
4360 Python_functions (FILE *inf)
4361 {
4362   register char *cp;
4363
4364   LOOP_ON_INPUT_LINES (inf, lb, cp)
4365     {
4366       cp = skip_spaces (cp);
4367       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4368         {
4369           char *name = cp;
4370           while (!notinname (*cp) && *cp != ':')
4371             cp++;
4372           make_tag (name, cp - name, TRUE,
4373                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4374         }
4375     }
4376 }
4377
4378 \f
4379 /*
4380  * PHP support
4381  * Look for:
4382  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4383  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4384  *  - /^[ \t]*define\(\"[^\"]+/
4385  * Only with --members:
4386  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4387  * Idea by Diez B. Roggisch (2001)
4388  */
4389 static void
4390 PHP_functions (FILE *inf)
4391 {
4392   register char *cp, *name;
4393   bool search_identifier = FALSE;
4394
4395   LOOP_ON_INPUT_LINES (inf, lb, cp)
4396     {
4397       cp = skip_spaces (cp);
4398       name = cp;
4399       if (search_identifier
4400           && *cp != '\0')
4401         {
4402           while (!notinname (*cp))
4403             cp++;
4404           make_tag (name, cp - name, TRUE,
4405                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4406           search_identifier = FALSE;
4407         }
4408       else if (LOOKING_AT (cp, "function"))
4409         {
4410           if (*cp == '&')
4411             cp = skip_spaces (cp+1);
4412           if (*cp != '\0')
4413             {
4414               name = cp;
4415               while (!notinname (*cp))
4416                 cp++;
4417               make_tag (name, cp - name, TRUE,
4418                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4419             }
4420           else
4421             search_identifier = TRUE;
4422         }
4423       else if (LOOKING_AT (cp, "class"))
4424         {
4425           if (*cp != '\0')
4426             {
4427               name = cp;
4428               while (*cp != '\0' && !iswhite (*cp))
4429                 cp++;
4430               make_tag (name, cp - name, FALSE,
4431                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4432             }
4433           else
4434             search_identifier = TRUE;
4435         }
4436       else if (strneq (cp, "define", 6)
4437                && (cp = skip_spaces (cp+6))
4438                && *cp++ == '('
4439                && (*cp == '"' || *cp == '\''))
4440         {
4441           char quote = *cp++;
4442           name = cp;
4443           while (*cp != quote && *cp != '\0')
4444             cp++;
4445           make_tag (name, cp - name, FALSE,
4446                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4447         }
4448       else if (members
4449                && LOOKING_AT (cp, "var")
4450                && *cp == '$')
4451         {
4452           name = cp;
4453           while (!notinname (*cp))
4454             cp++;
4455           make_tag (name, cp - name, FALSE,
4456                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4457         }
4458     }
4459 }
4460
4461 \f
4462 /*
4463  * Cobol tag functions
4464  * We could look for anything that could be a paragraph name.
4465  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4466  * Idea by Corny de Souza (1993)
4467  */
4468 static void
4469 Cobol_paragraphs (FILE *inf)
4470 {
4471   register char *bp, *ep;
4472
4473   LOOP_ON_INPUT_LINES (inf, lb, bp)
4474     {
4475       if (lb.len < 9)
4476         continue;
4477       bp += 8;
4478
4479       /* If eoln, compiler option or comment ignore whole line. */
4480       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4481         continue;
4482
4483       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4484         continue;
4485       if (*ep++ == '.')
4486         make_tag (bp, ep - bp, TRUE,
4487                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4488     }
4489 }
4490
4491 \f
4492 /*
4493  * Makefile support
4494  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4495  */
4496 static void
4497 Makefile_targets (FILE *inf)
4498 {
4499   register char *bp;
4500
4501   LOOP_ON_INPUT_LINES (inf, lb, bp)
4502     {
4503       if (*bp == '\t' || *bp == '#')
4504         continue;
4505       while (*bp != '\0' && *bp != '=' && *bp != ':')
4506         bp++;
4507       if (*bp == ':' || (globals && *bp == '='))
4508         {
4509           /* We should detect if there is more than one tag, but we do not.
4510              We just skip initial and final spaces. */
4511           char * namestart = skip_spaces (lb.buffer);
4512           while (--bp > namestart)
4513             if (!notinname (*bp))
4514               break;
4515           make_tag (namestart, bp - namestart + 1, TRUE,
4516                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4517         }
4518     }
4519 }
4520
4521 \f
/*
 * Pascal parsing
 * Original code by Mosur K. Mohan (1989)
 *
 *  Locates tags for procedures & functions.  Doesn't do any type- or
 *  var-definitions.  It does look for the keyword "extern" or
 *  "forward" immediately following the procedure statement; if found,
 *  the tag is skipped.
 *
 *  The scanner walks the input one character at a time through the
 *  global pointer dbp, refilling lb with readline whenever the end of
 *  the current line is reached.  A candidate name is only turned into
 *  a tag once the ';' that ends the heading has been seen and the
 *  text after it is neither "extern" nor "forward"; until then the
 *  line that held the name is kept in a private copy (tline).
 */
static void
Pascal_functions (FILE *inf)
{
  linebuffer tline;             /* mostly copied from C_entries */
  long save_lcno;               /* linecharno of the candidate's line */
  int save_lineno, namelen, taglen; /* saved line number, name length and
                                       length of the line prefix to tag */
  char c, *name;                /* current char; candidate name in tline */

  bool                          /* each of these flags is TRUE if: */
    incomment,                  /* point is inside a comment */
    inquote,                    /* point is inside '..' string */
    get_tagname,                /* point is after PROCEDURE/FUNCTION
                                   keyword, so next item = potential tag */
    found_tag,                  /* point is after a potential tag */
    inparms,                    /* point is within parameter-list */
    verify_tag;                 /* point has passed the parm-list, so the
                                   next token will determine whether this
                                   is a FORWARD/EXTERN to be ignored, or
                                   whether it is a real tag */

  save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
  name = NULL;                  /* keep compiler quiet */
  /* Start with an empty current line so that the first iteration of
     the main loop immediately reads a line from INF. */
  dbp = lb.buffer;
  *dbp = '\0';
  linebuffer_init (&tline);

  incomment = inquote = FALSE;
  found_tag = FALSE;            /* have a proc name; check if extern */
  get_tagname = FALSE;          /* found "procedure" keyword         */
  inparms = FALSE;              /* found '(' after "proc"            */
  verify_tag = FALSE;           /* check if "extern" is ahead        */


  while (!feof (inf))           /* long main loop to get next char */
    {
      c = *dbp++;
      if (c == '\0')            /* if end of line */
        {
          readline (&lb, inf);
          dbp = lb.buffer;
          if (*dbp == '\0')     /* empty line: just read the next one */
            continue;
          if (!((found_tag && verify_tag)
                || get_tagname))
            c = *dbp++;         /* only if don't need *dbp pointing
                                   to the beginning of the name of
                                   the procedure or function */
        }
      if (incomment)
        {
          if (c == '}')         /* within { } comments */
            incomment = FALSE;
          else if (c == '*' && *dbp == ')') /* within (* *) comments */
            {
              dbp++;
              incomment = FALSE;
            }
          continue;
        }
      else if (inquote)
        {
          if (c == '\'')
            inquote = FALSE;
          continue;
        }
      else
        /* Outside comments and strings.  Every case below restarts
           the main loop with `continue', except a ';' that ends a
           heading, which leaves the switch and falls into the
           verification code that follows. */
        switch (c)
          {
          case '\'':
            inquote = TRUE;     /* found first quote */
            continue;
          case '{':             /* found open { comment */
            incomment = TRUE;
            continue;
          case '(':
            if (*dbp == '*')    /* found open (* comment */
              {
                incomment = TRUE;
                dbp++;
              }
            else if (found_tag) /* found '(' after tag, i.e., parm-list */
              inparms = TRUE;
            continue;
          case ')':             /* end of parms list */
            if (inparms)
              inparms = FALSE;
            continue;
          case ';':
            if (found_tag && !inparms) /* end of proc or fn stmt */
              {
                verify_tag = TRUE;
                break;
              }
            continue;
          }
      if (found_tag && verify_tag && (*dbp != ' '))
        {
          /* Check if this is an "extern" declaration. */
          if (*dbp == '\0')     /* need the next line to decide */
            continue;
          if (lowcase (*dbp) == 'e')
            {
              if (nocase_tail ("extern")) /* superfluous, really! */
                {
                  found_tag = FALSE;
                  verify_tag = FALSE;
                }
            }
          else if (lowcase (*dbp) == 'f')
            {
              if (nocase_tail ("forward")) /* check for forward reference */
                {
                  found_tag = FALSE;
                  verify_tag = FALSE;
                }
            }
          if (found_tag && verify_tag) /* not external proc, so make tag */
            {
              found_tag = FALSE;
              verify_tag = FALSE;
              /* Use the values saved when the name was found: lb has
                 been scanned further (and possibly refilled) since. */
              make_tag (name, namelen, TRUE,
                        tline.buffer, taglen, save_lineno, save_lcno);
              continue;
            }
        }
      if (get_tagname)          /* grab name of proc or fn */
        {
          char *cp;

          if (*dbp == '\0')     /* name is not on this line; keep looking */
            continue;

          /* Find block name. */
          for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
            continue;

          /* Save all values for later tagging. */
          linebuffer_setlen (&tline, lb.len);
          strcpy (tline.buffer, lb.buffer);
          save_lineno = lineno;
          save_lcno = linecharno;
          /* name points into the copy, at the same offset dbp has in lb. */
          name = tline.buffer + (dbp - lb.buffer);
          namelen = cp - dbp;
          taglen = cp - lb.buffer + 1;

          dbp = cp;             /* set dbp to e-o-token */
          get_tagname = FALSE;
          found_tag = TRUE;
          continue;

          /* And proceed to check for "extern". */
        }
      else if (!incomment && !inquote && !found_tag)
        {
          /* Check for proc/fn keywords. */
          switch (lowcase (c))
            {
            case 'p':
              if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
                get_tagname = TRUE;
              continue;
            case 'f':
              if (nocase_tail ("unction"))
                get_tagname = TRUE;
              continue;
            }
        }
    } /* while not eof */

  free (tline.buffer);
}
4702
4703 \f
4704 /*
4705  * Lisp tag functions
4706  *  look for (def or (DEF, quote or QUOTE
4707  */
4708
4709 static void L_getit (void);
4710
4711 static void
4712 L_getit (void)
4713 {
4714   if (*dbp == '\'')             /* Skip prefix quote */
4715     dbp++;
4716   else if (*dbp == '(')
4717   {
4718     dbp++;
4719     /* Try to skip "(quote " */
4720     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4721       /* Ok, then skip "(" before name in (defstruct (foo)) */
4722       dbp = skip_spaces (dbp);
4723   }
4724   get_tag (dbp, NULL);
4725 }
4726
4727 static void
4728 Lisp_functions (FILE *inf)
4729 {
4730   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4731     {
4732       if (dbp[0] != '(')
4733         continue;
4734
4735       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4736         {
4737           dbp = skip_non_spaces (dbp);
4738           dbp = skip_spaces (dbp);
4739           L_getit ();
4740         }
4741       else
4742         {
4743           /* Check for (foo::defmumble name-defined ... */
4744           do
4745             dbp++;
4746           while (!notinname (*dbp) && *dbp != ':');
4747           if (*dbp == ':')
4748             {
4749               do
4750                 dbp++;
4751               while (*dbp == ':');
4752
4753               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4754                 {
4755                   dbp = skip_non_spaces (dbp);
4756                   dbp = skip_spaces (dbp);
4757                   L_getit ();
4758                 }
4759             }
4760         }
4761     }
4762 }
4763
4764 \f
4765 /*
4766  * Lua script language parsing
4767  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4768  *
4769  *  "function" and "local function" are tags if they start at column 1.
4770  */
4771 static void
4772 Lua_functions (FILE *inf)
4773 {
4774   register char *bp;
4775
4776   LOOP_ON_INPUT_LINES (inf, lb, bp)
4777     {
4778       if (bp[0] != 'f' && bp[0] != 'l')
4779         continue;
4780
4781       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4782
4783       if (LOOKING_AT (bp, "function"))
4784         get_tag (bp, NULL);
4785     }
4786 }
4787
4788 \f
4789 /*
4790  * PostScript tags
4791  * Just look for lines where the first character is '/'
4792  * Also look at "defineps" for PSWrap
4793  * Ideas by:
4794  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4795  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4796  */
4797 static void
4798 PS_functions (FILE *inf)
4799 {
4800   register char *bp, *ep;
4801
4802   LOOP_ON_INPUT_LINES (inf, lb, bp)
4803     {
4804       if (bp[0] == '/')
4805         {
4806           for (ep = bp+1;
4807                *ep != '\0' && *ep != ' ' && *ep != '{';
4808                ep++)
4809             continue;
4810           make_tag (bp, ep - bp, TRUE,
4811                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4812         }
4813       else if (LOOKING_AT (bp, "defineps"))
4814         get_tag (bp, NULL);
4815     }
4816 }
4817
4818 \f
4819 /*
4820  * Forth tags
4821  * Ignore anything after \ followed by space or in ( )
4822  * Look for words defined by :
4823  * Look for constant, code, create, defer, value, and variable
4824  * OBP extensions:  Look for buffer:, field,
4825  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4826  */
4827 static void
4828 Forth_words (FILE *inf)
4829 {
4830   register char *bp;
4831
4832   LOOP_ON_INPUT_LINES (inf, lb, bp)
4833     while ((bp = skip_spaces (bp))[0] != '\0')
4834       if (bp[0] == '\\' && iswhite (bp[1]))
4835         break;                  /* read next line */
4836       else if (bp[0] == '(' && iswhite (bp[1]))
4837         do                      /* skip to ) or eol */
4838           bp++;
4839         while (*bp != ')' && *bp != '\0');
4840       else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4841                || LOOKING_AT_NOCASE (bp, "constant")
4842                || LOOKING_AT_NOCASE (bp, "code")
4843                || LOOKING_AT_NOCASE (bp, "create")
4844                || LOOKING_AT_NOCASE (bp, "defer")
4845                || LOOKING_AT_NOCASE (bp, "value")
4846                || LOOKING_AT_NOCASE (bp, "variable")
4847                || LOOKING_AT_NOCASE (bp, "buffer:")
4848                || LOOKING_AT_NOCASE (bp, "field"))
4849         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4850       else
4851         bp = skip_non_spaces (bp);
4852 }
4853
4854 \f
4855 /*
4856  * Scheme tag functions
4857  * look for (def... xyzzy
4858  *          (def... (xyzzy
4859  *          (def ... ((...(xyzzy ....
4860  *          (set! xyzzy
4861  * Original code by Ken Haase (1985?)
4862  */
4863 static void
4864 Scheme_functions (FILE *inf)
4865 {
4866   register char *bp;
4867
4868   LOOP_ON_INPUT_LINES (inf, lb, bp)
4869     {
4870       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4871         {
4872           bp = skip_non_spaces (bp+4);
4873           /* Skip over open parens and white space.  Don't continue past
4874              '\0'. */
4875           while (*bp && notinname (*bp))
4876             bp++;
4877           get_tag (bp, NULL);
4878         }
4879       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4880         get_tag (bp, NULL);
4881     }
4882 }
4883
4884 \f
/* Find tags in TeX and LaTeX input files.  */

/* TEX_toktab is a table of TeX control sequences that define tags.
 * Each entry records one such control sequence (without the escape
 * character) together with its length.  The table is built once by
 * TEX_decode_env and ends with an entry whose buffer is NULL.
 *
 * Original code from who knows whom.
 * Ideas by:
 *   Stefan Monnier (2002)
 */

static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by colons.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape and grouping characters used while scanning the current
   file; TEX_mode resets all three for each file.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4910
4911 /*
4912  * TeX/LaTeX scanning loop.
4913  */
4914 static void
4915 TeX_commands (FILE *inf)
4916 {
4917   char *cp;
4918   linebuffer *key;
4919
4920   /* Select either \ or ! as escape character.  */
4921   TEX_mode (inf);
4922
4923   /* Initialize token table once from environment. */
4924   if (TEX_toktab == NULL)
4925     TEX_decode_env ("TEXTAGS", TEX_defenv);
4926
4927   LOOP_ON_INPUT_LINES (inf, lb, cp)
4928     {
4929       /* Look at each TEX keyword in line. */
4930       for (;;)
4931         {
4932           /* Look for a TEX escape. */
4933           while (*cp++ != TEX_esc)
4934             if (cp[-1] == '\0' || cp[-1] == '%')
4935               goto tex_next_line;
4936
4937           for (key = TEX_toktab; key->buffer != NULL; key++)
4938             if (strneq (cp, key->buffer, key->len))
4939               {
4940                 register char *p;
4941                 int namelen, linelen;
4942                 bool opgrp = FALSE;
4943
4944                 cp = skip_spaces (cp + key->len);
4945                 if (*cp == TEX_opgrp)
4946                   {
4947                     opgrp = TRUE;
4948                     cp++;
4949                   }
4950                 for (p = cp;
4951                      (!iswhite (*p) && *p != '#' &&
4952                       *p != TEX_opgrp && *p != TEX_clgrp);
4953                      p++)
4954                   continue;
4955                 namelen = p - cp;
4956                 linelen = lb.len;
4957                 if (!opgrp || *p == TEX_clgrp)
4958                   {
4959                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4960                       p++;
4961                     linelen = p - lb.buffer + 1;
4962                   }
4963                 make_tag (cp, namelen, TRUE,
4964                           lb.buffer, linelen, lineno, linecharno);
4965                 goto tex_next_line; /* We only tag a line once */
4966               }
4967         }
4968     tex_next_line:
4969       ;
4970     }
4971 }
4972
4973 #define TEX_LESC '\\'
4974 #define TEX_SESC '!'
4975
4976 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4977    chars accordingly. */
4978 static void
4979 TEX_mode (FILE *inf)
4980 {
4981   int c;
4982
4983   while ((c = getc (inf)) != EOF)
4984     {
4985       /* Skip to next line if we hit the TeX comment char. */
4986       if (c == '%')
4987         while (c != '\n' && c != EOF)
4988           c = getc (inf);
4989       else if (c == TEX_LESC || c == TEX_SESC )
4990         break;
4991     }
4992
4993   if (c == TEX_LESC)
4994     {
4995       TEX_esc = TEX_LESC;
4996       TEX_opgrp = '{';
4997       TEX_clgrp = '}';
4998     }
4999   else
5000     {
5001       TEX_esc = TEX_SESC;
5002       TEX_opgrp = '<';
5003       TEX_clgrp = '>';
5004     }
5005   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5006      No attempt is made to correct the situation. */
5007   rewind (inf);
5008 }
5009
5010 /* Read environment and prepend it to the default string.
5011    Build token table. */
5012 static void
5013 TEX_decode_env (const char *evarname, const char *defenv)
5014 {
5015   register const char *env, *p;
5016   int i, len;
5017
5018   /* Append default string to environment. */
5019   env = getenv (evarname);
5020   if (!env)
5021     env = defenv;
5022   else
5023     env = concat (env, defenv, "");
5024
5025   /* Allocate a token table */
5026   for (len = 1, p = env; p;)
5027     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5028       len++;
5029   TEX_toktab = xnew (len, linebuffer);
5030
5031   /* Unpack environment string into token table. Be careful about */
5032   /* zero-length strings (leading ':', "::" and trailing ':') */
5033   for (i = 0; *env != '\0';)
5034     {
5035       p = etags_strchr (env, ':');
5036       if (!p)                   /* End of environment string. */
5037         p = env + strlen (env);
5038       if (p - env > 0)
5039         {                       /* Only non-zero strings. */
5040           TEX_toktab[i].buffer = savenstr (env, p - env);
5041           TEX_toktab[i].len = p - env;
5042           i++;
5043         }
5044       if (*p)
5045         env = p + 1;
5046       else
5047         {
5048           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5049           TEX_toktab[i].len = 0;
5050           break;
5051         }
5052     }
5053 }
5054
5055 \f
5056 /* Texinfo support.  Dave Love, Mar. 2000.  */
5057 static void
5058 Texinfo_nodes (FILE *inf)
5059 {
5060   char *cp, *start;
5061   LOOP_ON_INPUT_LINES (inf, lb, cp)
5062     if (LOOKING_AT (cp, "@node"))
5063       {
5064         start = cp;
5065         while (*cp != '\0' && *cp != ',')
5066           cp++;
5067         make_tag (start, cp - start, TRUE,
5068                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5069       }
5070 }
5071
5072 \f
/*
 * HTML support.
 * Contents of <title>, <h1>, <h2>, <h3> are tags.
 * Contents of <a name=xxx> are tags with name xxx.
 * An id=xxx attribute found while scanning inside any HTML tag is
 * handled like name=xxx in an anchor.
 *
 * The scanner is a small state machine driven by the flags below.
 * The flags keep their values from one input line to the next, so an
 * HTML tag or its text may be split over several lines.
 *
 * Francesco Potortì, 2002.
 */
static void
HTML_labels (FILE *inf)
{
  bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
  bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
  bool intag = FALSE;           /* inside an html tag, looking for ID= */
  bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = FALSE;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag) /* look for "name=" or "id=" */
          {
            /* Advance to the next character that could start "name="
               or "id=", or to the end of the tag or of the line. */
            while (*dbp != '\0' && *dbp != '>'
                   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = FALSE;
                continue;       /* look on the same line */
              }
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                /* dbp is now on the attribute value; only double
                   quotes are recognized as value delimiters. */
                bool quoted = (dbp[0] == '"');

                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                /* Remember the value; it becomes the tag name once
                   the text following the HTML tag has been found. */
                linebuffer_setlen (&token_name, end - dbp);
                memcpy (token_name.buffer, dbp, end - dbp);
                token_name.buffer[end - dbp] = '\0';

                dbp = end;
                intag = FALSE;  /* we found what we looked for */
                skiptag = TRUE; /* skip to the end of the tag */
                getnext = TRUE; /* then grab the text */
                continue;       /* look on the same line */
              }
            /* An 'n' or 'i' that starts neither attribute: step over it. */
            dbp += 1;
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text: scan it for
                   name=/id= first. */
                intag = TRUE;
                inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The text up to the next '<' or the end of the line is
               what gets tagged; token_name may be empty here. */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, TRUE,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = FALSE;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = TRUE;
            /* NOTE(review): inanchor is set for an anchor here but is
               not cleared when the tag is something else, so it keeps
               whatever value an earlier tag left - confirm whether
               that is intended. */
            if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = TRUE;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                intag = FALSE;
                getnext = TRUE;
                continue;       /* look on the same line */
              }
            /* Some other HTML tag: step over the '<' and let the
               intag branch scan it for an id attribute. */
            dbp += 1;
          }
      }
}
5191
5192 \f
5193 /*
5194  * Prolog support
5195  *
5196  * Assumes that the predicate or rule starts at column 0.
5197  * Only the first clause of a predicate or rule is added.
5198  * Original code by Sunichirou Sugou (1989)
5199  * Rewritten by Anders Lindgren (1996)
5200  */
5201 static size_t prolog_pr (char *, char *);
5202 static void prolog_skip_comment (linebuffer *, FILE *);
5203 static size_t prolog_atom (char *, size_t);
5204
5205 static void
5206 Prolog_functions (FILE *inf)
5207 {
5208   char *cp, *last;
5209   size_t len;
5210   size_t allocated;
5211
5212   allocated = 0;
5213   len = 0;
5214   last = NULL;
5215
5216   LOOP_ON_INPUT_LINES (inf, lb, cp)
5217     {
5218       if (cp[0] == '\0')        /* Empty line */
5219         continue;
5220       else if (iswhite (cp[0])) /* Not a predicate */
5221         continue;
5222       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5223         prolog_skip_comment (&lb, inf);
5224       else if ((len = prolog_pr (cp, last)) > 0)
5225         {
5226           /* Predicate or rule.  Store the function name so that we
5227              only generate a tag for the first clause.  */
5228           if (last == NULL)
5229             last = xnew (len + 1, char);
5230           else if (len + 1 > allocated)
5231             xrnew (last, len + 1, char);
5232           allocated = len + 1;
5233           memcpy (last, cp, len);
5234           last[len] = '\0';
5235         }
5236     }
5237   free (last);
5238 }
5239
5240
5241 static void
5242 prolog_skip_comment (linebuffer *plb, FILE *inf)
5243 {
5244   char *cp;
5245
5246   do
5247     {
5248       for (cp = plb->buffer; *cp != '\0'; cp++)
5249         if (cp[0] == '*' && cp[1] == '/')
5250           return;
5251       readline (plb, inf);
5252     }
5253   while (!feof (inf));
5254 }
5255
5256 /*
5257  * A predicate or rule definition is added if it matches:
5258  *     <beginning of line><Prolog Atom><whitespace>(
5259  * or  <beginning of line><Prolog Atom><whitespace>:-
5260  *
5261  * It is added to the tags database if it doesn't match the
5262  * name of the previous clause header.
5263  *
5264  * Return the size of the name of the predicate or rule, or 0 if no
5265  * header was found.
5266  */
5267 static size_t
5268 prolog_pr (char *s, char *last)
5269
5270                                 /* Name of last clause. */
5271 {
5272   size_t pos;
5273   size_t len;
5274
5275   pos = prolog_atom (s, 0);
5276   if (! pos)
5277     return 0;
5278
5279   len = pos;
5280   pos = skip_spaces (s + pos) - s;
5281
5282   if ((s[pos] == '.'
5283        || (s[pos] == '(' && (pos += 1))
5284        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5285       && (last == NULL          /* save only the first clause */
5286           || len != strlen (last)
5287           || !strneq (s, last, len)))
5288         {
5289           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5290           return len;
5291         }
5292   else
5293     return 0;
5294 }
5295
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if no atom was found.
 *
 * An atom, in this context, is one of:
 * - A sequence of alphanumeric characters and underscores whose first
 *   character is a lower case letter or an underscore.
 * - A string enclosed in single quotes.  Inside it, two consecutive
 *   single quotes stand for one, and a backslash quotes the character
 *   that follows.  A quoted atom that does not end on this line is
 *   rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* Unquoted atom. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (size_t) (p - start);
    }

  if (*p != '\'')
    return 0;

  /* Quoted atom: look for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        if (p[1] != '\'')
          return (size_t) (p + 1 - start);      /* closing quote */
        p += 2;                 /* doubled quote */
        break;
      case '\0':
        return 0;               /* multiline quoted atoms are ignored */
      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;                 /* backslash and the quoted character */
        break;
      default:
        p++;
        break;
      }
}
5352
5353 \f
5354 /*
5355  * Support for Erlang
5356  *
5357  * Generates tags for functions, defines, and records.
5358  * Assumes that Erlang functions start at column 0.
5359  * Original code by Anders Lindgren (1996)
5360  */
5361 static int erlang_func (char *, char *);
5362 static void erlang_attribute (char *);
5363 static int erlang_atom (char *);
5364
5365 static void
5366 Erlang_functions (FILE *inf)
5367 {
5368   char *cp, *last;
5369   int len;
5370   int allocated;
5371
5372   allocated = 0;
5373   len = 0;
5374   last = NULL;
5375
5376   LOOP_ON_INPUT_LINES (inf, lb, cp)
5377     {
5378       if (cp[0] == '\0')        /* Empty line */
5379         continue;
5380       else if (iswhite (cp[0])) /* Not function nor attribute */
5381         continue;
5382       else if (cp[0] == '%')    /* comment */
5383         continue;
5384       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5385         continue;
5386       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5387         {
5388           erlang_attribute (cp);
5389           if (last != NULL)
5390             {
5391               free (last);
5392               last = NULL;
5393             }
5394         }
5395       else if ((len = erlang_func (cp, last)) > 0)
5396         {
5397           /*
5398            * Function.  Store the function name so that we only
5399            * generates a tag for the first clause.
5400            */
5401           if (last == NULL)
5402             last = xnew (len + 1, char);
5403           else if (len + 1 > allocated)
5404             xrnew (last, len + 1, char);
5405           allocated = len + 1;
5406           memcpy (last, cp, len);
5407           last[len] = '\0';
5408         }
5409     }
5410   free (last);
5411 }
5412
5413
5414 /*
5415  * A function definition is added if it matches:
5416  *     <beginning of line><Erlang Atom><whitespace>(
5417  *
5418  * It is added to the tags database if it doesn't match the
5419  * name of the previous clause header.
5420  *
5421  * Return the size of the name of the function, or 0 if no function
5422  * was found.
5423  */
5424 static int
5425 erlang_func (char *s, char *last)
5426
5427                                 /* Name of last clause. */
5428 {
5429   int pos;
5430   int len;
5431
5432   pos = erlang_atom (s);
5433   if (pos < 1)
5434     return 0;
5435
5436   len = pos;
5437   pos = skip_spaces (s + pos) - s;
5438
5439   /* Save only the first clause. */
5440   if (s[pos++] == '('
5441       && (last == NULL
5442           || len != (int)strlen (last)
5443           || !strneq (s, last, len)))
5444         {
5445           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5446           return len;
5447         }
5448
5449   return 0;
5450 }
5451
5452
5453 /*
5454  * Handle attributes.  Currently, tags are generated for defines
5455  * and records.
5456  *
5457  * They are on the form:
5458  * -define(foo, bar).
5459  * -define(Foo(M, N), M+N).
5460  * -record(graph, {vtab = notable, cyclic = true}).
5461  */
5462 static void
5463 erlang_attribute (char *s)
5464 {
5465   char *cp = s;
5466
5467   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5468       && *cp++ == '(')
5469     {
5470       int len = erlang_atom (skip_spaces (cp));
5471       if (len > 0)
5472         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5473     }
5474   return;
5475 }
5476
5477
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted atom is a letter or underscore followed by alphanumeric
 * characters and underscores.  A quoted atom is enclosed in single
 * quotes; inside it a backslash quotes the following character.
 *
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  int n;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote. */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')
            return 0;           /* multiline quoted atoms are ignored */
          if (s[n] == '\\')
            {
              n++;              /* step onto the quoted character */
              if (s[n] == '\0')
                return 0;
            }
          n++;
        }
      return n + 1;             /* count the closing quote */
    }

  if (!ISALPHA (s[0]) && s[0] != '_')
    return 0;

  /* Unquoted atom. */
  n = 1;
  while (ISALNUM (s[n]) || s[n] == '_')
    n++;
  return n;
}
5505
5506 \f
5507 static char *scan_separators (char *);
5508 static void add_regex (char *, language *);
5509 static char *substitute (char *, char *, struct re_registers *);
5510
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text following it is copied
 * in place to the beginning of NAME, up to the first unquoted
 * occurrence of the separator, and is null terminated.
 *
 * A backslash quotes the next character: \a \b \d \e \f \n \r \t \v
 * are replaced by the corresponding control characters, a quoted
 * separator is replaced by the bare separator, and for anything else
 * the backslash is preserved.  A backslash that is the last character
 * of the string is dropped.
 *
 * Return a pointer to the terminating separator, or NULL for an
 * unterminated string.  An empty NAME has no separator at all and
 * also yields NULL, without reading past its terminator.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;

  if (sep == '\0')
    return NULL;                /* empty input: nothing to scan */

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Quoted character.  The backslash test comes before the
             separator test, so a backslash used as separator must
             itself be quoted to be taken literally.  */
          if (*++name == '\0')
            break;              /* trailing backslash: unterminated */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5569
5570 /* Look at the argument of --regex or --no-regex and do the right
5571    thing.  Same for each line of a regexp file. */
5572 static void
5573 analyse_regex (char *regex_arg)
5574 {
5575   if (regex_arg == NULL)
5576     {
5577       free_regexps ();          /* --no-regex: remove existing regexps */
5578       return;
5579     }
5580
5581   /* A real --regexp option or a line in a regexp file. */
5582   switch (regex_arg[0])
5583     {
5584       /* Comments in regexp file or null arg to --regex. */
5585     case '\0':
5586     case ' ':
5587     case '\t':
5588       break;
5589
5590       /* Read a regex file.  This is recursive and may result in a
5591          loop, which will stop when the file descriptors are exhausted. */
5592     case '@':
5593       {
5594         FILE *regexfp;
5595         linebuffer regexbuf;
5596         char *regexfile = regex_arg + 1;
5597
5598         /* regexfile is a file containing regexps, one per line. */
5599         regexfp = fopen (regexfile, "r");
5600         if (regexfp == NULL)
5601           {
5602             pfatal (regexfile);
5603             return;
5604           }
5605         linebuffer_init (&regexbuf);
5606         while (readline_internal (&regexbuf, regexfp) > 0)
5607           analyse_regex (regexbuf.buffer);
5608         free (regexbuf.buffer);
5609         fclose (regexfp);
5610       }
5611       break;
5612
5613       /* Regexp to be used for a specific language only. */
5614     case '{':
5615       {
5616         language *lang;
5617         char *lang_name = regex_arg + 1;
5618         char *cp;
5619
5620         for (cp = lang_name; *cp != '}'; cp++)
5621           if (*cp == '\0')
5622             {
5623               error ("unterminated language name in regex: %s", regex_arg);
5624               return;
5625             }
5626         *cp++ = '\0';
5627         lang = get_language_from_langname (lang_name);
5628         if (lang == NULL)
5629           return;
5630         add_regex (cp, lang);
5631       }
5632       break;
5633
5634       /* Regexp to be used for any language. */
5635     default:
5636       add_regex (regex_arg, NULL);
5637       break;
5638     }
5639 }
5640
5641 /* Separate the regexp pattern, compile it,
5642    and care for optional name and modifiers. */
5643 static void
5644 add_regex (char *regexp_pattern, language *lang)
5645 {
5646   static struct re_pattern_buffer zeropattern;
5647   char sep, *pat, *name, *modifiers;
5648   char empty = '\0';
5649   const char *err;
5650   struct re_pattern_buffer *patbuf;
5651   regexp *rp;
5652   bool
5653     force_explicit_name = TRUE, /* do not use implicit tag names */
5654     ignore_case = FALSE,        /* case is significant */
5655     multi_line = FALSE,         /* matches are done one line at a time */
5656     single_line = FALSE;        /* dot does not match newline */
5657
5658
5659   if (strlen (regexp_pattern) < 3)
5660     {
5661       error ("null regexp");
5662       return;
5663     }
5664   sep = regexp_pattern[0];
5665   name = scan_separators (regexp_pattern);
5666   if (name == NULL)
5667     {
5668       error ("%s: unterminated regexp", regexp_pattern);
5669       return;
5670     }
5671   if (name[1] == sep)
5672     {
5673       error ("null name for regexp \"%s\"", regexp_pattern);
5674       return;
5675     }
5676   modifiers = scan_separators (name);
5677   if (modifiers == NULL)        /* no terminating separator --> no name */
5678     {
5679       modifiers = name;
5680       name = &empty;
5681     }
5682   else
5683     modifiers += 1;             /* skip separator */
5684
5685   /* Parse regex modifiers. */
5686   for (; modifiers[0] != '\0'; modifiers++)
5687     switch (modifiers[0])
5688       {
5689       case 'N':
5690         if (modifiers == name)
5691           error ("forcing explicit tag name but no name, ignoring");
5692         force_explicit_name = TRUE;
5693         break;
5694       case 'i':
5695         ignore_case = TRUE;
5696         break;
5697       case 's':
5698         single_line = TRUE;
5699         /* FALLTHRU */
5700       case 'm':
5701         multi_line = TRUE;
5702         need_filebuf = TRUE;
5703         break;
5704       default:
5705         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5706         break;
5707       }
5708
5709   patbuf = xnew (1, struct re_pattern_buffer);
5710   *patbuf = zeropattern;
5711   if (ignore_case)
5712     {
5713       static char lc_trans[CHARS];
5714       int i;
5715       for (i = 0; i < CHARS; i++)
5716         lc_trans[i] = lowcase (i);
5717       patbuf->translate = lc_trans;     /* translation table to fold case  */
5718     }
5719
5720   if (multi_line)
5721     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5722   else
5723     pat = regexp_pattern;
5724
5725   if (single_line)
5726     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5727   else
5728     re_set_syntax (RE_SYNTAX_EMACS);
5729
5730   err = re_compile_pattern (pat, strlen (pat), patbuf);
5731   if (multi_line)
5732     free (pat);
5733   if (err != NULL)
5734     {
5735       error ("%s while compiling pattern", err);
5736       return;
5737     }
5738
5739   rp = p_head;
5740   p_head = xnew (1, regexp);
5741   p_head->pattern = savestr (regexp_pattern);
5742   p_head->p_next = rp;
5743   p_head->lang = lang;
5744   p_head->pat = patbuf;
5745   p_head->name = savestr (name);
5746   p_head->error_signaled = FALSE;
5747   p_head->force_explicit_name = force_explicit_name;
5748   p_head->ignore_case = ignore_case;
5749   p_head->multi_line = multi_line;
5750 }
5751
5752 /*
5753  * Do the substitutions indicated by the regular expression and
5754  * arguments.
5755  */
5756 static char *
5757 substitute (char *in, char *out, struct re_registers *regs)
5758 {
5759   char *result, *t;
5760   int size, dig, diglen;
5761
5762   result = NULL;
5763   size = strlen (out);
5764
5765   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5766   if (out[size - 1] == '\\')
5767     fatal ("pattern error in \"%s\"", out);
5768   for (t = etags_strchr (out, '\\');
5769        t != NULL;
5770        t = etags_strchr (t + 2, '\\'))
5771     if (ISDIGIT (t[1]))
5772       {
5773         dig = t[1] - '0';
5774         diglen = regs->end[dig] - regs->start[dig];
5775         size += diglen - 2;
5776       }
5777     else
5778       size -= 1;
5779
5780   /* Allocate space and do the substitutions. */
5781   assert (size >= 0);
5782   result = xnew (size + 1, char);
5783
5784   for (t = result; *out != '\0'; out++)
5785     if (*out == '\\' && ISDIGIT (*++out))
5786       {
5787         dig = *out - '0';
5788         diglen = regs->end[dig] - regs->start[dig];
5789         memcpy (t, in + regs->start[dig], diglen);
5790         t += diglen;
5791       }
5792     else
5793       *t++ = *out;
5794   *t = '\0';
5795
5796   assert (t <= result + size);
5797   assert (t - result == (int)strlen (result));
5798
5799   return result;
5800 }
5801
5802 /* Deallocate all regexps. */
5803 static void
5804 free_regexps (void)
5805 {
5806   regexp *rp;
5807   while (p_head != NULL)
5808     {
5809       rp = p_head->p_next;
5810       free (p_head->pattern);
5811       free (p_head->name);
5812       free (p_head);
5813       p_head = rp;
5814     }
5815   return;
5816 }
5817
5818 /*
5819  * Reads the whole file as a single string from `filebuf' and looks for
5820  * multi-line regular expressions, creating tags on matches.
5821  * readline already dealt with normal regexps.
5822  *
5823  * Idea by Ben Wing <ben@666.com> (2002).
5824  */
5825 static void
5826 regex_tag_multiline (void)
5827 {
5828   char *buffer = filebuf.buffer;
5829   regexp *rp;
5830   char *name;
5831
5832   for (rp = p_head; rp != NULL; rp = rp->p_next)
5833     {
5834       int match = 0;
5835
5836       if (!rp->multi_line)
5837         continue;               /* skip normal regexps */
5838
5839       /* Generic initializations before parsing file from memory. */
5840       lineno = 1;               /* reset global line number */
5841       charno = 0;               /* reset global char number */
5842       linecharno = 0;           /* reset global char number of line start */
5843
5844       /* Only use generic regexps or those for the current language. */
5845       if (rp->lang != NULL && rp->lang != curfdp->lang)
5846         continue;
5847
5848       while (match >= 0 && match < filebuf.len)
5849         {
5850           match = re_search (rp->pat, buffer, filebuf.len, charno,
5851                              filebuf.len - match, &rp->regs);
5852           switch (match)
5853             {
5854             case -2:
5855               /* Some error. */
5856               if (!rp->error_signaled)
5857                 {
5858                   error ("regexp stack overflow while matching \"%s\"",
5859                          rp->pattern);
5860                   rp->error_signaled = TRUE;
5861                 }
5862               break;
5863             case -1:
5864               /* No match. */
5865               break;
5866             default:
5867               if (match == rp->regs.end[0])
5868                 {
5869                   if (!rp->error_signaled)
5870                     {
5871                       error ("regexp matches the empty string: \"%s\"",
5872                              rp->pattern);
5873                       rp->error_signaled = TRUE;
5874                     }
5875                   match = -3;   /* exit from while loop */
5876                   break;
5877                 }
5878
5879               /* Match occurred.  Construct a tag. */
5880               while (charno < rp->regs.end[0])
5881                 if (buffer[charno++] == '\n')
5882                   lineno++, linecharno = charno;
5883               name = rp->name;
5884               if (name[0] == '\0')
5885                 name = NULL;
5886               else /* make a named tag */
5887                 name = substitute (buffer, rp->name, &rp->regs);
5888               if (rp->force_explicit_name)
5889                 /* Force explicit tag name, if a name is there. */
5890                 pfnote (name, TRUE, buffer + linecharno,
5891                         charno - linecharno + 1, lineno, linecharno);
5892               else
5893                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5894                           charno - linecharno + 1, lineno, linecharno);
5895               break;
5896             }
5897         }
5898     }
5899 }
5900
5901 \f
5902 static bool
5903 nocase_tail (const char *cp)
5904 {
5905   register int len = 0;
5906
5907   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5908     cp++, len++;
5909   if (*cp == '\0' && !intoken (dbp[len]))
5910     {
5911       dbp += len;
5912       return TRUE;
5913     }
5914   return FALSE;
5915 }
5916
5917 static void
5918 get_tag (register char *bp, char **namepp)
5919 {
5920   register char *cp = bp;
5921
5922   if (*bp != '\0')
5923     {
5924       /* Go till you get to white space or a syntactic break */
5925       for (cp = bp + 1; !notinname (*cp); cp++)
5926         continue;
5927       make_tag (bp, cp - bp, TRUE,
5928                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5929     }
5930
5931   if (namepp != NULL)
5932     *namepp = savenstr (bp, cp - bp);
5933 }
5934
5935 /*
5936  * Read a line of text from `stream' into `lbp', excluding the
5937  * newline or CR-NL, if any.  Return the number of characters read from
5938  * `stream', which is the length of the line including the newline.
5939  *
5940  * On DOS or Windows we do not count the CR character, if any before the
5941  * NL, in the returned length; this mirrors the behavior of Emacs on those
5942  * platforms (for text files, it translates CR-NL to NL as it reads in the
5943  * file).
5944  *
5945  * If multi-line regular expressions are requested, each line read is
5946  * appended to `filebuf'.
5947  */
5948 static long
5949 readline_internal (linebuffer *lbp, register FILE *stream)
5950 {
5951   char *buffer = lbp->buffer;
5952   register char *p = lbp->buffer;
5953   register char *pend;
5954   int chars_deleted;
5955
5956   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5957
5958   for (;;)
5959     {
5960       register int c = getc (stream);
5961       if (p == pend)
5962         {
5963           /* We're at the end of linebuffer: expand it. */
5964           lbp->size *= 2;
5965           xrnew (buffer, lbp->size, char);
5966           p += buffer - lbp->buffer;
5967           pend = buffer + lbp->size;
5968           lbp->buffer = buffer;
5969         }
5970       if (c == EOF)
5971         {
5972           *p = '\0';
5973           chars_deleted = 0;
5974           break;
5975         }
5976       if (c == '\n')
5977         {
5978           if (p > buffer && p[-1] == '\r')
5979             {
5980               p -= 1;
5981 #ifdef DOS_NT
5982              /* Assume CRLF->LF translation will be performed by Emacs
5983                 when loading this file, so CRs won't appear in the buffer.
5984                 It would be cleaner to compensate within Emacs;
5985                 however, Emacs does not know how many CRs were deleted
5986                 before any given point in the file.  */
5987               chars_deleted = 1;
5988 #else
5989               chars_deleted = 2;
5990 #endif
5991             }
5992           else
5993             {
5994               chars_deleted = 1;
5995             }
5996           *p = '\0';
5997           break;
5998         }
5999       *p++ = c;
6000     }
6001   lbp->len = p - buffer;
6002
6003   if (need_filebuf              /* we need filebuf for multi-line regexps */
6004       && chars_deleted > 0)     /* not at EOF */
6005     {
6006       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6007         {
6008           /* Expand filebuf. */
6009           filebuf.size *= 2;
6010           xrnew (filebuf.buffer, filebuf.size, char);
6011         }
6012       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6013       filebuf.len += lbp->len;
6014       filebuf.buffer[filebuf.len++] = '\n';
6015       filebuf.buffer[filebuf.len] = '\0';
6016     }
6017
6018   return lbp->len + chars_deleted;
6019 }
6020
6021 /*
6022  * Like readline_internal, above, but in addition try to match the
6023  * input line against relevant regular expressions and manage #line
6024  * directives.
6025  */
6026 static void
6027 readline (linebuffer *lbp, FILE *stream)
6028 {
6029   long result;
6030
6031   linecharno = charno;          /* update global char number of line start */
6032   result = readline_internal (lbp, stream); /* read line */
6033   lineno += 1;                  /* increment global line number */
6034   charno += result;             /* increment global char number */
6035
6036   /* Honor #line directives. */
6037   if (!no_line_directive)
6038     {
6039       static bool discard_until_line_directive;
6040
6041       /* Check whether this is a #line directive. */
6042       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6043         {
6044           unsigned int lno;
6045           int start = 0;
6046
6047           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6048               && start > 0)     /* double quote character found */
6049             {
6050               char *endp = lbp->buffer + start;
6051
6052               while ((endp = etags_strchr (endp, '"')) != NULL
6053                      && endp[-1] == '\\')
6054                 endp++;
6055               if (endp != NULL)
6056                 /* Ok, this is a real #line directive.  Let's deal with it. */
6057                 {
6058                   char *taggedabsname;  /* absolute name of original file */
6059                   char *taggedfname;    /* name of original file as given */
6060                   char *name;           /* temp var */
6061
6062                   discard_until_line_directive = FALSE; /* found it */
6063                   name = lbp->buffer + start;
6064                   *endp = '\0';
6065                   canonicalize_filename (name);
6066                   taggedabsname = absolute_filename (name, tagfiledir);
6067                   if (filename_is_absolute (name)
6068                       || filename_is_absolute (curfdp->infname))
6069                     taggedfname = savestr (taggedabsname);
6070                   else
6071                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6072
6073                   if (streq (curfdp->taggedfname, taggedfname))
6074                     /* The #line directive is only a line number change.  We
6075                        deal with this afterwards. */
6076                     free (taggedfname);
6077                   else
6078                     /* The tags following this #line directive should be
6079                        attributed to taggedfname.  In order to do this, set
6080                        curfdp accordingly. */
6081                     {
6082                       fdesc *fdp; /* file description pointer */
6083
6084                       /* Go look for a file description already set up for the
6085                          file indicated in the #line directive.  If there is
6086                          one, use it from now until the next #line
6087                          directive. */
6088                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6089                         if (streq (fdp->infname, curfdp->infname)
6090                             && streq (fdp->taggedfname, taggedfname))
6091                           /* If we remove the second test above (after the &&)
6092                              then all entries pertaining to the same file are
6093                              coalesced in the tags file.  If we use it, then
6094                              entries pertaining to the same file but generated
6095                              from different files (via #line directives) will
6096                              go into separate sections in the tags file.  These
6097                              alternatives look equivalent.  The first one
6098                              destroys some apparently useless information. */
6099                           {
6100                             curfdp = fdp;
6101                             free (taggedfname);
6102                             break;
6103                           }
6104                       /* Else, if we already tagged the real file, skip all
6105                          input lines until the next #line directive. */
6106                       if (fdp == NULL) /* not found */
6107                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6108                           if (streq (fdp->infabsname, taggedabsname))
6109                             {
6110                               discard_until_line_directive = TRUE;
6111                               free (taggedfname);
6112                               break;
6113                             }
6114                       /* Else create a new file description and use that from
6115                          now on, until the next #line directive. */
6116                       if (fdp == NULL) /* not found */
6117                         {
6118                           fdp = fdhead;
6119                           fdhead = xnew (1, fdesc);
6120                           *fdhead = *curfdp; /* copy curr. file description */
6121                           fdhead->next = fdp;
6122                           fdhead->infname = savestr (curfdp->infname);
6123                           fdhead->infabsname = savestr (curfdp->infabsname);
6124                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6125                           fdhead->taggedfname = taggedfname;
6126                           fdhead->usecharno = FALSE;
6127                           fdhead->prop = NULL;
6128                           fdhead->written = FALSE;
6129                           curfdp = fdhead;
6130                         }
6131                     }
6132                   free (taggedabsname);
6133                   lineno = lno - 1;
6134                   readline (lbp, stream);
6135                   return;
6136                 } /* if a real #line directive */
6137             } /* if #line is followed by a number */
6138         } /* if line begins with "#line " */
6139
6140       /* If we are here, no #line directive was found. */
6141       if (discard_until_line_directive)
6142         {
6143           if (result > 0)
6144             {
6145               /* Do a tail recursion on ourselves, thus discarding the contents
6146                  of the line buffer. */
6147               readline (lbp, stream);
6148               return;
6149             }
6150           /* End of file. */
6151           discard_until_line_directive = FALSE;
6152           return;
6153         }
6154     } /* if #line directives should be considered */
6155
6156   {
6157     int match;
6158     regexp *rp;
6159     char *name;
6160
6161     /* Match against relevant regexps. */
6162     if (lbp->len > 0)
6163       for (rp = p_head; rp != NULL; rp = rp->p_next)
6164         {
6165           /* Only use generic regexps or those for the current language.
6166              Also do not use multiline regexps, which is the job of
6167              regex_tag_multiline. */
6168           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6169               || rp->multi_line)
6170             continue;
6171
6172           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6173           switch (match)
6174             {
6175             case -2:
6176               /* Some error. */
6177               if (!rp->error_signaled)
6178                 {
6179                   error ("regexp stack overflow while matching \"%s\"",
6180                          rp->pattern);
6181                   rp->error_signaled = TRUE;
6182                 }
6183               break;
6184             case -1:
6185               /* No match. */
6186               break;
6187             case 0:
6188               /* Empty string matched. */
6189               if (!rp->error_signaled)
6190                 {
6191                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6192                   rp->error_signaled = TRUE;
6193                 }
6194               break;
6195             default:
6196               /* Match occurred.  Construct a tag. */
6197               name = rp->name;
6198               if (name[0] == '\0')
6199                 name = NULL;
6200               else /* make a named tag */
6201                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6202               if (rp->force_explicit_name)
6203                 /* Force explicit tag name, if a name is there. */
6204                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6205               else
6206                 make_tag (name, strlen (name), TRUE,
6207                           lbp->buffer, match, lineno, linecharno);
6208               break;
6209             }
6210         }
6211   }
6212 }
6213
6214 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The length of CP is measured here and the actual copying is delegated
 * to savenstr, so the result is a separate buffer of strlen (CP) + 1
 * bytes holding the same text.
 */
static char *
savestr (const char *cp)
{
  const int length = strlen (cp);

  return savenstr (cp, length);
}
6224
6225 /*
6226  * Return a pointer to a space of size LEN+1 allocated with xnew where
6227  * the string CP has been copied for at most the first LEN characters.
6228  */
6229 static char *
6230 savenstr (const char *cp, int len)
6231 {
6232   register char *dp;
6233
6234   dp = xnew (len + 1, char);
6235   memcpy (dp, cp, len);
6236   dp[len] = '\0';
6237   return dp;
6238 }
6239
/*
 * Find the last occurrence of the character C in the string SP.
 *
 * Returns a pointer to that occurrence, or NULL when C does not appear.
 * The terminating NUL is part of the search, so asking for '\0' yields
 * a pointer to the end of the string.
 *
 * Equivalent to POSIX strrchr; kept here for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6259
/*
 * Find the first occurrence of the character C in the string SP.
 *
 * Returns a pointer to that occurrence, or NULL when C does not appear.
 * The terminating NUL is part of the search, so asking for '\0' yields
 * a pointer to the end of the string.
 *
 * Equivalent to POSIX strchr; kept here for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6276
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  The end of the string is not treated as a space,
   so the scan is expected to stop at the terminating NUL at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6285
/* Return a pointer to the first character at or after CP that is either
   white (according to iswhite) or the terminating NUL. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6294
/* Report a fatal problem and terminate the program.
   S1 is handed to error as its printf-style format and S2 as the single
   argument for it; afterwards the process exits with EXIT_FAILURE.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6302
/* Report a failed system call and terminate the program.
   S1 is printed through perror, which appends the message for the
   current errno; afterwards the process exits with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6309
6310 static void
6311 suggest_asking_for_help (void)
6312 {
6313   fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6314            progname);
6315   exit (EXIT_FAILURE);
6316 }
6317
6318 /* Output a diagnostic with printf-style FORMAT and args.  */
6319 static void
6320 error (const char *format, ...)
6321 {
6322   va_list ap;
6323   va_start (ap, format);
6324   fprintf (stderr, "%s: ", progname);
6325   vfprintf (stderr, format, ap);
6326   fprintf (stderr, "\n");
6327   va_end (ap);
6328 }
6329
6330 /* Return a newly-allocated string whose contents
6331    concatenate those of s1, s2, s3.  */
6332 static char *
6333 concat (const char *s1, const char *s2, const char *s3)
6334 {
6335   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6336   char *result = xnew (len1 + len2 + len3 + 1, char);
6337
6338   strcpy (result, s1);
6339   strcpy (result + len1, s2);
6340   strcpy (result + len1 + len2, s3);
6341   result[len1 + len2 + len3] = '\0';
6342
6343   return result;
6344 }
6345
6346 \f
6347 /* Does the same work as the system V getcwd, but does not need to
6348    guess the buffer size in advance. */
6349 static char *
6350 etags_getcwd (void)
6351 {
6352   int bufsize = 200;
6353   char *path = xnew (bufsize, char);
6354
6355   while (getcwd (path, bufsize) == NULL)
6356     {
6357       if (errno != ERANGE)
6358         pfatal ("getcwd");
6359       bufsize *= 2;
6360       free (path);
6361       path = xnew (bufsize, char);
6362     }
6363
6364   canonicalize_filename (path);
6365   return path;
6366 }
6367
6368 /* Return a newly allocated string containing the file name of FILE
6369    relative to the absolute directory DIR (which should end with a slash). */
6370 static char *
6371 relative_filename (char *file, char *dir)
6372 {
6373   char *fp, *dp, *afn, *res;
6374   int i;
6375
6376   /* Find the common root of file and dir (with a trailing slash). */
6377   afn = absolute_filename (file, cwd);
6378   fp = afn;
6379   dp = dir;
6380   while (*fp++ == *dp++)
6381     continue;
6382   fp--, dp--;                   /* back to the first differing char */
6383 #ifdef DOS_NT
6384   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6385     return afn;
6386 #endif
6387   do                            /* look at the equal chars until '/' */
6388     fp--, dp--;
6389   while (*fp != '/');
6390
6391   /* Build a sequence of "../" strings for the resulting relative file name. */
6392   i = 0;
6393   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6394     i += 1;
6395   res = xnew (3*i + strlen (fp + 1) + 1, char);
6396   res[0] = '\0';
6397   while (i-- > 0)
6398     strcat (res, "../");
6399
6400   /* Add the file name relative to the common root of file and dir. */
6401   strcat (res, fp + 1);
6402   free (afn);
6403
6404   return res;
6405 }
6406
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied; otherwise it is appended to
   DIR.  The result is then simplified in place: every "/." component is
   dropped and every "/.." component is removed together with the
   component before it.  If nothing is left, "/" is returned.

   Under DOS_NT a relative FILE with a drive letter (`d:NAME') is a
   fatal error. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Search backwards from SLASHP for the start of the
                 preceding component, i.e. the nearest earlier position
                 that looks like an absolute file name.  The search
                 never steps before RES, so no pointer outside the
                 buffer is ever formed. */
              cp = slashp;
              while (cp > res && !filename_is_absolute (--cp))
                continue;
              if (!filename_is_absolute (cp))
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Close the gap; strlen (slashp + 2) equals the length of
                 the text after "/.." plus one for its terminating NUL. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the count again includes the terminating NUL. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* everything was deleted, e.g. "/a/.." */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6469
/* Return a newly allocated string containing the absolute name of the
   directory in which FILE resides, given DIR (which should end with a
   slash).

   When FILE contains no slash the answer is simply a copy of DIR.
   Otherwise FILE is cut short right after its last slash for the
   duration of the absolute_filename call and restored afterwards, so
   FILE must be writable. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *after_slash, *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  after_slash = last_slash + 1;
  saved = *after_slash;
  *after_slash = '\0';
  dirname = absolute_filename (file, dir);
  *after_slash = saved;

  return dirname;
}
6489
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  FN
   must already have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  bool rooted = fn[0] == '/';

#ifdef DOS_NT
  if (!rooted)
    rooted = ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/';
#endif
  return rooted;
}
6501
/* Canonicalize the file name FN in place.

   Every run of one or more separator characters is replaced by a
   single slash.  The separator is '/' normally; under DOS_NT it is the
   backslash, and an upper-case drive letter is lowered first.  The
   result is never longer than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *src, *dst;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads the original text while DST, never ahead of SRC, writes
     the compacted text over it. */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do                    /* swallow the whole run of separators */
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6531
6532 \f
6533 /* Initialize a linebuffer for use. */
6534 static void
6535 linebuffer_init (linebuffer *lbp)
6536 {
6537   lbp->size = (DEBUG) ? 3 : 200;
6538   lbp->buffer = xnew (lbp->size, char);
6539   lbp->buffer[0] = '\0';
6540   lbp->len = 0;
6541 }
6542
6543 /* Set the minimum size of a string contained in a linebuffer. */
6544 static void
6545 linebuffer_setlen (linebuffer *lbp, int toksize)
6546 {
6547   while (lbp->size <= toksize)
6548     {
6549       lbp->size *= 2;
6550       xrnew (lbp->buffer, lbp->size, char);
6551     }
6552   lbp->len = toksize;
6553 }
6554
/* Allocate SIZE bytes with malloc and return the block.  If malloc
   returns NULL, report "virtual memory exhausted" through fatal. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6564
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  If realloc returns NULL, report "virtual memory
   exhausted" through fatal. */
static void *
xrealloc (char *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (!block)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6573
6574 /*
6575  * Local Variables:
6576  * indent-tabs-mode: t
6577  * tab-width: 8
6578  * fill-column: 79
6579  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6580  * c-file-style: "gnu"
6581  * End:
6582  */
6583
6584 /* etags.c ends here */