lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2012
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82 /* Truth values; also used below to normalize feature macros for `#if'.  */
  83 #define TRUE    1
  84 #define FALSE   0
  85
  86 #ifdef DEBUG                    /* normalize DEBUG to TRUE/FALSE for `#if DEBUG' */
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* not debugging: disable assert */
  92 #endif
  93
  94 #include <config.h>
  95
  96 #ifndef _GNU_SOURCE
  97 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  98 #endif
  99
 100 /* WIN32_NATIVE is for XEmacs.
 101    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 102 #ifdef WIN32_NATIVE
 103 # undef MSDOS
 104 # undef  WINDOWSNT
 105 # define WINDOWSNT
 106 #endif /* WIN32_NATIVE */
 107
 108 #ifdef MSDOS
 109 # undef MSDOS
 110 # define MSDOS TRUE
 111 # include <fcntl.h>
 112 # include <sys/param.h>
 113 # include <io.h>
 114 #else
 115 # define MSDOS FALSE
 116 #endif /* MSDOS */
 117
 118 #ifdef WINDOWSNT
 119 # include <fcntl.h>
 120 # include <direct.h>
 121 # include <io.h>
 122 # define MAXPATHLEN _MAX_PATH
 123 # undef HAVE_NTGUI
 124 # undef  DOS_NT
 125 # define DOS_NT
 126 # ifndef HAVE_GETCWD
 127 #   define HAVE_GETCWD
 128 # endif /* undef HAVE_GETCWD */
 129 #else /* not WINDOWSNT */
 130 #endif /* !WINDOWSNT */
 131
 132 #include <unistd.h>
 133 #ifndef HAVE_UNISTD_H
 134 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 135     extern char *getcwd (char *buf, size_t size);
 136 # endif
 137 #endif /* HAVE_UNISTD_H */
 138
 139 #include <stdarg.h>
 140 #include <stdlib.h>
 141 #include <string.h>
 142 #include <stdio.h>
 143 #include <ctype.h>
 144 #include <errno.h>
 145 #include <sys/types.h>
 146 #include <sys/stat.h>
 147 #include <c-strcase.h>
 148
 149 #include <assert.h>
 150 #ifdef NDEBUG
 151 # undef  assert                 /* some systems have a buggy assert.h */
 152 # define assert(x) ((void) 0)
 153 #endif
 154
 155 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 156 # define NO_LONG_OPTIONS TRUE
 157 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 158   extern char *optarg;
 159   extern int optind, opterr;
 160 #else
 161 # define NO_LONG_OPTIONS FALSE
 162 # include <getopt.h>
 163 #endif /* NO_LONG_OPTIONS */
 164
 165 #include <regex.h>
 166
 167 /* Define CTAGS to build "ctags", compatible with the usual program of that
 168  name.  Leave it undefined to build "etags", which makes Emacs-style tag
 169  tables and tags typedefs, #defines and struct/union/enum by default. */
 170 #ifdef CTAGS                    /* normalize to TRUE/FALSE for `#if CTAGS' */
 171 # undef  CTAGS
 172 # define CTAGS TRUE
 173 #else
 174 # define CTAGS FALSE
 175 #endif
 176 /* String equality predicates; both arguments must be non-NULL (asserted when DEBUG).  */
 177 #define streq(s,t)      (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 178 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 179 #define strneq(s,t,n)   (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 180 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 181
 182 #define CHARS 256               /* 2^8: one table slot per 8-bit char value */
 183 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))  /* reduce X to 0..CHARS-1 */
 184 #define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
 185 #define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
 186 #define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
 187 #define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
 188 #define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
 189 /* <ctype.h> wrappers that first reduce the argument with CHAR.  */
 190 #define ISALNUM(c)      isalnum (CHAR (c))
 191 #define ISALPHA(c)      isalpha (CHAR (c))
 192 #define ISDIGIT(c)      isdigit (CHAR (c))
 193 #define ISLOWER(c)      islower (CHAR (c))
 194
 195 #define lowcase(c)      tolower (CHAR (c))
 196
 197
 198 /*
 199  *      xnew, xrnew -- allocate, reallocate storage for N objects of type Type
 200  *      (xrnew also assigns the resulting pointer back to its first argument)
 201  * SYNOPSIS:    Type *xnew (int n, Type);
 202  *              void xrnew (OldPointer, int n, Type);
 203  */
 204 #if DEBUG                       /* DEBUG: use the trace_* allocators, passing the call site */
 205 # include "chkmalloc.h"
 206 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 207                                                   (n) * sizeof (Type)))
 208 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 209                                         (char *) (op), (n) * sizeof (Type)))
 210 #else                           /* otherwise go through xmalloc/xrealloc */
 211 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 212 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 213                                         (char *) (op), (n) * sizeof (Type)))
 214 #endif
 215
 216 #define bool int                /* NOTE: longopts passes &flag of some bools as a getopt_long flag pointer */
 217
 218 typedef void Lang_function (FILE *);    /* type of the per-language parse functions (see `function' in language) */
 219
 220 typedef struct                  /* element type of the compressors table */
 221 {
 222   const char *suffix;           /* file name suffix for this compressor */
 223   const char *command;          /* takes one arg and decompresses to stdout */
 224 } compressor;
 225
 226 typedef struct                  /* element type of the lang_names table */
 227 {
 228   const char *name;             /* language name */
 229   const char *help;             /* detailed help for the language */
 230   Lang_function *function;      /* parse function; takes a FILE * */
 231   const char **suffixes;        /* name suffixes of this language's files */
 232   const char **filenames;       /* names of this language's files */
 233   const char **interpreters;    /* interpreters for this language */
 234   bool metasource;              /* source used to generate other sources */
 235 } language;
 236
 237 typedef struct fdesc            /* file description; chained from fdhead */
 238 {
 239   struct fdesc *next;           /* for the linked list */
 240   char *infname;                /* uncompressed input file name */
 241   char *infabsname;             /* absolute uncompressed input file name */
 242   char *infabsdir;              /* absolute dir of input file */
 243   char *taggedfname;            /* file name to write in tagfile */
 244   language *lang;               /* language of file */
 245   char *prop;                   /* file properties to write in tagfile */
 246   bool usecharno;               /* etags tags shall contain char number */
 247   bool written;                 /* entry written in the tags file */
 248 } fdesc;
 249
 250 typedef struct node_st
 251 {                               /* sorting structure: one tag in the binary tree */
 252   struct node_st *left, *right; /* left and right sons */
 253   fdesc *fdp;                   /* description of the file the tag belongs to */
 254   char *name;                   /* tag name */
 255   char *regex;                  /* search regexp */
 256   bool valid;                   /* write this tag on the tag file */
 257   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 258   bool been_warned;             /* warning already given for duplicated tag */
 259   int lno;                      /* line number tag is on */
 260   long cno;                     /* character number line starts on */
 261 } node;
 262
 263 /*
 264  * A `linebuffer' is a structure which holds a line of text.
 265  * `readline_internal' reads a line from a stream into a linebuffer
 266  * and works regardless of the length of the line.
 267  * SIZE is the size of BUFFER, LEN is the length of the string in
 268  * BUFFER after readline reads it.
 269  */
 270 typedef struct
 271 {
 272   long size;                    /* size of buffer */
 273   int len;                      /* length of the string held in buffer */
 274   char *buffer;                 /* the text itself */
 275 } linebuffer;
 276
 277 /* One parsed argument; used to support mixing of --lang and file names. */
 278 typedef struct
 279 {
 280   enum {
 281     at_language,                /* a language specification */
 282     at_regexp,                  /* a regular expression */
 283     at_filename,                /* a file name */
 284     at_stdin,                   /* read from stdin here */
 285     at_end                      /* stop parsing the list */
 286   } arg_type;                   /* argument type */
 287   language *lang;               /* language associated with the argument */
 288   char *what;                   /* the argument itself */
 289 } argument;
 290
 291 /* Structure defining a regular expression; entries are chained from p_head. */
 292 typedef struct regexp
 293 {
 294   struct regexp *p_next;        /* pointer to next in list */
 295   language *lang;               /* if set, use only for this language */
 296   char *pattern;                /* the regexp pattern */
 297   char *name;                   /* tag name */
 298   struct re_pattern_buffer *pat; /* the compiled pattern */
 299   struct re_registers regs;     /* re registers */
 300   bool error_signaled;          /* already signaled for this regexp */
 301   bool force_explicit_name;     /* do not allow implicit tag name */
 302   bool ignore_case;             /* ignore case when matching */
 303   bool multi_line;              /* do a multi-line match on the whole file */
 304 } regexp;
 305
 306
 307 /* Many compilers barf on this:
 308         Lang_function Ada_funcs;
 309    so let's write it this way */
 310 static void Ada_funcs (FILE *);
 311 static void Asm_labels (FILE *);
 312 static void C_entries (int c_ext, FILE *);
 313 static void default_C_entries (FILE *);
 314 static void plain_C_entries (FILE *);
 315 static void Cjava_entries (FILE *);
 316 static void Cobol_paragraphs (FILE *);
 317 static void Cplusplus_entries (FILE *);
 318 static void Cstar_entries (FILE *);
 319 static void Erlang_functions (FILE *);
 320 static void Forth_words (FILE *);
 321 static void Fortran_functions (FILE *);
 322 static void HTML_labels (FILE *);
 323 static void Lisp_functions (FILE *);
 324 static void Lua_functions (FILE *);
 325 static void Makefile_targets (FILE *);
 326 static void Pascal_functions (FILE *);
 327 static void Perl_functions (FILE *);
 328 static void PHP_functions (FILE *);
 329 static void PS_functions (FILE *);
 330 static void Prolog_functions (FILE *);
 331 static void Python_functions (FILE *);
 332 static void Scheme_functions (FILE *);
 333 static void TeX_commands (FILE *);
 334 static void Texinfo_nodes (FILE *);
 335 static void Yacc_entries (FILE *);
 336 static void just_read_file (FILE *);
 337
 338 static void print_language_names (void);
 339 static void print_version (void);
 340 static void print_help (argument *);
 341 int main (int, char **);
 342
 343 static compressor *get_compressor_from_suffix (char *, char **);
 344 static language *get_language_from_langname (const char *);
 345 static language *get_language_from_interpreter (char *);
 346 static language *get_language_from_filename (char *, bool);
 347 static void readline (linebuffer *, FILE *);
 348 static long readline_internal (linebuffer *, FILE *);
 349 static bool nocase_tail (const char *);
 350 static void get_tag (char *, char **);
 351
 352 static void analyse_regex (char *);
 353 static void free_regexps (void);
 354 static void regex_tag_multiline (void);
 355 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 356 static _Noreturn void suggest_asking_for_help (void);
 357 _Noreturn void fatal (const char *, const char *);
 358 static _Noreturn void pfatal (const char *);
 359 static void add_node (node *, node **);
 360
 361 static void init (void);
 362 static void process_file_name (char *, language *);
 363 static void process_file (FILE *, char *, language *);
 364 static void find_entries (FILE *);
 365 static void free_tree (node *);
 366 static void free_fdesc (fdesc *);
 367 static void pfnote (char *, bool, char *, int, int, long);
 368 static void make_tag (const char *, int, bool, char *, int, int, long);
 369 static void invalidate_nodes (fdesc *, node **);
 370 static void put_entries (node *);
 371
 372 static char *concat (const char *, const char *, const char *);
 373 static char *skip_spaces (char *);
 374 static char *skip_non_spaces (char *);
 375 static char *savenstr (const char *, int);
 376 static char *savestr (const char *);
 377 static char *etags_strchr (const char *, int);
 378 static char *etags_strrchr (const char *, int);
 379 static char *etags_getcwd (void);
 380 static char *relative_filename (char *, char *);
 381 static char *absolute_filename (char *, char *);
 382 static char *absolute_dirname (char *, char *);
 383 static bool filename_is_absolute (char *f);
 384 static void canonicalize_filename (char *);
 385 static void linebuffer_init (linebuffer *);
 386 static void linebuffer_setlen (linebuffer *, int);
 387 static void *xmalloc (size_t);
 388 static void *xrealloc (char *, size_t);
 389
 390 \f
 391 static char searchar = '/';     /* search delimiter: use /.../ searches */
 392
 393 static char *tagfile;           /* output file */
 394 static char *progname;          /* name this program was invoked with */
 395 static char *cwd;               /* current working directory */
 396 static char *tagfiledir;        /* directory of tagfile */
 397 static FILE *tagf;              /* stream for the tags file */
 398 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member of an argument */
 399
 400 static fdesc *fdhead;           /* head of file description list */
 401 static fdesc *curfdp;           /* current file description */
 402 static int lineno;              /* line number of current line */
 403 static long charno;             /* current character number */
 404 static long linecharno;         /* charno of start of current line */
 405 static char *dbp;               /* pointer to start of current tag */
 406
 407 static const int invalidcharno = -1;    /* presumably means "no char number" -- confirm at use sites */
 408
 409 static node *nodehead;          /* the head of the binary tree of tags */
 410 static node *last_node;         /* the last node created */
 411
 412 static linebuffer lb;           /* the current line */
 413 static linebuffer filebuf;      /* a buffer containing the whole file */
 414 static linebuffer token_name;   /* a buffer containing a tag name */
 415
 416 /* Tables behind iswhite, notinname, intoken, begtoken, endtoken (see init) */
 417 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 418 static const char
 419   /* white chars */
 420   *white = " \f\t\n\r\v",
 421   /* not in a name */
 422   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 423   /* token ending chars */
 424   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 425   /* token starting chars */
 426   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 427   /* valid in-token chars */
 428   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 429
 430 static bool append_to_tagfile;  /* -a: append to tags */
 431 /* The next five default to TRUE in C and derived languages.  */
 432 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 433 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 434                                 /* 0 struct/enum/union decls, and C++ */
 435                                 /* member functions. */
 436 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 437                                 /* constants and variables. */
 438                                 /* -D: opposite of -d.  Default under ctags. */
 439 static bool globals;            /* --globals, --no-globals: tag global variables */
 440 static bool members;            /* --members, --no-members: tag C member variables */
 441 static bool declarations;       /* --declarations: tag declarations and externs in C&Co */
 442 static bool no_line_directive;  /* --no-line-directive: ignore #line directives (undocumented) */
 443 static bool no_duplicates;      /* --no-duplicates: no duplicate tags for ctags (undocumented) */
 444 static bool update;             /* -u: update tags */
 445 static bool vgrind_style;       /* -v: create vgrind style index output */
 446 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 447 static bool cxref_style;        /* -x: create cxref style output */
 448 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 449 static bool ignoreindent;       /* -I: ignore indentation in C */
 450 static bool packages_only;      /* --packages-only: in Ada, only tag packages */
 451
 452 /* STDIN is defined in LynxOS system headers; drop that definition first */
 453 #ifdef STDIN
 454 # undef STDIN
 455 #endif
 456
 457 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 458 static bool parsing_stdin;      /* --parse-stdin used */
 459
 460 static regexp *p_head;          /* head of the list of all regexps (linked by p_next) */
 461 static bool need_filebuf;       /* some regexes are multi-line */
 462
 463 static struct option longopts[] =       /* getopt_long option table; { NULL } ends it */
 464 {
 465   { "append",             no_argument,       NULL,               'a'   },
 466   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 467   { "c++",                no_argument,       NULL,               'C'   },
 468   { "declarations",       no_argument,       &declarations,      TRUE  },
 469   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 470   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 471   { "help",               no_argument,       NULL,               'h'   },
 472   /* Only one "help" entry: a second one with another value makes abbreviations like --he ambiguous. */
 473   { "ignore-indentation", no_argument,       NULL,               'I'   },
 474   { "language",           required_argument, NULL,               'l'   },
 475   { "members",            no_argument,       &members,           TRUE  },
 476   { "no-members",         no_argument,       &members,           FALSE },
 477   { "output",             required_argument, NULL,               'o'   },
 478   { "regex",              required_argument, NULL,               'r'   },
 479   { "no-regex",           no_argument,       NULL,               'R'   },
 480   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 481   { "parse-stdin",        required_argument, NULL,               STDIN },
 482   { "version",            no_argument,       NULL,               'V'   },
 483
 484 #if CTAGS /* Ctags options */
 485   { "backward-search",    no_argument,       NULL,               'B'   },
 486   { "cxref",              no_argument,       NULL,               'x'   },
 487   { "defines",            no_argument,       NULL,               'd'   },
 488   { "globals",            no_argument,       &globals,           TRUE  },
 489   { "typedefs",           no_argument,       NULL,               't'   },
 490   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 491   { "update",             no_argument,       NULL,               'u'   },
 492   { "vgrind",             no_argument,       NULL,               'v'   },
 493   { "no-warn",            no_argument,       NULL,               'w'   },
 494
 495 #else /* Etags options */
 496   { "no-defines",         no_argument,       NULL,               'D'   },
 497   { "no-globals",         no_argument,       &globals,           FALSE },
 498   { "include",            required_argument, NULL,               'i'   },
 499 #endif
 500   { NULL }
 501 };
 502
 503 static compressor compressors[] =       /* { suffix, command } pairs; { NULL } ends the table */
 504 {
 505   { "z", "gzip -d -c"},
 506   { "Z", "gzip -d -c"},
 507   { "gz", "gzip -d -c"},
 508   { "GZ", "gzip -d -c"},
 509   { "bz2", "bzip2 -d -c" },
 510   { "xz", "xz -d -c" },
 511   { NULL }
 512 };
 513
 514 /*
 515  * Language stuff.
 516  */
 517
 518 /* Ada code: file name suffixes and detailed help text */
 519 static const char *Ada_suffixes [] =
 520   { "ads", "adb", "ada", NULL };
 521 static const char Ada_help [] =
 522 "In Ada code, functions, procedures, packages, tasks and types are\n\
 523 tags.  Use the `--packages-only' option to create tags for\n\
 524 packages only.\n\
 525 Ada tag names have suffixes indicating the type of entity:\n\
 526         Entity type:    Qualifier:\n\
 527         ------------    ----------\n\
 528         function        /f\n\
 529         procedure       /p\n\
 530         package spec    /s\n\
 531         package body    /b\n\
 532         type            /t\n\
 533         task            /k\n\
 534 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 535 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 536 will just search for any tag `bidule'.";
 537
 538 /* Assembly code: file name suffixes and detailed help text */
 539 static const char *Asm_suffixes [] =
 540   { "a",        /* Unix assembler */
 541     "asm", /* Microcontroller assembly */
 542     "def", /* BSO/Tasking definition includes  */
 543     "inc", /* Microcontroller include files */
 544     "ins", /* Microcontroller include files */
 545     "s", "sa", /* Unix assembler */
 546     "S",   /* cpp-processed Unix assembler */
 547     "src", /* BSO/Tasking C compiler output */
 548     NULL
 549   };
 550 static const char Asm_help [] =
 551 "In assembler code, labels appearing at the beginning of a line,\n\
 552 followed by a colon, are tags.";
 553
 554
 555 /* Note that .c and .h can be considered C++, if the --c++ flag was
 556    given, or if the `class' or `template' keywords are found inside the
 557    file.  That is why default_C_entries is called for these. */
 558 static const char *default_C_suffixes [] =
 559   { "c", "h", NULL };
 560 #if CTAGS                               /* C help for Ctags */
 561 static const char default_C_help [] =
 562 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 563 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 564 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 565 Use --globals to tag global variables.\n\
 566 You can tag function declarations and external variables by\n\
 567 using `--declarations', and struct members by using `--members'.";
 568 #else                                   /* C help for Etags */
 569 static const char default_C_help [] =
 570 "In C code, any C function or typedef is a tag, and so are\n\
 571 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 572 definitions and `enum' constants are tags unless you specify\n\
 573 `--no-defines'.  Global variables are tags unless you specify\n\
 574 `--no-globals' and so are struct members unless you specify\n\
 575 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 576 `--no-members' can make the tags table file much smaller.\n\
 577 You can tag function declarations and external variables by\n\
 578 using `--declarations'.";
 579 #endif  /* C help for Ctags and Etags */
 580
 581 static const char *Cplusplus_suffixes [] =      /* NULL-terminated suffix list */
 582   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 583     "M",                        /* Objective C++ */
 584     "pdb",                      /* PostScript with C syntax */
 585     NULL };
 586 static const char Cplusplus_help [] =           /* detailed help text */
 587 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 588 --help --lang=c --lang=c++ for full help.)\n\
 589 In addition to C tags, member functions are also recognized.  Member\n\
 590 variables are recognized unless you use the `--no-members' option.\n\
 591 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 592 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 593 `operator+'.";
 594
 595 static const char *Cjava_suffixes [] =  /* NULL-terminated suffix list */
 596   { "java", NULL };
 597 static const char Cjava_help [] =       /* detailed help text; read-only like the other help strings */
 598 "In Java code, all the tags constructs of C and C++ code are\n\
 599 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 600
 601
 602 static const char *Cobol_suffixes [] =  /* NULL-terminated suffix list */
 603   { "COB", "cob", NULL };
 604 static const char Cobol_help [] =       /* detailed help text; read-only like the other help strings */
 605 "In Cobol code, tags are paragraph names; that is, any word\n\
 606 starting in column 8 and followed by a period.";
 607
 608 static const char *Cstar_suffixes [] =  /* NULL-terminated; no help string accompanies this table */
 609   { "cs", "hs", NULL };
 610
 611 static const char *Erlang_suffixes [] = /* NULL-terminated suffix list */
 612   { "erl", "hrl", NULL };
 613 static const char Erlang_help [] =
 614 "In Erlang code, the tags are the functions, records and macros\n\
 615 defined in the file.";
 616
 617 static const char *Forth_suffixes [] =  /* NULL-terminated suffix list; file-local like its siblings */
 618   { "fth", "tok", NULL };
 619 static const char Forth_help [] =
 620 "In Forth code, tags are words defined by `:',\n\
 621 constant, code, create, defer, value, variable, buffer:, field.";
 622
 623 static const char *Fortran_suffixes [] =        /* NULL-terminated suffix list */
 624   { "F", "f", "f90", "for", NULL };
 625 static const char Fortran_help [] =
 626 "In Fortran code, functions, subroutines and block data are tags.";
 627
 628 static const char *HTML_suffixes [] =           /* NULL-terminated suffix list */
 629   { "htm", "html", "shtml", NULL };
 630 static const char HTML_help [] =
 631 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 632 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 633 occurrences of `id='.";
 634
 635 static const char *Lisp_suffixes [] =           /* NULL-terminated suffix list */
 636   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 637 static const char Lisp_help [] =
 638 "In Lisp code, any function defined with `defun', any variable\n\
 639 defined with `defvar' or `defconst', and in general the first\n\
 640 argument of any expression that starts with `(def' in column zero\n\
 641 is a tag.";
 642
 643 static const char *Lua_suffixes [] =            /* NULL-terminated suffix list */
 644   { "lua", "LUA", NULL };
 645 static const char Lua_help [] =
 646 "In Lua scripts, all functions are tags.";
 647
 648 static const char *Makefile_filenames [] =      /* file names rather than suffixes; NULL-terminated */
 649   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 650 static const char Makefile_help [] =
 651 "In makefiles, targets are tags; additionally, variables are tags\n\
 652 unless you specify `--no-globals'.";
 653
 654 static const char *Objc_suffixes [] =   /* NULL-terminated suffix list */
 655   { "lm",                       /* Objective lex file */
 656     "m",                        /* Objective C file */
 657      NULL };
 658 static const char Objc_help [] =
 659 "In Objective C code, tags include Objective C definitions for classes,\n\
 660 class categories, methods and protocols.  Tags for variables and\n\
 661 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 662 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 663
 664 static const char *Pascal_suffixes [] = /* NULL-terminated suffix list */
 665   { "p", "pas", NULL };
 666 static const char Pascal_help [] =
 667 "In Pascal code, the tags are the functions and procedures defined\n\
 668 in the file.";
 669 /* " // this is for working around an Emacs highlighting bug... */
 670
 671 static const char *Perl_suffixes [] =           /* NULL-terminated suffix list */
 672   { "pl", "pm", NULL };
 673 static const char *Perl_interpreters [] =       /* NULL-terminated interpreter names */
 674   { "perl", "@PERL@", NULL };
 675 static const char Perl_help [] =
 676 "In Perl code, the tags are the packages, subroutines and variables\n\
 677 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 678 `--globals' if you want to tag global variables.  Tags for\n\
 679 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 680 defined in the default package is `main::SUB'.";
 681
 682 static const char *PHP_suffixes [] =            /* NULL-terminated suffix list */
 683   { "php", "php3", "php4", NULL };
 684 static const char PHP_help [] =
 685 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 686 the `--no-members' option, vars are tags too.";
 687
 688 static const char *plain_C_suffixes [] =        /* NULL-terminated suffix list */
 689   { "pc",                       /* Pro*C file */
 690      NULL };
 691
 692 static const char *PS_suffixes [] =             /* NULL-terminated suffix list */
 693   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 694 static const char PS_help [] =
 695 "In PostScript code, the tags are the functions.";
 696
 697 static const char *Prolog_suffixes [] = /* NULL-terminated suffix list */
 698   { "prolog", NULL };
 699 static const char Prolog_help [] =
 700 "In Prolog code, tags are predicates and rules at the beginning of\n\
 701 line.";
 702
 703 static const char *Python_suffixes [] = /* NULL-terminated suffix list */
 704   { "py", NULL };
 705 static const char Python_help [] =
 706 "In Python code, `def' or `class' at the beginning of a line\n\
 707 generate a tag.";
 708
 709 /* Can't do the `SCM' or `scm' prefix with a version number. */
 710 static const char *Scheme_suffixes [] = /* NULL-terminated suffix list */
 711   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 712 static const char Scheme_help [] =
 713 "In Scheme code, tags include anything defined with `def' or with a\n\
 714 construct whose name starts with `def'.  They also include\n\
 715 variables set with `set!' at top level in the file.";
 716
 717 static const char *TeX_suffixes [] =            /* NULL-terminated suffix list */
 718   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 719 static const char TeX_help [] =
 720 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 721 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 722 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 723 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 724 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 725 \n\
 726 Other commands can be specified by setting the environment variable\n\
 727 `TEXTAGS' to a colon-separated list like, for example,\n\
 728      TEXTAGS=\"mycommand:myothercommand\".";
 729
 730
 731 static const char *Texinfo_suffixes [] =        /* NULL-terminated suffix list */
 732   { "texi", "texinfo", "txi", NULL };
 733 static const char Texinfo_help [] =
 734 "for texinfo files, lines starting with @node are tagged.";
 735
 736 static const char *Yacc_suffixes [] =           /* NULL-terminated suffix list */
 737   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 738 static const char Yacc_help [] =
 739 "In Bison or Yacc input files, each rule defines as a tag the\n\
 740 nonterminal it constructs.  The portions of the file that contain\n\
 741 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 742 for full help).";
 743
 744 static const char auto_help [] =
 745 "`auto' is not a real language, it indicates to use\n\
 746 a default language for files base on file name suffix and file contents.";
 747
 748 static const char none_help [] =
 749 "`none' is not a real language, it indicates to only do\n\
 750 regexp processing on files.";
 751
 752 static const char no_lang_help [] =
 753 "No detailed help available for this language.";
 754
 755
/*
 * Table of languages.
 *
 * Each row gives, in order: the language name, its detailed help
 * text, the function that tags a file in that language, and the
 * NULL-terminated list of file-name suffixes.  Optional trailing
 * fields, omitted (and so zero-initialized) in most rows, are a list
 * of whole file names (only "makefile" sets it), a list of
 * interpreter names (only "perl" sets it), and a flag that only
 * "yacc" sets.
 * NOTE(review): the field order and the meaning of the final flag are
 * inferred from the initializers below -- confirm against the
 * `language' typedef.
 *
 * The table is terminated by a row whose name is NULL; the lookup and
 * listing loops stop there.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 795
 796 \f
 797 static void
 798 print_language_names (void)
 799 {
 800   language *lang;
 801   const char **name, **ext;
 802
 803   puts ("\nThese are the currently supported languages, along with the\n\
 804 default file names and dot suffixes:");
 805   for (lang = lang_names; lang->name != NULL; lang++)
 806     {
 807       printf ("  %-*s", 10, lang->name);
 808       if (lang->filenames != NULL)
 809         for (name = lang->filenames; *name != NULL; name++)
 810           printf (" %s", *name);
 811       if (lang->suffixes != NULL)
 812         for (ext = lang->suffixes; *ext != NULL; ext++)
 813           printf (" .%s", *ext);
 814       puts ("");
 815     }
 816   puts ("where `auto' means use default language for files based on file\n\
 817 name suffix, and `none' means only do regexp processing on files.\n\
 818 If no language is specified and no matching suffix is found,\n\
 819 the first line of the file is read for a sharp-bang (#!) sequence\n\
 820 followed by the name of an interpreter.  If no such sequence is found,\n\
 821 Fortran is tried first; if no tags are found, C is tried next.\n\
 822 When parsing any C file, a \"class\" or \"template\" keyword\n\
 823 switches to C++.");
 824   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 825 \n\
 826 For detailed help on a given language use, for example,\n\
 827 etags --help --lang=ada.");
 828 }
 829
 830 #ifndef EMACS_NAME
 831 # define EMACS_NAME "standalone"
 832 #endif
 833 #ifndef VERSION
 834 # define VERSION "17.38.1.4"
 835 #endif
 836 static void
 837 print_version (void)
 838 {
 839   /* Makes it easier to update automatically. */
 840   char emacs_copyright[] = "Copyright (C) 2012 Free Software Foundation, Inc.";
 841
 842   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 843   puts (emacs_copyright);
 844   puts ("This program is distributed under the terms in ETAGS.README");
 845
 846   exit (EXIT_SUCCESS);
 847 }
 848
 849 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 850 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 851 #endif
 852
 853 static void
 854 print_help (argument *argbuffer)
 855 {
 856   bool help_for_lang = FALSE;
 857
 858   for (; argbuffer->arg_type != at_end; argbuffer++)
 859     if (argbuffer->arg_type == at_language)
 860       {
 861         if (help_for_lang)
 862           puts ("");
 863         puts (argbuffer->lang->help);
 864         help_for_lang = TRUE;
 865       }
 866
 867   if (help_for_lang)
 868     exit (EXIT_SUCCESS);
 869
 870   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 871 \n\
 872 These are the options accepted by %s.\n", progname, progname);
 873   if (NO_LONG_OPTIONS)
 874     puts ("WARNING: long option names do not work with this executable,\n\
 875 as it is not linked with GNU getopt.");
 876   else
 877     puts ("You may use unambiguous abbreviations for the long option names.");
 878   puts ("  A - as file name means read names from stdin (one per line).\n\
 879 Absolute names are stored in the output file as they are.\n\
 880 Relative ones are stored relative to the output file's directory.\n");
 881
 882   puts ("-a, --append\n\
 883         Append tag entries to existing tags file.");
 884
 885   puts ("--packages-only\n\
 886         For Ada files, only generate tags for packages.");
 887
 888   if (CTAGS)
 889     puts ("-B, --backward-search\n\
 890         Write the search commands for the tag entries using '?', the\n\
 891         backward-search command instead of '/', the forward-search command.");
 892
 893   /* This option is mostly obsolete, because etags can now automatically
 894      detect C++.  Retained for backward compatibility and for debugging and
 895      experimentation.  In principle, we could want to tag as C++ even
 896      before any "class" or "template" keyword.
 897   puts ("-C, --c++\n\
 898         Treat files whose name suffix defaults to C language as C++ files.");
 899   */
 900
 901   puts ("--declarations\n\
 902         In C and derived languages, create tags for function declarations,");
 903   if (CTAGS)
 904     puts ("\tand create tags for extern variables if --globals is used.");
 905   else
 906     puts
 907       ("\tand create tags for extern variables unless --no-globals is used.");
 908
 909   if (CTAGS)
 910     puts ("-d, --defines\n\
 911         Create tag entries for C #define constants and enum constants, too.");
 912   else
 913     puts ("-D, --no-defines\n\
 914         Don't create tag entries for C #define constants and enum constants.\n\
 915         This makes the tags file smaller.");
 916
 917   if (!CTAGS)
 918     puts ("-i FILE, --include=FILE\n\
 919         Include a note in tag file indicating that, when searching for\n\
 920         a tag, one should also consult the tags file FILE after\n\
 921         checking the current file.");
 922
 923   puts ("-l LANG, --language=LANG\n\
 924         Force the following files to be considered as written in the\n\
 925         named language up to the next --language=LANG option.");
 926
 927   if (CTAGS)
 928     puts ("--globals\n\
 929         Create tag entries for global variables in some languages.");
 930   else
 931     puts ("--no-globals\n\
 932         Do not create tag entries for global variables in some\n\
 933         languages.  This makes the tags file smaller.");
 934
 935   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 936     puts ("--no-line-directive\n\
 937         Ignore #line preprocessor directives in C and derived languages.");
 938
 939   if (CTAGS)
 940     puts ("--members\n\
 941         Create tag entries for members of structures in some languages.");
 942   else
 943     puts ("--no-members\n\
 944         Do not create tag entries for members of structures\n\
 945         in some languages.");
 946
 947   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 948         Make a tag for each line matching a regular expression pattern\n\
 949         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 950         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 951         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 952         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 953   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 954         For example Tcl named tags can be created with:\n\
 955           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 956         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 957         `m' means to allow multi-line matches, `s' implies `m' and\n\
 958         causes dot to match any character, including newline.");
 959
 960   puts ("-R, --no-regex\n\
 961         Don't create tags from regexps for the following files.");
 962
 963   puts ("-I, --ignore-indentation\n\
 964         In C and C++ do not assume that a closing brace in the first\n\
 965         column is the final brace of a function or structure definition.");
 966
 967   puts ("-o FILE, --output=FILE\n\
 968         Write the tags to FILE.");
 969
 970   puts ("--parse-stdin=NAME\n\
 971         Read from standard input and record tags as belonging to file NAME.");
 972
 973   if (CTAGS)
 974     {
 975       puts ("-t, --typedefs\n\
 976         Generate tag entries for C and Ada typedefs.");
 977       puts ("-T, --typedefs-and-c++\n\
 978         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 979         and C++ member functions.");
 980     }
 981
 982   if (CTAGS)
 983     puts ("-u, --update\n\
 984         Update the tag entries for the given files, leaving tag\n\
 985         entries for other files in place.  Currently, this is\n\
 986         implemented by deleting the existing entries for the given\n\
 987         files and then rewriting the new entries at the end of the\n\
 988         tags file.  It is often faster to simply rebuild the entire\n\
 989         tag file than to use this.");
 990
 991   if (CTAGS)
 992     {
 993       puts ("-v, --vgrind\n\
 994         Print on the standard output an index of items intended for\n\
 995         human consumption, similar to the output of vgrind.  The index\n\
 996         is sorted, and gives the page number of each item.");
 997
 998       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 999         puts ("-w, --no-duplicates\n\
1000         Do not create duplicate tag entries, for compatibility with\n\
1001         traditional ctags.");
1002
1003       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1004         puts ("-w, --no-warn\n\
1005         Suppress warning messages about duplicate tag entries.");
1006
1007       puts ("-x, --cxref\n\
1008         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1009         The output uses line numbers instead of page numbers, but\n\
1010         beyond that the differences are cosmetic; try both to see\n\
1011         which you like.");
1012     }
1013
1014   puts ("-V, --version\n\
1015         Print the version of the program.\n\
1016 -h, --help\n\
1017         Print this help message.\n\
1018         Followed by one or more `--language' options prints detailed\n\
1019         help about tag generation for the specified languages.");
1020
1021   print_language_names ();
1022
1023   puts ("");
1024   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1025
1026   exit (EXIT_SUCCESS);
1027 }
1028
1029 \f
1030 int
1031 main (int argc, char **argv)
1032 {
1033   int i;
1034   unsigned int nincluded_files;
1035   char **included_files;
1036   argument *argbuffer;
1037   int current_arg, file_count;
1038   linebuffer filename_lb;
1039   bool help_asked = FALSE;
1040   ptrdiff_t len;
1041  char *optstring;
1042  int opt;
1043
1044
1045 #ifdef DOS_NT
1046   _fmode = O_BINARY;   /* all of files are treated as binary files */
1047 #endif /* DOS_NT */
1048
1049   progname = argv[0];
1050   nincluded_files = 0;
1051   included_files = xnew (argc, char *);
1052   current_arg = 0;
1053   file_count = 0;
1054
1055   /* Allocate enough no matter what happens.  Overkill, but each one
1056      is small. */
1057   argbuffer = xnew (argc, argument);
1058
1059   /*
1060    * Always find typedefs and structure tags.
1061    * Also default to find macro constants, enum constants, struct
1062    * members and global variables.  Do it for both etags and ctags.
1063    */
1064   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1065   globals = members = TRUE;
1066
1067   /* When the optstring begins with a '-' getopt_long does not rearrange the
1068      non-options arguments to be at the end, but leaves them alone. */
1069   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1070                       "ac:Cf:Il:o:r:RSVhH",
1071                       (CTAGS) ? "BxdtTuvw" : "Di:");
1072
1073   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1074     switch (opt)
1075       {
1076       case 0:
1077         /* If getopt returns 0, then it has already processed a
1078            long-named option.  We should do nothing.  */
1079         break;
1080
1081       case 1:
1082         /* This means that a file name has been seen.  Record it. */
1083         argbuffer[current_arg].arg_type = at_filename;
1084         argbuffer[current_arg].what     = optarg;
1085         len = strlen (optarg);
1086         if (whatlen_max < len)
1087           whatlen_max = len;
1088         ++current_arg;
1089         ++file_count;
1090         break;
1091
1092       case STDIN:
1093         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1094         argbuffer[current_arg].arg_type = at_stdin;
1095         argbuffer[current_arg].what     = optarg;
1096         len = strlen (optarg);
1097         if (whatlen_max < len)
1098           whatlen_max = len;
1099         ++current_arg;
1100         ++file_count;
1101         if (parsing_stdin)
1102           fatal ("cannot parse standard input more than once", (char *)NULL);
1103         parsing_stdin = TRUE;
1104         break;
1105
1106         /* Common options. */
1107       case 'a': append_to_tagfile = TRUE;       break;
1108       case 'C': cplusplus = TRUE;               break;
1109       case 'f':         /* for compatibility with old makefiles */
1110       case 'o':
1111         if (tagfile)
1112           {
1113             error ("-o option may only be given once.");
1114             suggest_asking_for_help ();
1115             /* NOTREACHED */
1116           }
1117         tagfile = optarg;
1118         break;
1119       case 'I':
1120       case 'S':         /* for backward compatibility */
1121         ignoreindent = TRUE;
1122         break;
1123       case 'l':
1124         {
1125           language *lang = get_language_from_langname (optarg);
1126           if (lang != NULL)
1127             {
1128               argbuffer[current_arg].lang = lang;
1129               argbuffer[current_arg].arg_type = at_language;
1130               ++current_arg;
1131             }
1132         }
1133         break;
1134       case 'c':
1135         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1136         optarg = concat (optarg, "i", ""); /* memory leak here */
1137         /* FALLTHRU */
1138       case 'r':
1139         argbuffer[current_arg].arg_type = at_regexp;
1140         argbuffer[current_arg].what = optarg;
1141         len = strlen (optarg);
1142         if (whatlen_max < len)
1143           whatlen_max = len;
1144         ++current_arg;
1145         break;
1146       case 'R':
1147         argbuffer[current_arg].arg_type = at_regexp;
1148         argbuffer[current_arg].what = NULL;
1149         ++current_arg;
1150         break;
1151       case 'V':
1152         print_version ();
1153         break;
1154       case 'h':
1155       case 'H':
1156         help_asked = TRUE;
1157         break;
1158
1159         /* Etags options */
1160       case 'D': constantypedefs = FALSE;                        break;
1161       case 'i': included_files[nincluded_files++] = optarg;     break;
1162
1163         /* Ctags options. */
1164       case 'B': searchar = '?';                                 break;
1165       case 'd': constantypedefs = TRUE;                         break;
1166       case 't': typedefs = TRUE;                                break;
1167       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1168       case 'u': update = TRUE;                                  break;
1169       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1170       case 'x': cxref_style = TRUE;                             break;
1171       case 'w': no_warnings = TRUE;                             break;
1172       default:
1173         suggest_asking_for_help ();
1174         /* NOTREACHED */
1175       }
1176
1177   /* No more options.  Store the rest of arguments. */
1178   for (; optind < argc; optind++)
1179     {
1180       argbuffer[current_arg].arg_type = at_filename;
1181       argbuffer[current_arg].what = argv[optind];
1182       len = strlen (argv[optind]);
1183       if (whatlen_max < len)
1184         whatlen_max = len;
1185       ++current_arg;
1186       ++file_count;
1187     }
1188
1189   argbuffer[current_arg].arg_type = at_end;
1190
1191   if (help_asked)
1192     print_help (argbuffer);
1193     /* NOTREACHED */
1194
1195   if (nincluded_files == 0 && file_count == 0)
1196     {
1197       error ("no input files specified.");
1198       suggest_asking_for_help ();
1199       /* NOTREACHED */
1200     }
1201
1202   if (tagfile == NULL)
1203     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1204   cwd = etags_getcwd ();        /* the current working directory */
1205   if (cwd[strlen (cwd) - 1] != '/')
1206     {
1207       char *oldcwd = cwd;
1208       cwd = concat (oldcwd, "/", "");
1209       free (oldcwd);
1210     }
1211
1212   /* Compute base directory for relative file names. */
1213   if (streq (tagfile, "-")
1214       || strneq (tagfile, "/dev/", 5))
1215     tagfiledir = cwd;            /* relative file names are relative to cwd */
1216   else
1217     {
1218       canonicalize_filename (tagfile);
1219       tagfiledir = absolute_dirname (tagfile, cwd);
1220     }
1221
1222   init ();                      /* set up boolean "functions" */
1223
1224   linebuffer_init (&lb);
1225   linebuffer_init (&filename_lb);
1226   linebuffer_init (&filebuf);
1227   linebuffer_init (&token_name);
1228
1229   if (!CTAGS)
1230     {
1231       if (streq (tagfile, "-"))
1232         {
1233           tagf = stdout;
1234 #ifdef DOS_NT
1235           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1236              doesn't take effect until after `stdout' is already open). */
1237           if (!isatty (fileno (stdout)))
1238             setmode (fileno (stdout), O_BINARY);
1239 #endif /* DOS_NT */
1240         }
1241       else
1242         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1243       if (tagf == NULL)
1244         pfatal (tagfile);
1245     }
1246
1247   /*
1248    * Loop through files finding functions.
1249    */
1250   for (i = 0; i < current_arg; i++)
1251     {
1252       static language *lang;    /* non-NULL if language is forced */
1253       char *this_file;
1254
1255       switch (argbuffer[i].arg_type)
1256         {
1257         case at_language:
1258           lang = argbuffer[i].lang;
1259           break;
1260         case at_regexp:
1261           analyse_regex (argbuffer[i].what);
1262           break;
1263         case at_filename:
1264               this_file = argbuffer[i].what;
1265               /* Input file named "-" means read file names from stdin
1266                  (one per line) and use them. */
1267               if (streq (this_file, "-"))
1268                 {
1269                   if (parsing_stdin)
1270                     fatal ("cannot parse standard input AND read file names from it",
1271                            (char *)NULL);
1272                   while (readline_internal (&filename_lb, stdin) > 0)
1273                     process_file_name (filename_lb.buffer, lang);
1274                 }
1275               else
1276                 process_file_name (this_file, lang);
1277           break;
1278         case at_stdin:
1279           this_file = argbuffer[i].what;
1280           process_file (stdin, this_file, lang);
1281           break;
1282         }
1283     }
1284
1285   free_regexps ();
1286   free (lb.buffer);
1287   free (filebuf.buffer);
1288   free (token_name.buffer);
1289
1290   if (!CTAGS || cxref_style)
1291     {
1292       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1293       put_entries (nodehead);
1294       free_tree (nodehead);
1295       nodehead = NULL;
1296       if (!CTAGS)
1297         {
1298           fdesc *fdp;
1299
1300           /* Output file entries that have no tags. */
1301           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1302             if (!fdp->written)
1303               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1304
1305           while (nincluded_files-- > 0)
1306             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1307
1308           if (fclose (tagf) == EOF)
1309             pfatal (tagfile);
1310         }
1311
1312       exit (EXIT_SUCCESS);
1313     }
1314
1315   /* From here on, we are in (CTAGS && !cxref_style) */
1316   if (update)
1317     {
1318       char *cmd =
1319         xmalloc (strlen (tagfile) + whatlen_max +
1320                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1321       for (i = 0; i < current_arg; ++i)
1322         {
1323           switch (argbuffer[i].arg_type)
1324             {
1325             case at_filename:
1326             case at_stdin:
1327               break;
1328             default:
1329               continue;         /* the for loop */
1330             }
1331           strcpy (cmd, "mv ");
1332           strcat (cmd, tagfile);
1333           strcat (cmd, " OTAGS;fgrep -v '\t");
1334           strcat (cmd, argbuffer[i].what);
1335           strcat (cmd, "\t' OTAGS >");
1336           strcat (cmd, tagfile);
1337           strcat (cmd, ";rm OTAGS");
1338           if (system (cmd) != EXIT_SUCCESS)
1339             fatal ("failed to execute shell command", (char *)NULL);
1340         }
1341       free (cmd);
1342       append_to_tagfile = TRUE;
1343     }
1344
1345   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1346   if (tagf == NULL)
1347     pfatal (tagfile);
1348   put_entries (nodehead);       /* write all the tags (CTAGS) */
1349   free_tree (nodehead);
1350   nodehead = NULL;
1351   if (fclose (tagf) == EOF)
1352     pfatal (tagfile);
1353
1354   if (CTAGS)
1355     if (append_to_tagfile || update)
1356       {
1357         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1358         /* Maybe these should be used:
1359            setenv ("LC_COLLATE", "C", 1);
1360            setenv ("LC_ALL", "C", 1); */
1361         strcpy (cmd, "sort -u -o ");
1362         strcat (cmd, tagfile);
1363         strcat (cmd, " ");
1364         strcat (cmd, tagfile);
1365         exit (system (cmd));
1366       }
1367   return EXIT_SUCCESS;
1368 }
1369
1370
1371 /*
1372  * Return a compressor given the file name.  If EXTPTR is non-zero,
1373  * return a pointer into FILE where the compressor-specific
1374  * extension begins.  If no compressor is found, NULL is returned
1375  * and EXTPTR is not significant.
1376  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1377  */
1378 static compressor *
1379 get_compressor_from_suffix (char *file, char **extptr)
1380 {
1381   compressor *compr;
1382   char *slash, *suffix;
1383
1384   /* File has been processed by canonicalize_filename,
1385      so we don't need to consider backslashes on DOS_NT.  */
1386   slash = etags_strrchr (file, '/');
1387   suffix = etags_strrchr (file, '.');
1388   if (suffix == NULL || suffix < slash)
1389     return NULL;
1390   if (extptr != NULL)
1391     *extptr = suffix;
1392   suffix += 1;
1393   /* Let those poor souls who live with DOS 8+3 file name limits get
1394      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1395      Only the first do loop is run if not MSDOS */
1396   do
1397     {
1398       for (compr = compressors; compr->suffix != NULL; compr++)
1399         if (streq (compr->suffix, suffix))
1400           return compr;
1401       if (!MSDOS)
1402         break;                  /* do it only once: not really a loop */
1403       if (extptr != NULL)
1404         *extptr = ++suffix;
1405     } while (*suffix != '\0');
1406   return NULL;
1407 }
1408
1409
1410
1411 /*
1412  * Return a language given the name.
1413  */
1414 static language *
1415 get_language_from_langname (const char *name)
1416 {
1417   language *lang;
1418
1419   if (name == NULL)
1420     error ("empty language name");
1421   else
1422     {
1423       for (lang = lang_names; lang->name != NULL; lang++)
1424         if (streq (name, lang->name))
1425           return lang;
1426       error ("unknown language \"%s\"", name);
1427     }
1428
1429   return NULL;
1430 }
1431
1432
1433 /*
1434  * Return a language given the interpreter name.
1435  */
1436 static language *
1437 get_language_from_interpreter (char *interpreter)
1438 {
1439   language *lang;
1440   const char **iname;
1441
1442   if (interpreter == NULL)
1443     return NULL;
1444   for (lang = lang_names; lang->name != NULL; lang++)
1445     if (lang->interpreters != NULL)
1446       for (iname = lang->interpreters; *iname != NULL; iname++)
1447         if (streq (*iname, interpreter))
1448             return lang;
1449
1450   return NULL;
1451 }
1452
1453
1454
1455 /*
1456  * Return a language given the file name.
1457  */
1458 static language *
1459 get_language_from_filename (char *file, int case_sensitive)
1460 {
1461   language *lang;
1462   const char **name, **ext, *suffix;
1463
1464   /* Try whole file name first. */
1465   for (lang = lang_names; lang->name != NULL; lang++)
1466     if (lang->filenames != NULL)
1467       for (name = lang->filenames; *name != NULL; name++)
1468         if ((case_sensitive)
1469             ? streq (*name, file)
1470             : strcaseeq (*name, file))
1471           return lang;
1472
1473   /* If not found, try suffix after last dot. */
1474   suffix = etags_strrchr (file, '.');
1475   if (suffix == NULL)
1476     return NULL;
1477   suffix += 1;
1478   for (lang = lang_names; lang->name != NULL; lang++)
1479     if (lang->suffixes != NULL)
1480       for (ext = lang->suffixes; *ext != NULL; ext++)
1481         if ((case_sensitive)
1482             ? streq (*ext, suffix)
1483             : strcaseeq (*ext, suffix))
1484           return lang;
1485   return NULL;
1486 }
1487
1488 \f
1489 /*
1490  * This routine is called on each file argument.
1491  */
1492 static void
1493 process_file_name (char *file, language *lang)
1494 {
1495   struct stat stat_buf;
1496   FILE *inf;
1497   fdesc *fdp;
1498   compressor *compr;
1499   char *compressed_name, *uncompressed_name;
1500   char *ext, *real_name;
1501   int retval;
1502
1503   canonicalize_filename (file);
1504   if (streq (file, tagfile) && !streq (tagfile, "-"))
1505     {
1506       error ("skipping inclusion of %s in self.", file);
1507       return;
1508     }
1509   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1510     {
1511       compressed_name = NULL;
1512       real_name = uncompressed_name = savestr (file);
1513     }
1514   else
1515     {
1516       real_name = compressed_name = savestr (file);
1517       uncompressed_name = savenstr (file, ext - file);
1518     }
1519
1520   /* If the canonicalized uncompressed name
1521      has already been dealt with, skip it silently. */
1522   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1523     {
1524       assert (fdp->infname != NULL);
1525       if (streq (uncompressed_name, fdp->infname))
1526         goto cleanup;
1527     }
1528
1529   if (stat (real_name, &stat_buf) != 0)
1530     {
1531       /* Reset real_name and try with a different name. */
1532       real_name = NULL;
1533       if (compressed_name != NULL) /* try with the given suffix */
1534         {
1535           if (stat (uncompressed_name, &stat_buf) == 0)
1536             real_name = uncompressed_name;
1537         }
1538       else                      /* try all possible suffixes */
1539         {
1540           for (compr = compressors; compr->suffix != NULL; compr++)
1541             {
1542               compressed_name = concat (file, ".", compr->suffix);
1543               if (stat (compressed_name, &stat_buf) != 0)
1544                 {
1545                   if (MSDOS)
1546                     {
1547                       char *suf = compressed_name + strlen (file);
1548                       size_t suflen = strlen (compr->suffix) + 1;
1549                       for ( ; suf[1]; suf++, suflen--)
1550                         {
1551                           memmove (suf, suf + 1, suflen);
1552                           if (stat (compressed_name, &stat_buf) == 0)
1553                             {
1554                               real_name = compressed_name;
1555                               break;
1556                             }
1557                         }
1558                       if (real_name != NULL)
1559                         break;
1560                     } /* MSDOS */
1561                   free (compressed_name);
1562                   compressed_name = NULL;
1563                 }
1564               else
1565                 {
1566                   real_name = compressed_name;
1567                   break;
1568                 }
1569             }
1570         }
1571       if (real_name == NULL)
1572         {
1573           perror (file);
1574           goto cleanup;
1575         }
1576     } /* try with a different name */
1577
1578   if (!S_ISREG (stat_buf.st_mode))
1579     {
1580       error ("skipping %s: it is not a regular file.", real_name);
1581       goto cleanup;
1582     }
1583   if (real_name == compressed_name)
1584     {
1585       char *cmd = concat (compr->command, " ", real_name);
1586       inf = (FILE *) popen (cmd, "r");
1587       free (cmd);
1588     }
1589   else
1590     inf = fopen (real_name, "r");
1591   if (inf == NULL)
1592     {
1593       perror (real_name);
1594       goto cleanup;
1595     }
1596
1597   process_file (inf, uncompressed_name, lang);
1598
1599   if (real_name == compressed_name)
1600     retval = pclose (inf);
1601   else
1602     retval = fclose (inf);
1603   if (retval < 0)
1604     pfatal (file);
1605
1606  cleanup:
1607   free (compressed_name);
1608   free (uncompressed_name);
1609   last_node = NULL;
1610   curfdp = NULL;
1611   return;
1612 }
1613
1614 static void
1615 process_file (FILE *fh, char *fn, language *lang)
1616 {
1617   static const fdesc emptyfdesc;
1618   fdesc *fdp;
1619
1620   /* Create a new input file description entry. */
1621   fdp = xnew (1, fdesc);
1622   *fdp = emptyfdesc;
1623   fdp->next = fdhead;
1624   fdp->infname = savestr (fn);
1625   fdp->lang = lang;
1626   fdp->infabsname = absolute_filename (fn, cwd);
1627   fdp->infabsdir = absolute_dirname (fn, cwd);
1628   if (filename_is_absolute (fn))
1629     {
1630       /* An absolute file name.  Canonicalize it. */
1631       fdp->taggedfname = absolute_filename (fn, NULL);
1632     }
1633   else
1634     {
1635       /* A file name relative to cwd.  Make it relative
1636          to the directory of the tags file. */
1637       fdp->taggedfname = relative_filename (fn, tagfiledir);
1638     }
1639   fdp->usecharno = TRUE;        /* use char position when making tags */
1640   fdp->prop = NULL;
1641   fdp->written = FALSE;         /* not written on tags file yet */
1642
1643   fdhead = fdp;
1644   curfdp = fdhead;              /* the current file description */
1645
1646   find_entries (fh);
1647
1648   /* If not Ctags, and if this is not metasource and if it contained no #line
1649      directives, we can write the tags and free all nodes pointing to
1650      curfdp. */
1651   if (!CTAGS
1652       && curfdp->usecharno      /* no #line directives in this file */
1653       && !curfdp->lang->metasource)
1654     {
1655       node *np, *prev;
1656
1657       /* Look for the head of the sublist relative to this file.  See add_node
1658          for the structure of the node tree. */
1659       prev = NULL;
1660       for (np = nodehead; np != NULL; prev = np, np = np->left)
1661         if (np->fdp == curfdp)
1662           break;
1663
1664       /* If we generated tags for this file, write and delete them. */
1665       if (np != NULL)
1666         {
1667           /* This is the head of the last sublist, if any.  The following
1668              instructions depend on this being true. */
1669           assert (np->left == NULL);
1670
1671           assert (fdhead == curfdp);
1672           assert (last_node->fdp == curfdp);
1673           put_entries (np);     /* write tags for file curfdp->taggedfname */
1674           free_tree (np);       /* remove the written nodes */
1675           if (prev == NULL)
1676             nodehead = NULL;    /* no nodes left */
1677           else
1678             prev->left = NULL;  /* delete the pointer to the sublist */
1679         }
1680     }
1681 }
1682
1683 /*
1684  * This routine sets up the boolean pseudo-functions which work
1685  * by setting boolean flags dependent upon the corresponding character.
1686  * Every char which is NOT in that string is not a white char.  Therefore,
1687  * all of the array "_wht" is set to FALSE, and then the elements
1688  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1689  * of a char is TRUE if it is the string "white", else FALSE.
1690  */
1691 static void
1692 init (void)
1693 {
1694   register const char *sp;
1695   register int i;
1696
1697   for (i = 0; i < CHARS; i++)
1698     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1699   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1700   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1701   notinname ('\0') = notinname ('\n');
1702   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1703   begtoken ('\0') = begtoken ('\n');
1704   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1705   intoken ('\0') = intoken ('\n');
1706   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1707   endtoken ('\0') = endtoken ('\n');
1708 }
1709
1710 /*
1711  * This routine opens the specified file and calls the function
1712  * which finds the function and type definitions.
1713  */
1714 static void
1715 find_entries (FILE *inf)
1716 {
1717   char *cp;
1718   language *lang = curfdp->lang;
1719   Lang_function *parser = NULL;
1720
1721   /* If user specified a language, use it. */
1722   if (lang != NULL && lang->function != NULL)
1723     {
1724       parser = lang->function;
1725     }
1726
1727   /* Else try to guess the language given the file name. */
1728   if (parser == NULL)
1729     {
1730       lang = get_language_from_filename (curfdp->infname, TRUE);
1731       if (lang != NULL && lang->function != NULL)
1732         {
1733           curfdp->lang = lang;
1734           parser = lang->function;
1735         }
1736     }
1737
1738   /* Else look for sharp-bang as the first two characters. */
1739   if (parser == NULL
1740       && readline_internal (&lb, inf) > 0
1741       && lb.len >= 2
1742       && lb.buffer[0] == '#'
1743       && lb.buffer[1] == '!')
1744     {
1745       char *lp;
1746
1747       /* Set lp to point at the first char after the last slash in the
1748          line or, if no slashes, at the first nonblank.  Then set cp to
1749          the first successive blank and terminate the string. */
1750       lp = etags_strrchr (lb.buffer+2, '/');
1751       if (lp != NULL)
1752         lp += 1;
1753       else
1754         lp = skip_spaces (lb.buffer + 2);
1755       cp = skip_non_spaces (lp);
1756       *cp = '\0';
1757
1758       if (strlen (lp) > 0)
1759         {
1760           lang = get_language_from_interpreter (lp);
1761           if (lang != NULL && lang->function != NULL)
1762             {
1763               curfdp->lang = lang;
1764               parser = lang->function;
1765             }
1766         }
1767     }
1768
1769   /* We rewind here, even if inf may be a pipe.  We fail if the
1770      length of the first line is longer than the pipe block size,
1771      which is unlikely. */
1772   rewind (inf);
1773
1774   /* Else try to guess the language given the case insensitive file name. */
1775   if (parser == NULL)
1776     {
1777       lang = get_language_from_filename (curfdp->infname, FALSE);
1778       if (lang != NULL && lang->function != NULL)
1779         {
1780           curfdp->lang = lang;
1781           parser = lang->function;
1782         }
1783     }
1784
1785   /* Else try Fortran or C. */
1786   if (parser == NULL)
1787     {
1788       node *old_last_node = last_node;
1789
1790       curfdp->lang = get_language_from_langname ("fortran");
1791       find_entries (inf);
1792
1793       if (old_last_node == last_node)
1794         /* No Fortran entries found.  Try C. */
1795         {
1796           /* We do not tag if rewind fails.
1797              Only the file name will be recorded in the tags file. */
1798           rewind (inf);
1799           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1800           find_entries (inf);
1801         }
1802       return;
1803     }
1804
1805   if (!no_line_directive
1806       && curfdp->lang != NULL && curfdp->lang->metasource)
1807     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1808        file, or anyway we parsed a file that is automatically generated from
1809        this one.  If this is the case, the bingo.c file contained #line
1810        directives that generated tags pointing to this file.  Let's delete
1811        them all before parsing this file, which is the real source. */
1812     {
1813       fdesc **fdpp = &fdhead;
1814       while (*fdpp != NULL)
1815         if (*fdpp != curfdp
1816             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1817           /* We found one of those!  We must delete both the file description
1818              and all tags referring to it. */
1819           {
1820             fdesc *badfdp = *fdpp;
1821
1822             /* Delete the tags referring to badfdp->taggedfname
1823                that were obtained from badfdp->infname. */
1824             invalidate_nodes (badfdp, &nodehead);
1825
1826             *fdpp = badfdp->next; /* remove the bad description from the list */
1827             free_fdesc (badfdp);
1828           }
1829         else
1830           fdpp = &(*fdpp)->next; /* advance the list pointer */
1831     }
1832
1833   assert (parser != NULL);
1834
1835   /* Generic initializations before reading from file. */
1836   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1837
1838   /* Generic initializations before parsing file with readline. */
1839   lineno = 0;                  /* reset global line number */
1840   charno = 0;                  /* reset global char number */
1841   linecharno = 0;              /* reset global char number of line start */
1842
1843   parser (inf);
1844
1845   regex_tag_multiline ();
1846 }
1847
1848 \f
1849 /*
1850  * Check whether an implicitly named tag should be created,
1851  * then call `pfnote'.
1852  * NAME is a string that is internally copied by this function.
1853  *
1854  * TAGS format specification
1855  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1856  * The following is explained in some more detail in etc/ETAGS.EBNF.
1857  *
1858  * make_tag creates tags with "implicit tag names" (unnamed tags)
1859  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1860  *  1. NAME does not contain any of the characters in NONAM;
1861  *  2. LINESTART contains name as either a rightmost, or rightmost but
1862  *     one character, substring;
1863  *  3. the character, if any, immediately before NAME in LINESTART must
1864  *     be a character in NONAM;
1865  *  4. the character, if any, immediately after NAME in LINESTART must
1866  *     also be a character in NONAM.
1867  *
1868  * The implementation uses the notinname() macro, which recognizes the
1869  * characters stored in the string `nonam'.
1870  * etags.el needs to use the same characters that are in NONAM.
1871  */
1872 static void
1873 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1874           int namelen,          /* tag length */
1875           int is_func,          /* tag is a function */
1876           char *linestart,      /* start of the line where tag is */
1877           int linelen,          /* length of the line where tag is */
1878           int lno,              /* line number */
1879           long int cno)         /* character number */
1880 {
1881   bool named = (name != NULL && namelen > 0);
1882   char *nname = NULL;
1883
1884   if (!CTAGS && named)          /* maybe set named to false */
1885     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1886        such that etags.el can guess a name from it. */
1887     {
1888       int i;
1889       register const char *cp = name;
1890
1891       for (i = 0; i < namelen; i++)
1892         if (notinname (*cp++))
1893           break;
1894       if (i == namelen)                         /* rule #1 */
1895         {
1896           cp = linestart + linelen - namelen;
1897           if (notinname (linestart[linelen-1]))
1898             cp -= 1;                            /* rule #4 */
1899           if (cp >= linestart                   /* rule #2 */
1900               && (cp == linestart
1901                   || notinname (cp[-1]))        /* rule #3 */
1902               && strneq (name, cp, namelen))    /* rule #2 */
1903             named = FALSE;      /* use implicit tag name */
1904         }
1905     }
1906
1907   if (named)
1908     nname = savenstr (name, namelen);
1909
1910   pfnote (nname, is_func, linestart, linelen, lno, cno);
1911 }
1912
1913 /* Record a tag. */
1914 static void
1915 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1916                                 /* tag name, or NULL if unnamed */
1917                                 /* tag is a function */
1918                                 /* start of the line where tag is */
1919                                 /* length of the line where tag is */
1920                                 /* line number */
1921                                 /* character number */
1922 {
1923   register node *np;
1924
1925   assert (name == NULL || name[0] != '\0');
1926   if (CTAGS && name == NULL)
1927     return;
1928
1929   np = xnew (1, node);
1930
1931   /* If ctags mode, change name "main" to M<thisfilename>. */
1932   if (CTAGS && !cxref_style && streq (name, "main"))
1933     {
1934       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1935       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1936       fp = etags_strrchr (np->name, '.');
1937       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1938         fp[0] = '\0';
1939     }
1940   else
1941     np->name = name;
1942   np->valid = TRUE;
1943   np->been_warned = FALSE;
1944   np->fdp = curfdp;
1945   np->is_func = is_func;
1946   np->lno = lno;
1947   if (np->fdp->usecharno)
1948     /* Our char numbers are 0-base, because of C language tradition?
1949        ctags compatibility?  old versions compatibility?   I don't know.
1950        Anyway, since emacs's are 1-base we expect etags.el to take care
1951        of the difference.  If we wanted to have 1-based numbers, we would
1952        uncomment the +1 below. */
1953     np->cno = cno /* + 1 */ ;
1954   else
1955     np->cno = invalidcharno;
1956   np->left = np->right = NULL;
1957   if (CTAGS && !cxref_style)
1958     {
1959       if (strlen (linestart) < 50)
1960         np->regex = concat (linestart, "$", "");
1961       else
1962         np->regex = savenstr (linestart, 50);
1963     }
1964   else
1965     np->regex = savenstr (linestart, linelen);
1966
1967   add_node (np, &nodehead);
1968 }
1969
1970 /*
1971  * free_tree ()
1972  *      recurse on left children, iterate on right children.
1973  */
1974 static void
1975 free_tree (register node *np)
1976 {
1977   while (np)
1978     {
1979       register node *node_right = np->right;
1980       free_tree (np->left);
1981       free (np->name);
1982       free (np->regex);
1983       free (np);
1984       np = node_right;
1985     }
1986 }
1987
1988 /*
1989  * free_fdesc ()
1990  *      delete a file description
1991  */
1992 static void
1993 free_fdesc (register fdesc *fdp)
1994 {
1995   free (fdp->infname);
1996   free (fdp->infabsname);
1997   free (fdp->infabsdir);
1998   free (fdp->taggedfname);
1999   free (fdp->prop);
2000   free (fdp);
2001 }
2002
2003 /*
2004  * add_node ()
2005  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2006  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2007  *      balancing.
2008  *
2009  *      add_node is the only function allowed to add nodes, so it can
2010  *      maintain state.
2011  */
2012 static void
2013 add_node (node *np, node **cur_node_p)
2014 {
2015   register int dif;
2016   register node *cur_node = *cur_node_p;
2017
2018   if (cur_node == NULL)
2019     {
2020       *cur_node_p = np;
2021       last_node = np;
2022       return;
2023     }
2024
2025   if (!CTAGS)
2026     /* Etags Mode */
2027     {
2028       /* For each file name, tags are in a linked sublist on the right
2029          pointer.  The first tags of different files are a linked list
2030          on the left pointer.  last_node points to the end of the last
2031          used sublist. */
2032       if (last_node != NULL && last_node->fdp == np->fdp)
2033         {
2034           /* Let's use the same sublist as the last added node. */
2035           assert (last_node->right == NULL);
2036           last_node->right = np;
2037           last_node = np;
2038         }
2039       else if (cur_node->fdp == np->fdp)
2040         {
2041           /* Scanning the list we found the head of a sublist which is
2042              good for us.  Let's scan this sublist. */
2043           add_node (np, &cur_node->right);
2044         }
2045       else
2046         /* The head of this sublist is not good for us.  Let's try the
2047            next one. */
2048         add_node (np, &cur_node->left);
2049     } /* if ETAGS mode */
2050
2051   else
2052     {
2053       /* Ctags Mode */
2054       dif = strcmp (np->name, cur_node->name);
2055
2056       /*
2057        * If this tag name matches an existing one, then
2058        * do not add the node, but maybe print a warning.
2059        */
2060       if (no_duplicates && !dif)
2061         {
2062           if (np->fdp == cur_node->fdp)
2063             {
2064               if (!no_warnings)
2065                 {
2066                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2067                            np->fdp->infname, lineno, np->name);
2068                   fprintf (stderr, "Second entry ignored\n");
2069                 }
2070             }
2071           else if (!cur_node->been_warned && !no_warnings)
2072             {
2073               fprintf
2074                 (stderr,
2075                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2076                  np->fdp->infname, cur_node->fdp->infname, np->name);
2077               cur_node->been_warned = TRUE;
2078             }
2079           return;
2080         }
2081
2082       /* Actually add the node */
2083       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2084     } /* if CTAGS mode */
2085 }
2086
2087 /*
2088  * invalidate_nodes ()
2089  *      Scan the node tree and invalidate all nodes pointing to the
2090  *      given file description (CTAGS case) or free them (ETAGS case).
2091  */
2092 static void
2093 invalidate_nodes (fdesc *badfdp, node **npp)
2094 {
2095   node *np = *npp;
2096
2097   if (np == NULL)
2098     return;
2099
2100   if (CTAGS)
2101     {
2102       if (np->left != NULL)
2103         invalidate_nodes (badfdp, &np->left);
2104       if (np->fdp == badfdp)
2105         np->valid = FALSE;
2106       if (np->right != NULL)
2107         invalidate_nodes (badfdp, &np->right);
2108     }
2109   else
2110     {
2111       assert (np->fdp != NULL);
2112       if (np->fdp == badfdp)
2113         {
2114           *npp = np->left;      /* detach the sublist from the list */
2115           np->left = NULL;      /* isolate it */
2116           free_tree (np);       /* free it */
2117           invalidate_nodes (badfdp, npp);
2118         }
2119       else
2120         invalidate_nodes (badfdp, &np->left);
2121     }
2122 }
2123
2124 \f
/* Helpers for put_entries: total_size_of_entries computes the size
   written in the header of each file section of an etags file, and
   uses number_len to measure the decimal numbers it will contain. */
static int total_size_of_entries (node *);
static int number_len (long);
2127
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero counts as one digit. */
static int
number_len (long int num)
{
  int digits = 1;

  for (num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2137
2138 /*
2139  * Return total number of characters that put_entries will output for
2140  * the nodes in the linked list at the right of the specified node.
2141  * This count is irrelevant with etags.el since emacs 19.34 at least,
2142  * but is still supplied for backward compatibility.
2143  */
2144 static int
2145 total_size_of_entries (register node *np)
2146 {
2147   register int total = 0;
2148
2149   for (; np != NULL; np = np->right)
2150     if (np->valid)
2151       {
2152         total += strlen (np->regex) + 1;                /* pat\177 */
2153         if (np->name != NULL)
2154           total += strlen (np->name) + 1;               /* name\001 */
2155         total += number_len ((long) np->lno) + 1;       /* lno, */
2156         if (np->cno != invalidcharno)                   /* cno */
2157           total += number_len (np->cno);
2158         total += 1;                                     /* newline */
2159       }
2160
2161   return total;
2162 }
2163
2164 static void
2165 put_entries (register node *np)
2166 {
2167   register char *sp;
2168   static fdesc *fdp = NULL;
2169
2170   if (np == NULL)
2171     return;
2172
2173   /* Output subentries that precede this one */
2174   if (CTAGS)
2175     put_entries (np->left);
2176
2177   /* Output this entry */
2178   if (np->valid)
2179     {
2180       if (!CTAGS)
2181         {
2182           /* Etags mode */
2183           if (fdp != np->fdp)
2184             {
2185               fdp = np->fdp;
2186               fprintf (tagf, "\f\n%s,%d\n",
2187                        fdp->taggedfname, total_size_of_entries (np));
2188               fdp->written = TRUE;
2189             }
2190           fputs (np->regex, tagf);
2191           fputc ('\177', tagf);
2192           if (np->name != NULL)
2193             {
2194               fputs (np->name, tagf);
2195               fputc ('\001', tagf);
2196             }
2197           fprintf (tagf, "%d,", np->lno);
2198           if (np->cno != invalidcharno)
2199             fprintf (tagf, "%ld", np->cno);
2200           fputs ("\n", tagf);
2201         }
2202       else
2203         {
2204           /* Ctags mode */
2205           if (np->name == NULL)
2206             error ("internal error: NULL name in ctags mode.");
2207
2208           if (cxref_style)
2209             {
2210               if (vgrind_style)
2211                 fprintf (stdout, "%s %s %d\n",
2212                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2213               else
2214                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2215                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2216             }
2217           else
2218             {
2219               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2220
2221               if (np->is_func)
2222                 {               /* function or #define macro with args */
2223                   putc (searchar, tagf);
2224                   putc ('^', tagf);
2225
2226                   for (sp = np->regex; *sp; sp++)
2227                     {
2228                       if (*sp == '\\' || *sp == searchar)
2229                         putc ('\\', tagf);
2230                       putc (*sp, tagf);
2231                     }
2232                   putc (searchar, tagf);
2233                 }
2234               else
2235                 {               /* anything else; text pattern inadequate */
2236                   fprintf (tagf, "%d", np->lno);
2237                 }
2238               putc ('\n', tagf);
2239             }
2240         }
2241     } /* if this node contains a valid tag */
2242
2243   /* Output subentries that follow this one */
2244   put_entries (np->right);
2245   if (!CTAGS)
2246     put_entries (np->left);
2247 }
2248
2249 \f
/* C extensions.
   Flags naming the C dialect; C_symtype tests such flags against the
   c_ext field of the keyword table entries.  Note that C_STAR (0x3)
   and C_JAVA (0x5) both include the C_PLPL bit (0x1): this is why the
   keyword table writes (C_JAVA & ~C_PLPL) for keywords that must not
   be recognized in C++. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2258
/*
 * The C symbol tables.
 *
 * enum sym_type is the class that the keyword table below assigns to
 * each keyword it knows about; C_symtype returns st_none for any
 * other word.
 */
enum sym_type
{
  st_none,                      /* not a keyword (for this dialect) */
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, domain, interface */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};

/* Keyword lookup: hash and in_word_set are generated by gperf (see
   below), C_symtype is the dialect-aware front end to them. */
static unsigned int hash (const char *, unsigned int);
static struct C_stab_entry * in_word_set (const char *, unsigned int);
static enum sym_type C_symtype (char *, int, int);
2277
2278 /* Feed stuff between (but not including) %[ and %] lines to:
2279      gperf -m 5
2280 %[
2281 %compare-strncmp
2282 %enum
2283 %struct-type
2284 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2285 %%
2286 if,             0,                      st_C_ignore
2287 for,            0,                      st_C_ignore
2288 while,          0,                      st_C_ignore
2289 switch,         0,                      st_C_ignore
2290 return,         0,                      st_C_ignore
2291 __attribute__,  0,                      st_C_attribute
2292 GTY,            0,                      st_C_attribute
2293 @interface,     0,                      st_C_objprot
2294 @protocol,      0,                      st_C_objprot
2295 @implementation,0,                      st_C_objimpl
2296 @end,           0,                      st_C_objend
2297 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2298 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2299 friend,         C_PLPL,                 st_C_ignore
2300 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2301 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2302 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2303 class,          0,                      st_C_class
2304 namespace,      C_PLPL,                 st_C_struct
2305 domain,         C_STAR,                 st_C_struct
2306 union,          0,                      st_C_struct
2307 struct,         0,                      st_C_struct
2308 extern,         0,                      st_C_extern
2309 enum,           0,                      st_C_enum
2310 typedef,        0,                      st_C_typedef
2311 define,         0,                      st_C_define
2312 undef,          0,                      st_C_define
2313 operator,       C_PLPL,                 st_C_operator
2314 template,       0,                      st_C_template
2315 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2316 DEFUN,          0,                      st_C_gnumacro
2317 SYSCALL,        0,                      st_C_gnumacro
2318 ENTRY,          0,                      st_C_gnumacro
2319 PSEUDO,         0,                      st_C_gnumacro
2320 # These are defined inside C functions, so currently they are not met.
2321 # EXFUN used in glibc, DEFVAR_* in emacs.
2322 #EXFUN,         0,                      st_C_gnumacro
2323 #DEFVAR_,       0,                      st_C_gnumacro
2324 %]
2325 and replace lines between %< and %> with its output, then:
2326  - remove the #if characterset check
2327  - make in_word_set static and not inline. */
2328 /*%<*/
2329 /* C code produced by gperf version 3.0.1 */
2330 /* Command-line: gperf -m 5  */
2331 /* Computed positions: -k'2-3' */
2332
/* One keyword of the symbol table: NAME is the keyword, C_EXT the
   dialect flags it is restricted to (0 means every dialect) and TYPE
   its class. */
struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2334 /* maximum key range = 33, duplicates = 0 */
2335
/* Perfect hash function generated by gperf for the keyword table of
   in_word_set.  The hash of the word STR of length LEN is LEN plus the
   association values of its second and, when LEN is not 2, third
   characters.  STR must therefore be at least two characters long;
   in_word_set checks this before calling.  The association table is
   only ever read, so it is const. */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static const unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2381
2382 static struct C_stab_entry *
2383 in_word_set (register const char *str, register unsigned int len)
2384 {
2385   enum
2386     {
2387       TOTAL_KEYWORDS = 33,
2388       MIN_WORD_LENGTH = 2,
2389       MAX_WORD_LENGTH = 15,
2390       MIN_HASH_VALUE = 2,
2391       MAX_HASH_VALUE = 34
2392     };
2393
2394   static struct C_stab_entry wordlist[] =
2395     {
2396       {""}, {""},
2397       {"if",            0,                      st_C_ignore},
2398       {"GTY",           0,                      st_C_attribute},
2399       {"@end",          0,                      st_C_objend},
2400       {"union",         0,                      st_C_struct},
2401       {"define",                0,                      st_C_define},
2402       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2403       {"template",      0,                      st_C_template},
2404       {"operator",      C_PLPL,                 st_C_operator},
2405       {"@interface",    0,                      st_C_objprot},
2406       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2407       {"friend",                C_PLPL,                 st_C_ignore},
2408       {"typedef",       0,                      st_C_typedef},
2409       {"return",                0,                      st_C_ignore},
2410       {"@implementation",0,                     st_C_objimpl},
2411       {"@protocol",     0,                      st_C_objprot},
2412       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2413       {"extern",                0,                      st_C_extern},
2414       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2415       {"struct",                0,                      st_C_struct},
2416       {"domain",                C_STAR,                 st_C_struct},
2417       {"switch",                0,                      st_C_ignore},
2418       {"enum",          0,                      st_C_enum},
2419       {"for",           0,                      st_C_ignore},
2420       {"namespace",     C_PLPL,                 st_C_struct},
2421       {"class",         0,                      st_C_class},
2422       {"while",         0,                      st_C_ignore},
2423       {"undef",         0,                      st_C_define},
2424       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2425       {"__attribute__", 0,                      st_C_attribute},
2426       {"SYSCALL",       0,                      st_C_gnumacro},
2427       {"ENTRY",         0,                      st_C_gnumacro},
2428       {"PSEUDO",                0,                      st_C_gnumacro},
2429       {"DEFUN",         0,                      st_C_gnumacro}
2430     };
2431
2432   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2433     {
2434       register int key = hash (str, len);
2435
2436       if (key <= MAX_HASH_VALUE && key >= 0)
2437         {
2438           register const char *s = wordlist[key].name;
2439
2440           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2441             return &wordlist[key];
2442         }
2443     }
2444   return 0;
2445 }
2446 /*%>*/
2447
2448 static enum sym_type
2449 C_symtype (char *str, int len, int c_ext)
2450 {
2451   register struct C_stab_entry *se = in_word_set (str, len);
2452
2453   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2454     return st_none;
2455   return se->type;
2456 }
2457
2458 \f
/*
 * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it classifies a token as
 * st_C_attribute.  While it is set, C_entries does not hand tokens
 * to consider_token.
 */
static bool inattribute;        /* looking at an __attribute__ construct */
2463
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 * C_entries resets it to fvnone before it starts scanning.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Set by consider_token on `extern'; cleared there on `typedef' and on
   keywords classified as st_C_ignore. */
static bool fvextern;           /* func or var: extern keyword seen; */
2483
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 *
 * consider_token leaves tnone (on the `typedef' keyword) only when
 * the `typedefs' option is set.  In state tend it accepts the token
 * unless it is a class, struct or enum keyword.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2497
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 *
 * In consider_token: a struct-like keyword moves the state to
 * skeyseen, the next token moves it to stagseen (and is accepted as
 * the tag), and a Java `extends'/`implements' seen in stagseen moves
 * it to scolonseen.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2511
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token overwrites it with a fresh savenstr copy of the class
 * token; the previous string is deliberately not freed there.
 */
static const char *objtag = "<uninited>";
2516
/*
 * Yet another little state machine to deal with preprocessor lines.
 *
 * In consider_token the first token after '#' moves dsharpseen to
 * ddefineseen when it is classified as st_C_define (the keyword table
 * maps `undef' to that type too), and to dignorerest otherwise.  The
 * CNL macro resets the state to dnone when a new line is read.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2527
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 * C_entries resets objdef to onone before it starts scanning.
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2547
2548
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* TRUE if a tag may be created from this
                                   token; when FALSE, do not create a tag.
                                   The token should be invalidated whenever
                                   a state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2571
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above (int, char *, int);
static void popclass_above (int);
static void write_classname (linebuffer *, const char *qualifier);

/* cname and bracelev are parallel arrays of SIZE elements, of which the
   first NL are in use.  A cname entry is NULL when the scope was pushed
   without a name. */
static struct {
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that reads a variable named `bracelev', which
   must be in scope where the macro is used. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2591
2592 static void
2593 pushclass_above (int bracelev, char *str, int len)
2594 {
2595   int nl;
2596
2597   popclass_above (bracelev);
2598   nl = cstack.nl;
2599   if (nl >= cstack.size)
2600     {
2601       int size = cstack.size *= 2;
2602       xrnew (cstack.cname, size, char *);
2603       xrnew (cstack.bracelev, size, int);
2604     }
2605   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2606   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2607   cstack.bracelev[nl] = bracelev;
2608   cstack.nl = nl + 1;
2609 }
2610
2611 static void
2612 popclass_above (int bracelev)
2613 {
2614   int nl;
2615
2616   for (nl = cstack.nl - 1;
2617        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2618        nl--)
2619     {
2620       free (cstack.cname[nl]);
2621       cstack.nl = nl;
2622     }
2623 }
2624
2625 static void
2626 write_classname (linebuffer *cn, const char *qualifier)
2627 {
2628   int i, len;
2629   int qlen = strlen (qualifier);
2630
2631   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2632     {
2633       len = 0;
2634       cn->len = 0;
2635       cn->buffer[0] = '\0';
2636     }
2637   else
2638     {
2639       len = strlen (cstack.cname[0]);
2640       linebuffer_setlen (cn, len);
2641       strcpy (cn->buffer, cstack.cname[0]);
2642     }
2643   for (i = 1; i < cstack.nl; i++)
2644     {
2645       char *s;
2646       int slen;
2647
2648       s = cstack.cname[i];
2649       if (s == NULL)
2650         continue;
2651       slen = strlen (s);
2652       len += slen + qlen;
2653       linebuffer_setlen (cn, len);
2654       strncat (cn->buffer, qualifier, qlen);
2655       strncat (cn->buffer, s, slen);
2656     }
2657 }
2658
2659 \f
/* Forward declarations for the C-family token classifier and tag
   emitter defined below. */
static bool consider_token (char *, int, int, int *, int, int, bool *);
static void make_C_tag (bool);
2662
2663 /*
2664  * consider_token ()
2665  *      checks to see if the current token is at the start of a
2666  *      function or variable, or corresponds to a typedef, or
2667  *      is a struct/union/enum tag, or #define, or an enum constant.
2668  *
2669  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2670  *      with args.  C_EXTP points to which language we are looking at.
2671  *
2672  * Globals
2673  *      fvdef                   IN OUT
2674  *      structdef               IN OUT
2675  *      definedef               IN OUT
2676  *      typdef                  IN OUT
2677  *      objdef                  IN OUT
2678  */
2679
2680 static bool
2681 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2682                                 /* IN: token pointer */
2683                                 /* IN: token length */
2684                                 /* IN: first char after the token */
2685                                 /* IN, OUT: C extensions mask */
2686                                 /* IN: brace level */
2687                                 /* IN: parenthesis level */
2688                                 /* OUT: function or variable found */
2689 {
2690   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2691      structtype is the type of the preceding struct-like keyword, and
2692      structbracelev is the brace level where it has been seen. */
2693   static enum sym_type structtype;
2694   static int structbracelev;
2695   static enum sym_type toktype;
2696
2697
2698   toktype = C_symtype (str, len, *c_extp);
2699
2700   /*
2701    * Skip __attribute__
2702    */
2703   if (toktype == st_C_attribute)
2704     {
2705       inattribute = TRUE;
2706       return FALSE;
2707      }
2708
2709    /*
2710     * Advance the definedef state machine.
2711     */
2712    switch (definedef)
2713      {
2714      case dnone:
2715        /* We're not on a preprocessor line. */
2716        if (toktype == st_C_gnumacro)
2717          {
2718            fvdef = fdefunkey;
2719            return FALSE;
2720          }
2721        break;
2722      case dsharpseen:
2723        if (toktype == st_C_define)
2724          {
2725            definedef = ddefineseen;
2726          }
2727        else
2728          {
2729            definedef = dignorerest;
2730          }
2731        return FALSE;
2732      case ddefineseen:
2733        /*
2734         * Make a tag for any macro, unless it is a constant
2735         * and constantypedefs is FALSE.
2736         */
2737        definedef = dignorerest;
2738        *is_func_or_var = (c == '(');
2739        if (!*is_func_or_var && !constantypedefs)
2740          return FALSE;
2741        else
2742          return TRUE;
2743      case dignorerest:
2744        return FALSE;
2745      default:
2746        error ("internal error: definedef value.");
2747      }
2748
2749    /*
2750     * Now typedefs
2751     */
2752    switch (typdef)
2753      {
2754      case tnone:
2755        if (toktype == st_C_typedef)
2756          {
2757            if (typedefs)
2758              typdef = tkeyseen;
2759            fvextern = FALSE;
2760            fvdef = fvnone;
2761            return FALSE;
2762          }
2763        break;
2764      case tkeyseen:
2765        switch (toktype)
2766          {
2767          case st_none:
2768          case st_C_class:
2769          case st_C_struct:
2770          case st_C_enum:
2771            typdef = ttypeseen;
2772          }
2773        break;
2774      case ttypeseen:
2775        if (structdef == snone && fvdef == fvnone)
2776          {
2777            fvdef = fvnameseen;
2778            return TRUE;
2779          }
2780        break;
2781      case tend:
2782        switch (toktype)
2783          {
2784          case st_C_class:
2785          case st_C_struct:
2786          case st_C_enum:
2787            return FALSE;
2788          }
2789        return TRUE;
2790      }
2791
2792    switch (toktype)
2793      {
2794      case st_C_javastruct:
2795        if (structdef == stagseen)
2796          structdef = scolonseen;
2797        return FALSE;
2798      case st_C_template:
2799      case st_C_class:
2800        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2801            && bracelev == 0
2802            && definedef == dnone && structdef == snone
2803            && typdef == tnone && fvdef == fvnone)
2804          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2805        if (toktype == st_C_template)
2806          break;
2807        /* FALLTHRU */
2808      case st_C_struct:
2809      case st_C_enum:
2810        if (parlev == 0
2811            && fvdef != vignore
2812            && (typdef == tkeyseen
2813                || (typedefs_or_cplusplus && structdef == snone)))
2814          {
2815            structdef = skeyseen;
2816            structtype = toktype;
2817            structbracelev = bracelev;
2818            if (fvdef == fvnameseen)
2819              fvdef = fvnone;
2820          }
2821        return FALSE;
2822      }
2823
2824    if (structdef == skeyseen)
2825      {
2826        structdef = stagseen;
2827        return TRUE;
2828      }
2829
2830    if (typdef != tnone)
2831      definedef = dnone;
2832
2833    /* Detect Objective C constructs. */
2834    switch (objdef)
2835      {
2836      case onone:
2837        switch (toktype)
2838          {
2839          case st_C_objprot:
2840            objdef = oprotocol;
2841            return FALSE;
2842          case st_C_objimpl:
2843            objdef = oimplementation;
2844            return FALSE;
2845          }
2846        break;
2847      case oimplementation:
2848        /* Save the class tag for functions or variables defined inside. */
2849        objtag = savenstr (str, len);
2850        objdef = oinbody;
2851        return FALSE;
2852      case oprotocol:
2853        /* Save the class tag for categories. */
2854        objtag = savenstr (str, len);
2855        objdef = otagseen;
2856        *is_func_or_var = TRUE;
2857        return TRUE;
2858      case oparenseen:
2859        objdef = ocatseen;
2860        *is_func_or_var = TRUE;
2861        return TRUE;
2862      case oinbody:
2863        break;
2864      case omethodsign:
2865        if (parlev == 0)
2866          {
2867            fvdef = fvnone;
2868            objdef = omethodtag;
2869            linebuffer_setlen (&token_name, len);
2870            strncpy (token_name.buffer, str, len);
2871            token_name.buffer[len] = '\0';
2872            return TRUE;
2873          }
2874        return FALSE;
2875      case omethodcolon:
2876        if (parlev == 0)
2877          objdef = omethodparm;
2878        return FALSE;
2879      case omethodparm:
2880        if (parlev == 0)
2881          {
2882            fvdef = fvnone;
2883            objdef = omethodtag;
2884            linebuffer_setlen (&token_name, token_name.len + len);
2885            strncat (token_name.buffer, str, len);
2886            return TRUE;
2887          }
2888        return FALSE;
2889      case oignore:
2890        if (toktype == st_C_objend)
2891          {
2892            /* Memory leakage here: the string pointed by objtag is
2893               never released, because many tests would be needed to
2894               avoid breaking on incorrect input code.  The amount of
2895               memory leaked here is the sum of the lengths of the
2896               class tags.
2897            free (objtag); */
2898            objdef = onone;
2899          }
2900        return FALSE;
2901      }
2902
2903    /* A function, variable or enum constant? */
2904    switch (toktype)
2905      {
2906      case st_C_extern:
2907        fvextern = TRUE;
2908        switch  (fvdef)
2909          {
2910          case finlist:
2911          case flistseen:
2912          case fignore:
2913          case vignore:
2914            break;
2915          default:
2916            fvdef = fvnone;
2917          }
2918        return FALSE;
2919      case st_C_ignore:
2920        fvextern = FALSE;
2921        fvdef = vignore;
2922        return FALSE;
2923      case st_C_operator:
2924        fvdef = foperator;
2925        *is_func_or_var = TRUE;
2926        return TRUE;
2927      case st_none:
2928        if (constantypedefs
2929            && structdef == snone
2930            && structtype == st_C_enum && bracelev > structbracelev)
2931          return TRUE;           /* enum constant */
2932        switch (fvdef)
2933          {
2934          case fdefunkey:
2935            if (bracelev > 0)
2936              break;
2937            fvdef = fdefunname;  /* GNU macro */
2938            *is_func_or_var = TRUE;
2939            return TRUE;
2940          case fvnone:
2941            switch (typdef)
2942              {
2943              case ttypeseen:
2944                return FALSE;
2945              case tnone:
2946                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2947                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2948                  {
2949                    fvdef = vignore;
2950                    return FALSE;
2951                  }
2952                break;
2953              }
2954           /* FALLTHRU */
2955           case fvnameseen:
2956           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2957             {
2958               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2959                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2960               fvdef = foperator;
2961               *is_func_or_var = TRUE;
2962               return TRUE;
2963             }
2964           if (bracelev > 0 && !instruct)
2965             break;
2966           fvdef = fvnameseen;   /* function or variable */
2967           *is_func_or_var = TRUE;
2968           return TRUE;
2969         }
2970       break;
2971     }
2972
2973   return FALSE;
2974 }
2975
2976 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are meant to be expanded inside C_entries: they
 * read its locals curndx and newndx and its parameter c_ext.
 */
static struct
{
  long linepos;                 /* value of charno saved when the line in
                                   LB was read (see CNL_SAVE_DEFINEDEF) */
  linebuffer lb;                /* the line itself */
} lbs[2];

/* True when the buffer last read into is also the current one. */
#define current_lb_is_new (newndx == curndx)
/* Make the other of the two buffers the current one. */
#define switch_line_buffers() (curndx = 1 - curndx)

/* Current and newest line buffer, and their saved char positions. */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Dialect tests on the c_ext mask. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)
2999
/* Read the next input line into the current buffer: save charno as the
   line's position, point lp at the start of the new text, clear
   quotednl and mark the current buffer as the newest one.  As the name
   says, definedef is left untouched.  Uses the C_entries locals lp,
   quotednl, newndx, curndx and its parameter inf. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also end any preprocessor line: if a
   token was put aside in savetoken, move it back into token, and reset
   definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3019
3020
3021 static void
3022 make_C_tag (int isfun)
3023 {
3024   /* This function is never called when token.valid is FALSE, but
3025      we must protect against invalid input or internal errors. */
3026   if (token.valid)
3027     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3028               token.offset+token.length+1, token.lineno, token.linepos);
3029   else if (DEBUG)
3030     {                             /* this branch is optimized away if !DEBUG */
3031       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3032                 token_name.len + 17, isfun, token.line,
3033                 token.offset+token.length+1, token.lineno, token.linepos);
3034       error ("INVALID TOKEN");
3035     }
3036
3037   token.valid = FALSE;
3038 }
3039
3040
3041 /*
3042  * C_entries ()
3043  *      This routine finds functions, variables, typedefs,
3044  *      #define's, enum constants and struct/union/enum definitions in
3045  *      C syntax and adds them to the list.
3046  */
3047 static void
3048 C_entries (int c_ext, FILE *inf)
3049                                 /* extension of C */
3050                                 /* input file */
3051 {
3052   register char c;              /* latest char read; '\0' for end of line */
3053   register char *lp;            /* pointer one beyond the character `c' */
3054   int curndx, newndx;           /* indices for current and new lb */
3055   register int tokoff;          /* offset in line of start of current token */
3056   register int toklen;          /* length of current token */
3057   const char *qualifier;        /* string used to qualify names */
3058   int qlen;                     /* length of qualifier */
3059   int bracelev;                 /* current brace level */
3060   int bracketlev;               /* current bracket level */
3061   int parlev;                   /* current parenthesis level */
3062   int attrparlev;               /* __attribute__ parenthesis level */
3063   int templatelev;              /* current template level */
3064   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3065   bool incomm, inquote, inchar, quotednl, midtoken;
3066   bool yacc_rules;              /* in the rules part of a yacc file */
3067   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3068
3069
3070   linebuffer_init (&lbs[0].lb);
3071   linebuffer_init (&lbs[1].lb);
3072   if (cstack.size == 0)
3073     {
3074       cstack.size = (DEBUG) ? 1 : 4;
3075       cstack.nl = 0;
3076       cstack.cname = xnew (cstack.size, char *);
3077       cstack.bracelev = xnew (cstack.size, int);
3078     }
3079
3080   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3081   curndx = newndx = 0;
3082   lp = curlb.buffer;
3083   *lp = 0;
3084
3085   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3086   structdef = snone; definedef = dnone; objdef = onone;
3087   yacc_rules = FALSE;
3088   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3089   token.valid = savetoken.valid = FALSE;
3090   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3091   if (cjava)
3092     { qualifier = "."; qlen = 1; }
3093   else
3094     { qualifier = "::"; qlen = 2; }
3095
3096
3097   while (!feof (inf))
3098     {
3099       c = *lp++;
3100       if (c == '\\')
3101         {
3102           /* If we are at the end of the line, the next character is a
3103              '\0'; do not skip it, because it is what tells us
3104              to read the next line.  */
3105           if (*lp == '\0')
3106             {
3107               quotednl = TRUE;
3108               continue;
3109             }
3110           lp++;
3111           c = ' ';
3112         }
3113       else if (incomm)
3114         {
3115           switch (c)
3116             {
3117             case '*':
3118               if (*lp == '/')
3119                 {
3120                   c = *lp++;
3121                   incomm = FALSE;
3122                 }
3123               break;
3124             case '\0':
3125               /* Newlines inside comments do not end macro definitions in
3126                  traditional cpp. */
3127               CNL_SAVE_DEFINEDEF ();
3128               break;
3129             }
3130           continue;
3131         }
3132       else if (inquote)
3133         {
3134           switch (c)
3135             {
3136             case '"':
3137               inquote = FALSE;
3138               break;
3139             case '\0':
3140               /* Newlines inside strings do not end macro definitions
3141                  in traditional cpp, even though compilers don't
3142                  usually accept them. */
3143               CNL_SAVE_DEFINEDEF ();
3144               break;
3145             }
3146           continue;
3147         }
3148       else if (inchar)
3149         {
3150           switch (c)
3151             {
3152             case '\0':
3153               /* Hmmm, something went wrong. */
3154               CNL ();
3155               /* FALLTHRU */
3156             case '\'':
3157               inchar = FALSE;
3158               break;
3159             }
3160           continue;
3161         }
3162       else switch (c)
3163         {
3164         case '"':
3165           inquote = TRUE;
3166           if (bracketlev > 0)
3167             continue;
3168           if (inattribute)
3169             break;
3170           switch (fvdef)
3171             {
3172             case fdefunkey:
3173             case fstartlist:
3174             case finlist:
3175             case fignore:
3176             case vignore:
3177               break;
3178             default:
3179               fvextern = FALSE;
3180               fvdef = fvnone;
3181             }
3182           continue;
3183         case '\'':
3184           inchar = TRUE;
3185           if (bracketlev > 0)
3186             continue;
3187           if (inattribute)
3188             break;
3189           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3190             {
3191               fvextern = FALSE;
3192               fvdef = fvnone;
3193             }
3194           continue;
3195         case '/':
3196           if (*lp == '*')
3197             {
3198               incomm = TRUE;
3199               lp++;
3200               c = ' ';
3201               if (bracketlev > 0)
3202                 continue;
3203             }
3204           else if (/* cplpl && */ *lp == '/')
3205             {
3206               c = '\0';
3207             }
3208           break;
3209         case '%':
3210           if ((c_ext & YACC) && *lp == '%')
3211             {
3212               /* Entering or exiting rules section in yacc file. */
3213               lp++;
3214               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3215               typdef = tnone; structdef = snone;
3216               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3217               bracelev = 0;
3218               yacc_rules = !yacc_rules;
3219               continue;
3220             }
3221           else
3222             break;
3223         case '#':
3224           if (definedef == dnone)
3225             {
3226               char *cp;
3227               bool cpptoken = TRUE;
3228
3229               /* Look back on this line.  If all blanks, or nonblanks
3230                  followed by an end of comment, this is a preprocessor
3231                  token. */
3232               for (cp = newlb.buffer; cp < lp-1; cp++)
3233                 if (!iswhite (*cp))
3234                   {
3235                     if (*cp == '*' && cp[1] == '/')
3236                       {
3237                         cp++;
3238                         cpptoken = TRUE;
3239                       }
3240                     else
3241                       cpptoken = FALSE;
3242                   }
3243               if (cpptoken)
3244                 definedef = dsharpseen;
3245             } /* if (definedef == dnone) */
3246           continue;
3247         case '[':
3248           bracketlev++;
3249           continue;
3250         default:
3251           if (bracketlev > 0)
3252             {
3253               if (c == ']')
3254                 --bracketlev;
3255               else if (c == '\0')
3256                 CNL_SAVE_DEFINEDEF ();
3257               continue;
3258             }
3259           break;
3260         } /* switch (c) */
3261
3262
3263       /* Consider token only if some involved conditions are satisfied. */
3264       if (typdef != tignore
3265           && definedef != dignorerest
3266           && fvdef != finlist
3267           && templatelev == 0
3268           && (definedef != dnone
3269               || structdef != scolonseen)
3270           && !inattribute)
3271         {
3272           if (midtoken)
3273             {
3274               if (endtoken (c))
3275                 {
3276                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3277                     /* This handles :: in the middle,
3278                        but not at the beginning of an identifier.
3279                        Also, space-separated :: is not recognized. */
3280                     {
3281                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3282                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3283                       lp += 2;
3284                       toklen += 2;
3285                       c = lp[-1];
3286                       goto still_in_token;
3287                     }
3288                   else
3289                     {
3290                       bool funorvar = FALSE;
3291
3292                       if (yacc_rules
3293                           || consider_token (newlb.buffer + tokoff, toklen, c,
3294                                              &c_ext, bracelev, parlev,
3295                                              &funorvar))
3296                         {
3297                           if (fvdef == foperator)
3298                             {
3299                               char *oldlp = lp;
3300                               lp = skip_spaces (lp-1);
3301                               if (*lp != '\0')
3302                                 lp += 1;
3303                               while (*lp != '\0'
3304                                      && !iswhite (*lp) && *lp != '(')
3305                                 lp += 1;
3306                               c = *lp++;
3307                               toklen += lp - oldlp;
3308                             }
3309                           token.named = FALSE;
3310                           if (!plainc
3311                               && nestlev > 0 && definedef == dnone)
3312                             /* in struct body */
3313                             {
3314                               write_classname (&token_name, qualifier);
3315                               linebuffer_setlen (&token_name,
3316                                                  token_name.len+qlen+toklen);
3317                               strcat (token_name.buffer, qualifier);
3318                               strncat (token_name.buffer,
3319                                        newlb.buffer + tokoff, toklen);
3320                               token.named = TRUE;
3321                             }
3322                           else if (objdef == ocatseen)
3323                             /* Objective C category */
3324                             {
3325                               int len = strlen (objtag) + 2 + toklen;
3326                               linebuffer_setlen (&token_name, len);
3327                               strcpy (token_name.buffer, objtag);
3328                               strcat (token_name.buffer, "(");
3329                               strncat (token_name.buffer,
3330                                        newlb.buffer + tokoff, toklen);
3331                               strcat (token_name.buffer, ")");
3332                               token.named = TRUE;
3333                             }
3334                           else if (objdef == omethodtag
3335                                    || objdef == omethodparm)
3336                             /* Objective C method */
3337                             {
3338                               token.named = TRUE;
3339                             }
3340                           else if (fvdef == fdefunname)
3341                             /* GNU DEFUN and similar macros */
3342                             {
3343                               bool defun = (newlb.buffer[tokoff] == 'F');
3344                               int off = tokoff;
3345                               int len = toklen;
3346
3347                               /* Rewrite the tag so that emacs lisp DEFUNs
3348                                  can be found by their elisp name */
3349                               if (defun)
3350                                 {
3351                                   off += 1;
3352                                   len -= 1;
3353                                 }
3354                               linebuffer_setlen (&token_name, len);
3355                               strncpy (token_name.buffer,
3356                                        newlb.buffer + off, len);
3357                               token_name.buffer[len] = '\0';
3358                               if (defun)
3359                                 while (--len >= 0)
3360                                   if (token_name.buffer[len] == '_')
3361                                     token_name.buffer[len] = '-';
3362                               token.named = defun;
3363                             }
3364                           else
3365                             {
3366                               linebuffer_setlen (&token_name, toklen);
3367                               strncpy (token_name.buffer,
3368                                        newlb.buffer + tokoff, toklen);
3369                               token_name.buffer[toklen] = '\0';
3370                               /* Name macros and members. */
3371                               token.named = (structdef == stagseen
3372                                              || typdef == ttypeseen
3373                                              || typdef == tend
3374                                              || (funorvar
3375                                                  && definedef == dignorerest)
3376                                              || (funorvar
3377                                                  && definedef == dnone
3378                                                  && structdef == snone
3379                                                  && bracelev > 0));
3380                             }
3381                           token.lineno = lineno;
3382                           token.offset = tokoff;
3383                           token.length = toklen;
3384                           token.line = newlb.buffer;
3385                           token.linepos = newlinepos;
3386                           token.valid = TRUE;
3387
3388                           if (definedef == dnone
3389                               && (fvdef == fvnameseen
3390                                   || fvdef == foperator
3391                                   || structdef == stagseen
3392                                   || typdef == tend
3393                                   || typdef == ttypeseen
3394                                   || objdef != onone))
3395                             {
3396                               if (current_lb_is_new)
3397                                 switch_line_buffers ();
3398                             }
3399                           else if (definedef != dnone
3400                                    || fvdef == fdefunname
3401                                    || instruct)
3402                             make_C_tag (funorvar);
3403                         }
3404                       else /* not yacc and consider_token failed */
3405                         {
3406                           if (inattribute && fvdef == fignore)
3407                             {
3408                               /* We have just met __attribute__ after a
3409                                  function parameter list: do not tag the
3410                                  function again. */
3411                               fvdef = fvnone;
3412                             }
3413                         }
3414                       midtoken = FALSE;
3415                     }
3416                 } /* if (endtoken (c)) */
3417               else if (intoken (c))
3418                 still_in_token:
3419                 {
3420                   toklen++;
3421                   continue;
3422                 }
3423             } /* if (midtoken) */
3424           else if (begtoken (c))
3425             {
3426               switch (definedef)
3427                 {
3428                 case dnone:
3429                   switch (fvdef)
3430                     {
3431                     case fstartlist:
3432                       /* This prevents tagging fb in
3433                          void (__attribute__((noreturn)) *fb) (void);
3434                          Fixing this is not easy and not very important. */
3435                       fvdef = finlist;
3436                       continue;
3437                     case flistseen:
3438                       if (plainc || declarations)
3439                         {
3440                           make_C_tag (TRUE); /* a function */
3441                           fvdef = fignore;
3442                         }
3443                       break;
3444                     }
3445                   if (structdef == stagseen && !cjava)
3446                     {
3447                       popclass_above (bracelev);
3448                       structdef = snone;
3449                     }
3450                   break;
3451                 case dsharpseen:
3452                   savetoken = token;
3453                   break;
3454                 }
3455               if (!yacc_rules || lp == newlb.buffer + 1)
3456                 {
3457                   tokoff = lp - 1 - newlb.buffer;
3458                   toklen = 1;
3459                   midtoken = TRUE;
3460                 }
3461               continue;
3462             } /* if (begtoken) */
3463         } /* if must look at token */
3464
3465
3466       /* Detect end of line, colon, comma, semicolon and various braces
3467          after having handled a token.*/
3468       switch (c)
3469         {
3470         case ':':
3471           if (inattribute)
3472             break;
3473           if (yacc_rules && token.offset == 0 && token.valid)
3474             {
3475               make_C_tag (FALSE); /* a yacc function */
3476               break;
3477             }
3478           if (definedef != dnone)
3479             break;
3480           switch (objdef)
3481             {
3482             case  otagseen:
3483               objdef = oignore;
3484               make_C_tag (TRUE); /* an Objective C class */
3485               break;
3486             case omethodtag:
3487             case omethodparm:
3488               objdef = omethodcolon;
3489               linebuffer_setlen (&token_name, token_name.len + 1);
3490               strcat (token_name.buffer, ":");
3491               break;
3492             }
3493           if (structdef == stagseen)
3494             {
3495               structdef = scolonseen;
3496               break;
3497             }
3498           /* Should be useless, but may be work as a safety net. */
3499           if (cplpl && fvdef == flistseen)
3500             {
3501               make_C_tag (TRUE); /* a function */
3502               fvdef = fignore;
3503               break;
3504             }
3505           break;
3506         case ';':
3507           if (definedef != dnone || inattribute)
3508             break;
3509           switch (typdef)
3510             {
3511             case tend:
3512             case ttypeseen:
3513               make_C_tag (FALSE); /* a typedef */
3514               typdef = tnone;
3515               fvdef = fvnone;
3516               break;
3517             case tnone:
3518             case tinbody:
3519             case tignore:
3520               switch (fvdef)
3521                 {
3522                 case fignore:
3523                   if (typdef == tignore || cplpl)
3524                     fvdef = fvnone;
3525                   break;
3526                 case fvnameseen:
3527                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3528                       || (members && instruct))
3529                     make_C_tag (FALSE); /* a variable */
3530                   fvextern = FALSE;
3531                   fvdef = fvnone;
3532                   token.valid = FALSE;
3533                   break;
3534                 case flistseen:
3535                   if ((declarations
3536                        && (cplpl || !instruct)
3537                        && (typdef == tnone || (typdef != tignore && instruct)))
3538                       || (members
3539                           && plainc && instruct))
3540                     make_C_tag (TRUE);  /* a function */
3541                   /* FALLTHRU */
3542                 default:
3543                   fvextern = FALSE;
3544                   fvdef = fvnone;
3545                   if (declarations
3546                        && cplpl && structdef == stagseen)
3547                     make_C_tag (FALSE); /* forward declaration */
3548                   else
3549                     token.valid = FALSE;
3550                 } /* switch (fvdef) */
3551               /* FALLTHRU */
3552             default:
3553               if (!instruct)
3554                 typdef = tnone;
3555             }
3556           if (structdef == stagseen)
3557             structdef = snone;
3558           break;
3559         case ',':
3560           if (definedef != dnone || inattribute)
3561             break;
3562           switch (objdef)
3563             {
3564             case omethodtag:
3565             case omethodparm:
3566               make_C_tag (TRUE); /* an Objective C method */
3567               objdef = oinbody;
3568               break;
3569             }
3570           switch (fvdef)
3571             {
3572             case fdefunkey:
3573             case foperator:
3574             case fstartlist:
3575             case finlist:
3576             case fignore:
3577             case vignore:
3578               break;
3579             case fdefunname:
3580               fvdef = fignore;
3581               break;
3582             case fvnameseen:
3583               if (parlev == 0
3584                   && ((globals
3585                        && bracelev == 0
3586                        && templatelev == 0
3587                        && (!fvextern || declarations))
3588                       || (members && instruct)))
3589                   make_C_tag (FALSE); /* a variable */
3590               break;
3591             case flistseen:
3592               if ((declarations && typdef == tnone && !instruct)
3593                   || (members && typdef != tignore && instruct))
3594                 {
3595                   make_C_tag (TRUE); /* a function */
3596                   fvdef = fvnameseen;
3597                 }
3598               else if (!declarations)
3599                 fvdef = fvnone;
3600               token.valid = FALSE;
3601               break;
3602             default:
3603               fvdef = fvnone;
3604             }
3605           if (structdef == stagseen)
3606             structdef = snone;
3607           break;
3608         case ']':
3609           if (definedef != dnone || inattribute)
3610             break;
3611           if (structdef == stagseen)
3612             structdef = snone;
3613           switch (typdef)
3614             {
3615             case ttypeseen:
3616             case tend:
3617               typdef = tignore;
3618               make_C_tag (FALSE);       /* a typedef */
3619               break;
3620             case tnone:
3621             case tinbody:
3622               switch (fvdef)
3623                 {
3624                 case foperator:
3625                 case finlist:
3626                 case fignore:
3627                 case vignore:
3628                   break;
3629                 case fvnameseen:
3630                   if ((members && bracelev == 1)
3631                       || (globals && bracelev == 0
3632                           && (!fvextern || declarations)))
3633                     make_C_tag (FALSE); /* a variable */
3634                   /* FALLTHRU */
3635                 default:
3636                   fvdef = fvnone;
3637                 }
3638               break;
3639             }
3640           break;
3641         case '(':
3642           if (inattribute)
3643             {
3644               attrparlev++;
3645               break;
3646             }
3647           if (definedef != dnone)
3648             break;
3649           if (objdef == otagseen && parlev == 0)
3650             objdef = oparenseen;
3651           switch (fvdef)
3652             {
3653             case fvnameseen:
3654               if (typdef == ttypeseen
3655                   && *lp != '*'
3656                   && !instruct)
3657                 {
3658                   /* This handles constructs like:
3659                      typedef void OperatorFun (int fun); */
3660                   make_C_tag (FALSE);
3661                   typdef = tignore;
3662                   fvdef = fignore;
3663                   break;
3664                 }
3665               /* FALLTHRU */
3666             case foperator:
3667               fvdef = fstartlist;
3668               break;
3669             case flistseen:
3670               fvdef = finlist;
3671               break;
3672             }
3673           parlev++;
3674           break;
3675         case ')':
3676           if (inattribute)
3677             {
3678               if (--attrparlev == 0)
3679                 inattribute = FALSE;
3680               break;
3681             }
3682           if (definedef != dnone)
3683             break;
3684           if (objdef == ocatseen && parlev == 1)
3685             {
3686               make_C_tag (TRUE); /* an Objective C category */
3687               objdef = oignore;
3688             }
3689           if (--parlev == 0)
3690             {
3691               switch (fvdef)
3692                 {
3693                 case fstartlist:
3694                 case finlist:
3695                   fvdef = flistseen;
3696                   break;
3697                 }
3698               if (!instruct
3699                   && (typdef == tend
3700                       || typdef == ttypeseen))
3701                 {
3702                   typdef = tignore;
3703                   make_C_tag (FALSE); /* a typedef */
3704                 }
3705             }
3706           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3707             parlev = 0;
3708           break;
3709         case '{':
3710           if (definedef != dnone)
3711             break;
3712           if (typdef == ttypeseen)
3713             {
3714               /* Whenever typdef is set to tinbody (currently only
3715                  here), typdefbracelev should be set to bracelev. */
3716               typdef = tinbody;
3717               typdefbracelev = bracelev;
3718             }
3719           switch (fvdef)
3720             {
3721             case flistseen:
3722               make_C_tag (TRUE);    /* a function */
3723               /* FALLTHRU */
3724             case fignore:
3725               fvdef = fvnone;
3726               break;
3727             case fvnone:
3728               switch (objdef)
3729                 {
3730                 case otagseen:
3731                   make_C_tag (TRUE); /* an Objective C class */
3732                   objdef = oignore;
3733                   break;
3734                 case omethodtag:
3735                 case omethodparm:
3736                   make_C_tag (TRUE); /* an Objective C method */
3737                   objdef = oinbody;
3738                   break;
3739                 default:
3740                   /* Neutralize `extern "C" {' grot. */
3741                   if (bracelev == 0 && structdef == snone && nestlev == 0
3742                       && typdef == tnone)
3743                     bracelev = -1;
3744                 }
3745               break;
3746             }
3747           switch (structdef)
3748             {
3749             case skeyseen:         /* unnamed struct */
3750               pushclass_above (bracelev, NULL, 0);
3751               structdef = snone;
3752               break;
3753             case stagseen:         /* named struct or enum */
3754             case scolonseen:       /* a class */
3755               pushclass_above (bracelev,token.line+token.offset, token.length);
3756               structdef = snone;
3757               make_C_tag (FALSE);  /* a struct or enum */
3758               break;
3759             }
3760           bracelev += 1;
3761           break;
3762         case '*':
3763           if (definedef != dnone)
3764             break;
3765           if (fvdef == fstartlist)
3766             {
3767               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3768               token.valid = FALSE;
3769             }
3770           break;
3771         case '}':
3772           if (definedef != dnone)
3773             break;
3774           bracelev -= 1;
3775           if (!ignoreindent && lp == newlb.buffer + 1)
3776             {
3777               if (bracelev != 0)
3778                 token.valid = FALSE; /* unexpected value, token unreliable */
3779               bracelev = 0;     /* reset brace level if first column */
3780               parlev = 0;       /* also reset paren level, just in case... */
3781             }
3782           else if (bracelev < 0)
3783             {
3784               token.valid = FALSE; /* something gone amiss, token unreliable */
3785               bracelev = 0;
3786             }
3787           if (bracelev == 0 && fvdef == vignore)
3788             fvdef = fvnone;             /* end of function */
3789           popclass_above (bracelev);
3790           structdef = snone;
3791           /* Only if typdef == tinbody is typdefbracelev significant. */
3792           if (typdef == tinbody && bracelev <= typdefbracelev)
3793             {
3794               assert (bracelev == typdefbracelev);
3795               typdef = tend;
3796             }
3797           break;
3798         case '=':
3799           if (definedef != dnone)
3800             break;
3801           switch (fvdef)
3802             {
3803             case foperator:
3804             case finlist:
3805             case fignore:
3806             case vignore:
3807               break;
3808             case fvnameseen:
3809               if ((members && bracelev == 1)
3810                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3811                 make_C_tag (FALSE); /* a variable */
3812               /* FALLTHRU */
3813             default:
3814               fvdef = vignore;
3815             }
3816           break;
3817         case '<':
3818           if (cplpl
3819               && (structdef == stagseen || fvdef == fvnameseen))
3820             {
3821               templatelev++;
3822               break;
3823             }
3824           goto resetfvdef;
3825         case '>':
3826           if (templatelev > 0)
3827             {
3828               templatelev--;
3829               break;
3830             }
3831           goto resetfvdef;
3832         case '+':
3833         case '-':
3834           if (objdef == oinbody && bracelev == 0)
3835             {
3836               objdef = omethodsign;
3837               break;
3838             }
3839           /* FALLTHRU */
3840         resetfvdef:
3841         case '#': case '~': case '&': case '%': case '/':
3842         case '|': case '^': case '!': case '.': case '?':
3843           if (definedef != dnone)
3844             break;
3845           /* These surely cannot follow a function tag in C. */
3846           switch (fvdef)
3847             {
3848             case foperator:
3849             case finlist:
3850             case fignore:
3851             case vignore:
3852               break;
3853             default:
3854               fvdef = fvnone;
3855             }
3856           break;
3857         case '\0':
3858           if (objdef == otagseen)
3859             {
3860               make_C_tag (TRUE); /* an Objective C class */
3861               objdef = oignore;
3862             }
3863           /* If a macro spans multiple lines don't reset its state. */
3864           if (quotednl)
3865             CNL_SAVE_DEFINEDEF ();
3866           else
3867             CNL ();
3868           break;
3869         } /* switch (c) */
3870
3871     } /* while not eof */
3872
3873   free (lbs[0].lb.buffer);
3874   free (lbs[1].lb.buffer);
3875 }
3876
3877 /*
3878  * Process either a C++ file or a C file depending on the setting
3879  * of a global flag.
3880  */
3881 static void
3882 default_C_entries (FILE *inf)
3883 {
3884   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3885 }
3886
/* Tag INF as plain C: C_entries is called with 0 as its dialect
   argument, i.e. none of the C_PLPL/C_JAVA/C_STAR/YACC values. */
static void
plain_C_entries (FILE *inf)
{
  const int plain_c = 0;

  C_entries (plain_c, inf);
}
3893
3894 /* Always do C++. */
3895 static void
3896 Cplusplus_entries (FILE *inf)
3897 {
3898   C_entries (C_PLPL, inf);
3899 }
3900
3901 /* Always do Java. */
3902 static void
3903 Cjava_entries (FILE *inf)
3904 {
3905   C_entries (C_JAVA, inf);
3906 }
3907
3908 /* Always do C*. */
3909 static void
3910 Cstar_entries (FILE *inf)
3911 {
3912   C_entries (C_STAR, inf);
3913 }
3914
3915 /* Always do Yacc. */
3916 static void
3917 Yacc_entries (FILE *inf)
3918 {
3919   C_entries (YACC, inf);
3920 }
3921
3922 \f
3923 /* Useful macros. */
/* Loop header for line-by-line scanning; the statement that follows the
   macro invocation is the loop body.  Before every iteration, and only
   while FILE_POINTER is not at end of file, the next line is read into
   LINE_BUFFER with readline and CHAR_POINTER is set to the start of
   LINE_BUFFER's text.  FILE_POINTER and LINE_BUFFER are expanded more
   than once, so pass plain variables. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)          /* stop at end of input */       \
         && (readline (&line_buffer, file_pointer), /* fetch a line */  \
             (char_pointer = line_buffer.buffer),   /* rewind cursor */ \
             TRUE))                    /* always run the body */
3932
/* True when CP points at the keyword KW and the character right after
   the keyword satisfies notinname.  On success CP is moved past the
   keyword and past the spaces that follow it (via skip_spaces); on
   failure CP is left alone.  KW must be a string literal: the `"" kw'
   concatenation does not compile otherwise.  CP is expanded several
   times and assigned to, so it must be a side-effect-free lvalue. */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), TRUE)              /* literal-string check */       \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* keyword matches */   \
   && notinname (*((cp) + (sizeof (kw) - 1)))   /* keyword ends */      \
   && ((cp) = skip_spaces ((cp) + (sizeof (kw) - 1))) != NULL)
3938
/* Case-insensitive relative of LOOKING_AT.  It differs in two ways:
   the character after the keyword is not checked with notinname, and
   on success CP is advanced just past the keyword, without skipping
   any spaces that follow.  KW must be a string literal; CP must be a
   side-effect-free lvalue. */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), TRUE)              /* literal-string check */       \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* keyword matches */   \
   && ((cp) += sizeof (kw) - 1) != NULL)        /* step over keyword */
3944
3945 /*
3946  * Read a file, but do no processing.  This is used to do regexp
3947  * matching on files that have no language defined.
3948  */
3949 static void
3950 just_read_file (FILE *inf)
3951 {
3952   while (!feof (inf))
3953     readline (&lb, inf);
3954 }
3955
3956 \f
3957 /* Fortran parsing */
3958
3959 static void F_takeprec (void);
3960 static void F_getit (FILE *);
3961
3962 static void
3963 F_takeprec (void)
3964 {
3965   dbp = skip_spaces (dbp);
3966   if (*dbp != '*')
3967     return;
3968   dbp++;
3969   dbp = skip_spaces (dbp);
3970   if (strneq (dbp, "(*)", 3))
3971     {
3972       dbp += 3;
3973       return;
3974     }
3975   if (!ISDIGIT (*dbp))
3976     {
3977       --dbp;                    /* force failure */
3978       return;
3979     }
3980   do
3981     dbp++;
3982   while (ISDIGIT (*dbp));
3983 }
3984
3985 static void
3986 F_getit (FILE *inf)
3987 {
3988   register char *cp;
3989
3990   dbp = skip_spaces (dbp);
3991   if (*dbp == '\0')
3992     {
3993       readline (&lb, inf);
3994       dbp = lb.buffer;
3995       if (dbp[5] != '&')
3996         return;
3997       dbp += 6;
3998       dbp = skip_spaces (dbp);
3999     }
4000   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4001     return;
4002   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4003     continue;
4004   make_tag (dbp, cp-dbp, TRUE,
4005             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4006 }
4007
4008
4009 static void
4010 Fortran_functions (FILE *inf)
4011 {
4012   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4013     {
4014       if (*dbp == '%')
4015         dbp++;                  /* Ratfor escape to fortran */
4016       dbp = skip_spaces (dbp);
4017       if (*dbp == '\0')
4018         continue;
4019
4020       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4021         dbp = skip_spaces (dbp);
4022
4023       if (LOOKING_AT_NOCASE (dbp, "pure"))
4024         dbp = skip_spaces (dbp);
4025
4026       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4027         dbp = skip_spaces (dbp);
4028
4029       switch (lowcase (*dbp))
4030         {
4031         case 'i':
4032           if (nocase_tail ("integer"))
4033             F_takeprec ();
4034           break;
4035         case 'r':
4036           if (nocase_tail ("real"))
4037             F_takeprec ();
4038           break;
4039         case 'l':
4040           if (nocase_tail ("logical"))
4041             F_takeprec ();
4042           break;
4043         case 'c':
4044           if (nocase_tail ("complex") || nocase_tail ("character"))
4045             F_takeprec ();
4046           break;
4047         case 'd':
4048           if (nocase_tail ("double"))
4049             {
4050               dbp = skip_spaces (dbp);
4051               if (*dbp == '\0')
4052                 continue;
4053               if (nocase_tail ("precision"))
4054                 break;
4055               continue;
4056             }
4057           break;
4058         }
4059       dbp = skip_spaces (dbp);
4060       if (*dbp == '\0')
4061         continue;
4062       switch (lowcase (*dbp))
4063         {
4064         case 'f':
4065           if (nocase_tail ("function"))
4066             F_getit (inf);
4067           continue;
4068         case 's':
4069           if (nocase_tail ("subroutine"))
4070             F_getit (inf);
4071           continue;
4072         case 'e':
4073           if (nocase_tail ("entry"))
4074             F_getit (inf);
4075           continue;
4076         case 'b':
4077           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4078             {
4079               dbp = skip_spaces (dbp);
4080               if (*dbp == '\0') /* assume un-named */
4081                 make_tag ("blockdata", 9, TRUE,
4082                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4083               else
4084                 F_getit (inf);  /* look for name */
4085             }
4086           continue;
4087         }
4088     }
4089 }
4090
4091 \f
4092 /*
4093  * Ada parsing
4094  * Original code by
4095  * Philippe Waroquiers (1998)
4096  */
4097
4098 /* Once we are positioned after an "interesting" keyword, let's get
4099    the real tag value necessary. */
4100 static void
4101 Ada_getit (FILE *inf, const char *name_qualifier)
4102 {
4103   register char *cp;
4104   char *name;
4105   char c;
4106
4107   while (!feof (inf))
4108     {
4109       dbp = skip_spaces (dbp);
4110       if (*dbp == '\0'
4111           || (dbp[0] == '-' && dbp[1] == '-'))
4112         {
4113           readline (&lb, inf);
4114           dbp = lb.buffer;
4115         }
4116       switch (lowcase (*dbp))
4117         {
4118         case 'b':
4119           if (nocase_tail ("body"))
4120             {
4121               /* Skipping body of   procedure body   or   package body or ....
4122                  resetting qualifier to body instead of spec. */
4123               name_qualifier = "/b";
4124               continue;
4125             }
4126           break;
4127         case 't':
4128           /* Skipping type of   task type   or   protected type ... */
4129           if (nocase_tail ("type"))
4130             continue;
4131           break;
4132         }
4133       if (*dbp == '"')
4134         {
4135           dbp += 1;
4136           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4137             continue;
4138         }
4139       else
4140         {
4141           dbp = skip_spaces (dbp);
4142           for (cp = dbp;
4143                (*cp != '\0'
4144                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4145                cp++)
4146             continue;
4147           if (cp == dbp)
4148             return;
4149         }
4150       c = *cp;
4151       *cp = '\0';
4152       name = concat (dbp, name_qualifier, "");
4153       *cp = c;
4154       make_tag (name, strlen (name), TRUE,
4155                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4156       free (name);
4157       if (c == '"')
4158         dbp = cp + 1;
4159       return;
4160     }
4161 }
4162
/*
 * Ada tag scanner.
 *
 * Reads INF one line at a time and walks each line character by
 * character.  String literals, "--" comments and single-quoted
 * characters / attribute ticks are skipped.  When a token begins with
 * one of the keywords below, Ada_getit is called with a suffix string:
 *   function   -> "/f"   (not when packages_only)
 *   procedure  -> "/p"   (not when packages_only)
 *   package    -> "/s"
 *   protected  -> "/t"
 *   task       -> "/k"   (not when packages_only)
 *   type       -> "/t"   (only when typedefs, not when packages_only)
 * When typedefs is set (and packages_only is not), everything from a
 * "use" keyword up to the next ';' is ignored.
 */
static void
Ada_funcs (FILE *inf)
{
  bool inquote = FALSE;         /* a "..." string opened on an earlier
                                   line has not been closed yet */
  bool skip_till_semicolumn = FALSE; /* ignoring text up to the next ';' */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* When the string started on a previous line, search from
                 dbp itself; otherwise step over the opening quote first. */
              dbp = etags_strchr (dbp + !inquote, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  /* No closing quote on this line: remember that we are
                     still inside the string. */
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              dbp++ ;
              /* Also step over the character after the quote, unless
                 the line ends here. */
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token. */
          switch (lowcase (*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }

              /* There is no break here: a token starting with 'u' that
                 was not handled above falls through into case 't'.
                 NOTE(review): presumably harmless, since "task" and
                 "type" should not match a token beginning with 'u' --
                 confirm against nocase_tail. */
            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore whatever is left on this line. */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4269
4270 \f
4271 /*
4272  * Unix and microcontroller assembly tag handling
4273  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4274  * Idea by Bob Weiner, Motorola Inc. (1994)
4275  */
4276 static void
4277 Asm_labels (FILE *inf)
4278 {
4279   register char *cp;
4280
4281   LOOP_ON_INPUT_LINES (inf, lb, cp)
4282     {
4283       /* If first char is alphabetic or one of [_.$], test for colon
4284          following identifier. */
4285       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4286         {
4287           /* Read past label. */
4288           cp++;
4289           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4290             cp++;
4291           if (*cp == ':' || iswhite (*cp))
4292             /* Found end of label, so copy it and add it to the table. */
4293             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4294                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4295         }
4296     }
4297 }
4298
4299 \f
4300 /*
4301  * Perl support
4302  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4303  * Perl variable names: /^(my|local).../
4304  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4305  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4306  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4307  */
4308 static void
4309 Perl_functions (FILE *inf)
4310 {
4311   char *package = savestr ("main"); /* current package name */
4312   register char *cp;
4313
4314   LOOP_ON_INPUT_LINES (inf, lb, cp)
4315     {
4316       cp = skip_spaces (cp);
4317
4318       if (LOOKING_AT (cp, "package"))
4319         {
4320           free (package);
4321           get_tag (cp, &package);
4322         }
4323       else if (LOOKING_AT (cp, "sub"))
4324         {
4325           char *pos;
4326           char *sp = cp;
4327
4328           while (!notinname (*cp))
4329             cp++;
4330           if (cp == sp)
4331             continue;           /* nothing found */
4332           if ((pos = etags_strchr (sp, ':')) != NULL
4333               && pos < cp && pos[1] == ':')
4334             /* The name is already qualified. */
4335             make_tag (sp, cp - sp, TRUE,
4336                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4337           else
4338             /* Qualify it. */
4339             {
4340               char savechar, *name;
4341
4342               savechar = *cp;
4343               *cp = '\0';
4344               name = concat (package, "::", sp);
4345               *cp = savechar;
4346               make_tag (name, strlen (name), TRUE,
4347                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4348               free (name);
4349             }
4350         }
4351        else if (globals)        /* only if we are tagging global vars */
4352         {
4353           /* Skip a qualifier, if any. */
4354           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4355           /* After "my" or "local", but before any following paren or space. */
4356           char *varstart = cp;
4357
4358           if (qual              /* should this be removed?  If yes, how? */
4359               && (*cp == '$' || *cp == '@' || *cp == '%'))
4360             {
4361               varstart += 1;
4362               do
4363                 cp++;
4364               while (ISALNUM (*cp) || *cp == '_');
4365             }
4366           else if (qual)
4367             {
4368               /* Should be examining a variable list at this point;
4369                  could insist on seeing an open parenthesis. */
4370               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4371                 cp++;
4372             }
4373           else
4374             continue;
4375
4376           make_tag (varstart, cp - varstart, FALSE,
4377                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4378         }
4379     }
4380   free (package);
4381 }
4382
4383
4384 /*
4385  * Python support
4386  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4387  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4388  * More ideas by seb bacon <seb@jamkit.com> (2002)
4389  */
4390 static void
4391 Python_functions (FILE *inf)
4392 {
4393   register char *cp;
4394
4395   LOOP_ON_INPUT_LINES (inf, lb, cp)
4396     {
4397       cp = skip_spaces (cp);
4398       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4399         {
4400           char *name = cp;
4401           while (!notinname (*cp) && *cp != ':')
4402             cp++;
4403           make_tag (name, cp - name, TRUE,
4404                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4405         }
4406     }
4407 }
4408
4409 \f
4410 /*
4411  * PHP support
4412  * Look for:
4413  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4414  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4415  *  - /^[ \t]*define\(\"[^\"]+/
4416  * Only with --members:
4417  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4418  * Idea by Diez B. Roggisch (2001)
4419  */
4420 static void
4421 PHP_functions (FILE *inf)
4422 {
4423   register char *cp, *name;
4424   bool search_identifier = FALSE;
4425
4426   LOOP_ON_INPUT_LINES (inf, lb, cp)
4427     {
4428       cp = skip_spaces (cp);
4429       name = cp;
4430       if (search_identifier
4431           && *cp != '\0')
4432         {
4433           while (!notinname (*cp))
4434             cp++;
4435           make_tag (name, cp - name, TRUE,
4436                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4437           search_identifier = FALSE;
4438         }
4439       else if (LOOKING_AT (cp, "function"))
4440         {
4441           if (*cp == '&')
4442             cp = skip_spaces (cp+1);
4443           if (*cp != '\0')
4444             {
4445               name = cp;
4446               while (!notinname (*cp))
4447                 cp++;
4448               make_tag (name, cp - name, TRUE,
4449                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4450             }
4451           else
4452             search_identifier = TRUE;
4453         }
4454       else if (LOOKING_AT (cp, "class"))
4455         {
4456           if (*cp != '\0')
4457             {
4458               name = cp;
4459               while (*cp != '\0' && !iswhite (*cp))
4460                 cp++;
4461               make_tag (name, cp - name, FALSE,
4462                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4463             }
4464           else
4465             search_identifier = TRUE;
4466         }
4467       else if (strneq (cp, "define", 6)
4468                && (cp = skip_spaces (cp+6))
4469                && *cp++ == '('
4470                && (*cp == '"' || *cp == '\''))
4471         {
4472           char quote = *cp++;
4473           name = cp;
4474           while (*cp != quote && *cp != '\0')
4475             cp++;
4476           make_tag (name, cp - name, FALSE,
4477                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4478         }
4479       else if (members
4480                && LOOKING_AT (cp, "var")
4481                && *cp == '$')
4482         {
4483           name = cp;
4484           while (!notinname (*cp))
4485             cp++;
4486           make_tag (name, cp - name, FALSE,
4487                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4488         }
4489     }
4490 }
4491
4492 \f
4493 /*
4494  * Cobol tag functions
4495  * We could look for anything that could be a paragraph name.
4496  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4497  * Idea by Corny de Souza (1993)
4498  */
4499 static void
4500 Cobol_paragraphs (FILE *inf)
4501 {
4502   register char *bp, *ep;
4503
4504   LOOP_ON_INPUT_LINES (inf, lb, bp)
4505     {
4506       if (lb.len < 9)
4507         continue;
4508       bp += 8;
4509
4510       /* If eoln, compiler option or comment ignore whole line. */
4511       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4512         continue;
4513
4514       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4515         continue;
4516       if (*ep++ == '.')
4517         make_tag (bp, ep - bp, TRUE,
4518                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4519     }
4520 }
4521
4522 \f
4523 /*
4524  * Makefile support
4525  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4526  */
4527 static void
4528 Makefile_targets (FILE *inf)
4529 {
4530   register char *bp;
4531
4532   LOOP_ON_INPUT_LINES (inf, lb, bp)
4533     {
4534       if (*bp == '\t' || *bp == '#')
4535         continue;
4536       while (*bp != '\0' && *bp != '=' && *bp != ':')
4537         bp++;
4538       if (*bp == ':' || (globals && *bp == '='))
4539         {
4540           /* We should detect if there is more than one tag, but we do not.
4541              We just skip initial and final spaces. */
4542           char * namestart = skip_spaces (lb.buffer);
4543           while (--bp > namestart)
4544             if (!notinname (*bp))
4545               break;
4546           make_tag (namestart, bp - namestart + 1, TRUE,
4547                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4548         }
4549     }
4550 }
4551
4552 \f
4553 /*
4554  * Pascal parsing
4555  * Original code by Mosur K. Mohan (1989)
4556  *
4557  *  Locates tags for procedures & functions.  Doesn't do any type- or
4558  *  var-definitions.  It does look for the keyword "extern" or
4559  *  "forward" immediately following the procedure statement; if found,
4560  *  the tag is skipped.
4561  */
4562 static void
4563 Pascal_functions (FILE *inf)
4564 {
4565   linebuffer tline;             /* mostly copied from C_entries */
4566   long save_lcno;
4567   int save_lineno, namelen, taglen;
4568   char c, *name;
4569
4570   bool                          /* each of these flags is TRUE if: */
4571     incomment,                  /* point is inside a comment */
4572     inquote,                    /* point is inside '..' string */
4573     get_tagname,                /* point is after PROCEDURE/FUNCTION
4574                                    keyword, so next item = potential tag */
4575     found_tag,                  /* point is after a potential tag */
4576     inparms,                    /* point is within parameter-list */
4577     verify_tag;                 /* point has passed the parm-list, so the
4578                                    next token will determine whether this
4579                                    is a FORWARD/EXTERN to be ignored, or
4580                                    whether it is a real tag */
4581
4582   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4583   name = NULL;                  /* keep compiler quiet */
4584   dbp = lb.buffer;
4585   *dbp = '\0';
4586   linebuffer_init (&tline);
4587
4588   incomment = inquote = FALSE;
4589   found_tag = FALSE;            /* have a proc name; check if extern */
4590   get_tagname = FALSE;          /* found "procedure" keyword         */
4591   inparms = FALSE;              /* found '(' after "proc"            */
4592   verify_tag = FALSE;           /* check if "extern" is ahead        */
4593
4594
4595   while (!feof (inf))           /* long main loop to get next char */
4596     {
4597       c = *dbp++;
4598       if (c == '\0')            /* if end of line */
4599         {
4600           readline (&lb, inf);
4601           dbp = lb.buffer;
4602           if (*dbp == '\0')
4603             continue;
4604           if (!((found_tag && verify_tag)
4605                 || get_tagname))
4606             c = *dbp++;         /* only if don't need *dbp pointing
4607                                    to the beginning of the name of
4608                                    the procedure or function */
4609         }
4610       if (incomment)
4611         {
4612           if (c == '}')         /* within { } comments */
4613             incomment = FALSE;
4614           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4615             {
4616               dbp++;
4617               incomment = FALSE;
4618             }
4619           continue;
4620         }
4621       else if (inquote)
4622         {
4623           if (c == '\'')
4624             inquote = FALSE;
4625           continue;
4626         }
4627       else
4628         switch (c)
4629           {
4630           case '\'':
4631             inquote = TRUE;     /* found first quote */
4632             continue;
4633           case '{':             /* found open { comment */
4634             incomment = TRUE;
4635             continue;
4636           case '(':
4637             if (*dbp == '*')    /* found open (* comment */
4638               {
4639                 incomment = TRUE;
4640                 dbp++;
4641               }
4642             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4643               inparms = TRUE;
4644             continue;
4645           case ')':             /* end of parms list */
4646             if (inparms)
4647               inparms = FALSE;
4648             continue;
4649           case ';':
4650             if (found_tag && !inparms) /* end of proc or fn stmt */
4651               {
4652                 verify_tag = TRUE;
4653                 break;
4654               }
4655             continue;
4656           }
4657       if (found_tag && verify_tag && (*dbp != ' '))
4658         {
4659           /* Check if this is an "extern" declaration. */
4660           if (*dbp == '\0')
4661             continue;
4662           if (lowcase (*dbp == 'e'))
4663             {
4664               if (nocase_tail ("extern")) /* superfluous, really! */
4665                 {
4666                   found_tag = FALSE;
4667                   verify_tag = FALSE;
4668                 }
4669             }
4670           else if (lowcase (*dbp) == 'f')
4671             {
4672               if (nocase_tail ("forward")) /* check for forward reference */
4673                 {
4674                   found_tag = FALSE;
4675                   verify_tag = FALSE;
4676                 }
4677             }
4678           if (found_tag && verify_tag) /* not external proc, so make tag */
4679             {
4680               found_tag = FALSE;
4681               verify_tag = FALSE;
4682               make_tag (name, namelen, TRUE,
4683                         tline.buffer, taglen, save_lineno, save_lcno);
4684               continue;
4685             }
4686         }
4687       if (get_tagname)          /* grab name of proc or fn */
4688         {
4689           char *cp;
4690
4691           if (*dbp == '\0')
4692             continue;
4693
4694           /* Find block name. */
4695           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4696             continue;
4697
4698           /* Save all values for later tagging. */
4699           linebuffer_setlen (&tline, lb.len);
4700           strcpy (tline.buffer, lb.buffer);
4701           save_lineno = lineno;
4702           save_lcno = linecharno;
4703           name = tline.buffer + (dbp - lb.buffer);
4704           namelen = cp - dbp;
4705           taglen = cp - lb.buffer + 1;
4706
4707           dbp = cp;             /* set dbp to e-o-token */
4708           get_tagname = FALSE;
4709           found_tag = TRUE;
4710           continue;
4711
4712           /* And proceed to check for "extern". */
4713         }
4714       else if (!incomment && !inquote && !found_tag)
4715         {
4716           /* Check for proc/fn keywords. */
4717           switch (lowcase (c))
4718             {
4719             case 'p':
4720               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4721                 get_tagname = TRUE;
4722               continue;
4723             case 'f':
4724               if (nocase_tail ("unction"))
4725                 get_tagname = TRUE;
4726               continue;
4727             }
4728         }
4729     } /* while not eof */
4730
4731   free (tline.buffer);
4732 }
4733
4734 \f
4735 /*
4736  * Lisp tag functions
4737  *  look for (def or (DEF, quote or QUOTE
4738  */
4739
4740 static void L_getit (void);
4741
4742 static void
4743 L_getit (void)
4744 {
4745   if (*dbp == '\'')             /* Skip prefix quote */
4746     dbp++;
4747   else if (*dbp == '(')
4748   {
4749     dbp++;
4750     /* Try to skip "(quote " */
4751     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4752       /* Ok, then skip "(" before name in (defstruct (foo)) */
4753       dbp = skip_spaces (dbp);
4754   }
4755   get_tag (dbp, NULL);
4756 }
4757
4758 static void
4759 Lisp_functions (FILE *inf)
4760 {
4761   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4762     {
4763       if (dbp[0] != '(')
4764         continue;
4765
4766       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4767         {
4768           dbp = skip_non_spaces (dbp);
4769           dbp = skip_spaces (dbp);
4770           L_getit ();
4771         }
4772       else
4773         {
4774           /* Check for (foo::defmumble name-defined ... */
4775           do
4776             dbp++;
4777           while (!notinname (*dbp) && *dbp != ':');
4778           if (*dbp == ':')
4779             {
4780               do
4781                 dbp++;
4782               while (*dbp == ':');
4783
4784               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4785                 {
4786                   dbp = skip_non_spaces (dbp);
4787                   dbp = skip_spaces (dbp);
4788                   L_getit ();
4789                 }
4790             }
4791         }
4792     }
4793 }
4794
4795 \f
4796 /*
4797  * Lua script language parsing
4798  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4799  *
4800  *  "function" and "local function" are tags if they start at column 1.
4801  */
4802 static void
4803 Lua_functions (FILE *inf)
4804 {
4805   register char *bp;
4806
4807   LOOP_ON_INPUT_LINES (inf, lb, bp)
4808     {
4809       if (bp[0] != 'f' && bp[0] != 'l')
4810         continue;
4811
4812       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4813
4814       if (LOOKING_AT (bp, "function"))
4815         get_tag (bp, NULL);
4816     }
4817 }
4818
4819 \f
4820 /*
4821  * PostScript tags
4822  * Just look for lines where the first character is '/'
4823  * Also look at "defineps" for PSWrap
4824  * Ideas by:
4825  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4826  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4827  */
4828 static void
4829 PS_functions (FILE *inf)
4830 {
4831   register char *bp, *ep;
4832
4833   LOOP_ON_INPUT_LINES (inf, lb, bp)
4834     {
4835       if (bp[0] == '/')
4836         {
4837           for (ep = bp+1;
4838                *ep != '\0' && *ep != ' ' && *ep != '{';
4839                ep++)
4840             continue;
4841           make_tag (bp, ep - bp, TRUE,
4842                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4843         }
4844       else if (LOOKING_AT (bp, "defineps"))
4845         get_tag (bp, NULL);
4846     }
4847 }
4848
4849 \f
4850 /*
4851  * Forth tags
4852  * Ignore anything after \ followed by space or in ( )
4853  * Look for words defined by :
4854  * Look for constant, code, create, defer, value, and variable
4855  * OBP extensions:  Look for buffer:, field,
4856  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4857  */
4858 static void
4859 Forth_words (FILE *inf)
4860 {
4861   register char *bp;
4862
4863   LOOP_ON_INPUT_LINES (inf, lb, bp)
4864     while ((bp = skip_spaces (bp))[0] != '\0')
4865       if (bp[0] == '\\' && iswhite (bp[1]))
4866         break;                  /* read next line */
4867       else if (bp[0] == '(' && iswhite (bp[1]))
4868         do                      /* skip to ) or eol */
4869           bp++;
4870         while (*bp != ')' && *bp != '\0');
4871       else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4872                || LOOKING_AT_NOCASE (bp, "constant")
4873                || LOOKING_AT_NOCASE (bp, "code")
4874                || LOOKING_AT_NOCASE (bp, "create")
4875                || LOOKING_AT_NOCASE (bp, "defer")
4876                || LOOKING_AT_NOCASE (bp, "value")
4877                || LOOKING_AT_NOCASE (bp, "variable")
4878                || LOOKING_AT_NOCASE (bp, "buffer:")
4879                || LOOKING_AT_NOCASE (bp, "field"))
4880         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4881       else
4882         bp = skip_non_spaces (bp);
4883 }
4884
4885 \f
4886 /*
4887  * Scheme tag functions
4888  * look for (def... xyzzy
4889  *          (def... (xyzzy
4890  *          (def ... ((...(xyzzy ....
4891  *          (set! xyzzy
4892  * Original code by Ken Haase (1985?)
4893  */
4894 static void
4895 Scheme_functions (FILE *inf)
4896 {
4897   register char *bp;
4898
4899   LOOP_ON_INPUT_LINES (inf, lb, bp)
4900     {
4901       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4902         {
4903           bp = skip_non_spaces (bp+4);
4904           /* Skip over open parens and white space.  Don't continue past
4905              '\0'. */
4906           while (*bp && notinname (*bp))
4907             bp++;
4908           get_tag (bp, NULL);
4909         }
4910       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4911         get_tag (bp, NULL);
4912     }
4913 }
4914
4915 \f
4916 /* Find tags in TeX and LaTeX input files.  */
4917
/* TEX_toktab is a table of TeX control sequences that define tags.
 * Each entry records one such control sequence.
 *
 * The table is built by TEX_decode_env; its last entry has a NULL
 * buffer, which is what TeX_commands uses to find the end.
 *
 * Original code from who knows whom.
 * Ideas by:
 *   Stefan Monnier (2002)
 */

static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are separated by colons and written without the escape
   character.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape character and group delimiters used for the file being
   scanned.  These are only the initial values; TEX_mode sets all three
   for each file.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4941
4942 /*
4943  * TeX/LaTeX scanning loop.
4944  */
4945 static void
4946 TeX_commands (FILE *inf)
4947 {
4948   char *cp;
4949   linebuffer *key;
4950
4951   /* Select either \ or ! as escape character.  */
4952   TEX_mode (inf);
4953
4954   /* Initialize token table once from environment. */
4955   if (TEX_toktab == NULL)
4956     TEX_decode_env ("TEXTAGS", TEX_defenv);
4957
4958   LOOP_ON_INPUT_LINES (inf, lb, cp)
4959     {
4960       /* Look at each TEX keyword in line. */
4961       for (;;)
4962         {
4963           /* Look for a TEX escape. */
4964           while (*cp++ != TEX_esc)
4965             if (cp[-1] == '\0' || cp[-1] == '%')
4966               goto tex_next_line;
4967
4968           for (key = TEX_toktab; key->buffer != NULL; key++)
4969             if (strneq (cp, key->buffer, key->len))
4970               {
4971                 register char *p;
4972                 int namelen, linelen;
4973                 bool opgrp = FALSE;
4974
4975                 cp = skip_spaces (cp + key->len);
4976                 if (*cp == TEX_opgrp)
4977                   {
4978                     opgrp = TRUE;
4979                     cp++;
4980                   }
4981                 for (p = cp;
4982                      (!iswhite (*p) && *p != '#' &&
4983                       *p != TEX_opgrp && *p != TEX_clgrp);
4984                      p++)
4985                   continue;
4986                 namelen = p - cp;
4987                 linelen = lb.len;
4988                 if (!opgrp || *p == TEX_clgrp)
4989                   {
4990                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4991                       p++;
4992                     linelen = p - lb.buffer + 1;
4993                   }
4994                 make_tag (cp, namelen, TRUE,
4995                           lb.buffer, linelen, lineno, linecharno);
4996                 goto tex_next_line; /* We only tag a line once */
4997               }
4998         }
4999     tex_next_line:
5000       ;
5001     }
5002 }
5003
5004 #define TEX_LESC '\\'
5005 #define TEX_SESC '!'
5006
5007 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5008    chars accordingly. */
5009 static void
5010 TEX_mode (FILE *inf)
5011 {
5012   int c;
5013
5014   while ((c = getc (inf)) != EOF)
5015     {
5016       /* Skip to next line if we hit the TeX comment char. */
5017       if (c == '%')
5018         while (c != '\n' && c != EOF)
5019           c = getc (inf);
5020       else if (c == TEX_LESC || c == TEX_SESC )
5021         break;
5022     }
5023
5024   if (c == TEX_LESC)
5025     {
5026       TEX_esc = TEX_LESC;
5027       TEX_opgrp = '{';
5028       TEX_clgrp = '}';
5029     }
5030   else
5031     {
5032       TEX_esc = TEX_SESC;
5033       TEX_opgrp = '<';
5034       TEX_clgrp = '>';
5035     }
5036   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5037      No attempt is made to correct the situation. */
5038   rewind (inf);
5039 }
5040
5041 /* Read environment and prepend it to the default string.
5042    Build token table. */
5043 static void
5044 TEX_decode_env (const char *evarname, const char *defenv)
5045 {
5046   register const char *env, *p;
5047   int i, len;
5048
5049   /* Append default string to environment. */
5050   env = getenv (evarname);
5051   if (!env)
5052     env = defenv;
5053   else
5054     env = concat (env, defenv, "");
5055
5056   /* Allocate a token table */
5057   for (len = 1, p = env; p;)
5058     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5059       len++;
5060   TEX_toktab = xnew (len, linebuffer);
5061
5062   /* Unpack environment string into token table. Be careful about */
5063   /* zero-length strings (leading ':', "::" and trailing ':') */
5064   for (i = 0; *env != '\0';)
5065     {
5066       p = etags_strchr (env, ':');
5067       if (!p)                   /* End of environment string. */
5068         p = env + strlen (env);
5069       if (p - env > 0)
5070         {                       /* Only non-zero strings. */
5071           TEX_toktab[i].buffer = savenstr (env, p - env);
5072           TEX_toktab[i].len = p - env;
5073           i++;
5074         }
5075       if (*p)
5076         env = p + 1;
5077       else
5078         {
5079           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5080           TEX_toktab[i].len = 0;
5081           break;
5082         }
5083     }
5084 }
5085
5086 \f
5087 /* Texinfo support.  Dave Love, Mar. 2000.  */
5088 static void
5089 Texinfo_nodes (FILE *inf)
5090 {
5091   char *cp, *start;
5092   LOOP_ON_INPUT_LINES (inf, lb, cp)
5093     if (LOOKING_AT (cp, "@node"))
5094       {
5095         start = cp;
5096         while (*cp != '\0' && *cp != ',')
5097           cp++;
5098         make_tag (start, cp - start, TRUE,
5099                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5100       }
5101 }
5102
5103 \f
5104 /*
5105  * HTML support.
5106  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5107  * Contents of <a name=xxx> are tags with name xxx.
5108  *
5109  * Francesco Potortì, 2002.
5110  */
5111 static void
5112 HTML_labels (FILE *inf)
5113 {
5114   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5115   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5116   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5117   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5118   char *end;
5119
5120
5121   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5122
5123   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5124     for (;;)                    /* loop on the same line */
5125       {
5126         if (skiptag)            /* skip HTML tag */
5127           {
5128             while (*dbp != '\0' && *dbp != '>')
5129               dbp++;
5130             if (*dbp == '>')
5131               {
5132                 dbp += 1;
5133                 skiptag = FALSE;
5134                 continue;       /* look on the same line */
5135               }
5136             break;              /* go to next line */
5137           }
5138
5139         else if (intag) /* look for "name=" or "id=" */
5140           {
5141             while (*dbp != '\0' && *dbp != '>'
5142                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5143               dbp++;
5144             if (*dbp == '\0')
5145               break;            /* go to next line */
5146             if (*dbp == '>')
5147               {
5148                 dbp += 1;
5149                 intag = FALSE;
5150                 continue;       /* look on the same line */
5151               }
5152             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5153                 || LOOKING_AT_NOCASE (dbp, "id="))
5154               {
5155                 bool quoted = (dbp[0] == '"');
5156
5157                 if (quoted)
5158                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5159                     continue;
5160                 else
5161                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5162                     continue;
5163                 linebuffer_setlen (&token_name, end - dbp);
5164                 strncpy (token_name.buffer, dbp, end - dbp);
5165                 token_name.buffer[end - dbp] = '\0';
5166
5167                 dbp = end;
5168                 intag = FALSE;  /* we found what we looked for */
5169                 skiptag = TRUE; /* skip to the end of the tag */
5170                 getnext = TRUE; /* then grab the text */
5171                 continue;       /* look on the same line */
5172               }
5173             dbp += 1;
5174           }
5175
5176         else if (getnext)       /* grab next tokens and tag them */
5177           {
5178             dbp = skip_spaces (dbp);
5179             if (*dbp == '\0')
5180               break;            /* go to next line */
5181             if (*dbp == '<')
5182               {
5183                 intag = TRUE;
5184                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5185                 continue;       /* look on the same line */
5186               }
5187
5188             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5189               continue;
5190             make_tag (token_name.buffer, token_name.len, TRUE,
5191                       dbp, end - dbp, lineno, linecharno);
5192             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5193             getnext = FALSE;
5194             break;              /* go to next line */
5195           }
5196
5197         else                    /* look for an interesting HTML tag */
5198           {
5199             while (*dbp != '\0' && *dbp != '<')
5200               dbp++;
5201             if (*dbp == '\0')
5202               break;            /* go to next line */
5203             intag = TRUE;
5204             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5205               {
5206                 inanchor = TRUE;
5207                 continue;       /* look on the same line */
5208               }
5209             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5210                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5211                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5212                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5213               {
5214                 intag = FALSE;
5215                 getnext = TRUE;
5216                 continue;       /* look on the same line */
5217               }
5218             dbp += 1;
5219           }
5220       }
5221 }
5222
5223 \f
5224 /*
5225  * Prolog support
5226  *
5227  * Assumes that the predicate or rule starts at column 0.
5228  * Only the first clause of a predicate or rule is added.
5229  * Original code by Sunichirou Sugou (1989)
5230  * Rewritten by Anders Lindgren (1996)
5231  */
5232 static size_t prolog_pr (char *, char *);
5233 static void prolog_skip_comment (linebuffer *, FILE *);
5234 static size_t prolog_atom (char *, size_t);
5235
5236 static void
5237 Prolog_functions (FILE *inf)
5238 {
5239   char *cp, *last;
5240   size_t len;
5241   size_t allocated;
5242
5243   allocated = 0;
5244   len = 0;
5245   last = NULL;
5246
5247   LOOP_ON_INPUT_LINES (inf, lb, cp)
5248     {
5249       if (cp[0] == '\0')        /* Empty line */
5250         continue;
5251       else if (iswhite (cp[0])) /* Not a predicate */
5252         continue;
5253       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5254         prolog_skip_comment (&lb, inf);
5255       else if ((len = prolog_pr (cp, last)) > 0)
5256         {
5257           /* Predicate or rule.  Store the function name so that we
5258              only generate a tag for the first clause.  */
5259           if (last == NULL)
5260             last = xnew (len + 1, char);
5261           else if (len + 1 > allocated)
5262             xrnew (last, len + 1, char);
5263           allocated = len + 1;
5264           strncpy (last, cp, len);
5265           last[len] = '\0';
5266         }
5267     }
5268   free (last);
5269 }
5270
5271
5272 static void
5273 prolog_skip_comment (linebuffer *plb, FILE *inf)
5274 {
5275   char *cp;
5276
5277   do
5278     {
5279       for (cp = plb->buffer; *cp != '\0'; cp++)
5280         if (cp[0] == '*' && cp[1] == '/')
5281           return;
5282       readline (plb, inf);
5283     }
5284   while (!feof (inf));
5285 }
5286
5287 /*
5288  * A predicate or rule definition is added if it matches:
5289  *     <beginning of line><Prolog Atom><whitespace>(
5290  * or  <beginning of line><Prolog Atom><whitespace>:-
5291  *
5292  * It is added to the tags database if it doesn't match the
5293  * name of the previous clause header.
5294  *
5295  * Return the size of the name of the predicate or rule, or 0 if no
5296  * header was found.
5297  */
5298 static size_t
5299 prolog_pr (char *s, char *last)
5300
5301                                 /* Name of last clause. */
5302 {
5303   size_t pos;
5304   size_t len;
5305
5306   pos = prolog_atom (s, 0);
5307   if (! pos)
5308     return 0;
5309
5310   len = pos;
5311   pos = skip_spaces (s + pos) - s;
5312
5313   if ((s[pos] == '.'
5314        || (s[pos] == '(' && (pos += 1))
5315        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5316       && (last == NULL          /* save only the first clause */
5317           || len != strlen (last)
5318           || !strneq (s, last, len)))
5319         {
5320           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5321           return len;
5322         }
5323   else
5324     return 0;
5325 }
5326
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if no atom was found.
 *
 * Two forms are accepted:
 * - An unquoted atom: a lower case letter or an underscore, followed
 *   by any number of alphanumeric characters and underscores.
 * - A quoted atom: text between single quotes.  Two single quotes in
 *   a row stand for one quote inside the atom, and a backslash quotes
 *   the character after it.  A quoted atom that is not closed before
 *   the end of S is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (ISLOWER (s[pos]) || s[pos] == '_')
    {
      /* Unquoted atom.  */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
      return pos - start;
    }

  if (s[pos] != '\'')
    return 0;

  /* Quoted atom: step over the opening quote, then scan for the end.  */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        if (s[pos + 1] == '\0')
          return 0;
        pos += 2;
        break;

      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* that was the closing quote */
        pos++;                  /* a doubled quote: still inside */
        break;

      default:
        pos++;
        break;
      }
}
5383
5384 \f
5385 /*
5386  * Support for Erlang
5387  *
5388  * Generates tags for functions, defines, and records.
5389  * Assumes that Erlang functions start at column 0.
5390  * Original code by Anders Lindgren (1996)
5391  */
5392 static int erlang_func (char *, char *);
5393 static void erlang_attribute (char *);
5394 static int erlang_atom (char *);
5395
5396 static void
5397 Erlang_functions (FILE *inf)
5398 {
5399   char *cp, *last;
5400   int len;
5401   int allocated;
5402
5403   allocated = 0;
5404   len = 0;
5405   last = NULL;
5406
5407   LOOP_ON_INPUT_LINES (inf, lb, cp)
5408     {
5409       if (cp[0] == '\0')        /* Empty line */
5410         continue;
5411       else if (iswhite (cp[0])) /* Not function nor attribute */
5412         continue;
5413       else if (cp[0] == '%')    /* comment */
5414         continue;
5415       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5416         continue;
5417       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5418         {
5419           erlang_attribute (cp);
5420           if (last != NULL)
5421             {
5422               free (last);
5423               last = NULL;
5424             }
5425         }
5426       else if ((len = erlang_func (cp, last)) > 0)
5427         {
5428           /*
5429            * Function.  Store the function name so that we only
5430            * generates a tag for the first clause.
5431            */
5432           if (last == NULL)
5433             last = xnew (len + 1, char);
5434           else if (len + 1 > allocated)
5435             xrnew (last, len + 1, char);
5436           allocated = len + 1;
5437           strncpy (last, cp, len);
5438           last[len] = '\0';
5439         }
5440     }
5441   free (last);
5442 }
5443
5444
5445 /*
5446  * A function definition is added if it matches:
5447  *     <beginning of line><Erlang Atom><whitespace>(
5448  *
5449  * It is added to the tags database if it doesn't match the
5450  * name of the previous clause header.
5451  *
5452  * Return the size of the name of the function, or 0 if no function
5453  * was found.
5454  */
5455 static int
5456 erlang_func (char *s, char *last)
5457
5458                                 /* Name of last clause. */
5459 {
5460   int pos;
5461   int len;
5462
5463   pos = erlang_atom (s);
5464   if (pos < 1)
5465     return 0;
5466
5467   len = pos;
5468   pos = skip_spaces (s + pos) - s;
5469
5470   /* Save only the first clause. */
5471   if (s[pos++] == '('
5472       && (last == NULL
5473           || len != (int)strlen (last)
5474           || !strneq (s, last, len)))
5475         {
5476           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5477           return len;
5478         }
5479
5480   return 0;
5481 }
5482
5483
5484 /*
5485  * Handle attributes.  Currently, tags are generated for defines
5486  * and records.
5487  *
5488  * They are on the form:
5489  * -define(foo, bar).
5490  * -define(Foo(M, N), M+N).
5491  * -record(graph, {vtab = notable, cyclic = true}).
5492  */
5493 static void
5494 erlang_attribute (char *s)
5495 {
5496   char *cp = s;
5497
5498   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5499       && *cp++ == '(')
5500     {
5501       int len = erlang_atom (skip_spaces (cp));
5502       if (len > 0)
5503         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5504     }
5505   return;
5506 }
5507
5508
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one.
 *
 * Two forms are accepted:
 * - An unquoted name: a letter or an underscore, followed by any
 *   number of alphanumeric characters and underscores.
 * - A quoted atom: text between single quotes, where a backslash
 *   quotes the character after it.  A quoted atom that is not closed
 *   before the end of S is rejected.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: scan for the closing quote.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;            /* step onto the quoted character */
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* Unquoted atom.  */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5536
5537 \f
5538 static char *scan_separators (char *);
5539 static void add_regex (char *, language *);
5540 static char *substitute (char *, char *, struct re_registers *);
5541
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the next
 * unquoted occurrence of it.  A backslash quotes the next character:
 * \a, \b, \d, \e, \f, \n, \r, \t and \v are turned into the
 * corresponding control characters, a quoted separator is turned into
 * the separator alone, and any other quoted character is kept
 * together with its backslash.  A backslash that is the very last
 * character of NAME is dropped.
 *
 * Works in place: the result is written at the start of NAME and is
 * null terminated.
 *
 * Returns a pointer to the terminating separator, or NULL for an
 * unterminated regexp.  An empty NAME has no separator at all, so
 * NULL is returned and nothing is read or written past its
 * terminator.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;

  if (sep == '\0')
    return NULL;                /* empty input: nothing to scan */

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character.  */
          ++name;
          if (*name == '\0')
            break;              /* trailing backslash: drop it */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string.  COPYTO always trails NAME, so this
     cannot overwrite the character NAME stopped on.  */
  *copyto = '\0';

  /* Stopping anywhere but on a separator means the regexp was
     unterminated.  */
  return *name == sep ? name : NULL;
}
5600
5601 /* Look at the argument of --regex or --no-regex and do the right
5602    thing.  Same for each line of a regexp file. */
5603 static void
5604 analyse_regex (char *regex_arg)
5605 {
5606   if (regex_arg == NULL)
5607     {
5608       free_regexps ();          /* --no-regex: remove existing regexps */
5609       return;
5610     }
5611
5612   /* A real --regexp option or a line in a regexp file. */
5613   switch (regex_arg[0])
5614     {
5615       /* Comments in regexp file or null arg to --regex. */
5616     case '\0':
5617     case ' ':
5618     case '\t':
5619       break;
5620
5621       /* Read a regex file.  This is recursive and may result in a
5622          loop, which will stop when the file descriptors are exhausted. */
5623     case '@':
5624       {
5625         FILE *regexfp;
5626         linebuffer regexbuf;
5627         char *regexfile = regex_arg + 1;
5628
5629         /* regexfile is a file containing regexps, one per line. */
5630         regexfp = fopen (regexfile, "r");
5631         if (regexfp == NULL)
5632           {
5633             pfatal (regexfile);
5634             return;
5635           }
5636         linebuffer_init (&regexbuf);
5637         while (readline_internal (&regexbuf, regexfp) > 0)
5638           analyse_regex (regexbuf.buffer);
5639         free (regexbuf.buffer);
5640         fclose (regexfp);
5641       }
5642       break;
5643
5644       /* Regexp to be used for a specific language only. */
5645     case '{':
5646       {
5647         language *lang;
5648         char *lang_name = regex_arg + 1;
5649         char *cp;
5650
5651         for (cp = lang_name; *cp != '}'; cp++)
5652           if (*cp == '\0')
5653             {
5654               error ("unterminated language name in regex: %s", regex_arg);
5655               return;
5656             }
5657         *cp++ = '\0';
5658         lang = get_language_from_langname (lang_name);
5659         if (lang == NULL)
5660           return;
5661         add_regex (cp, lang);
5662       }
5663       break;
5664
5665       /* Regexp to be used for any language. */
5666     default:
5667       add_regex (regex_arg, NULL);
5668       break;
5669     }
5670 }
5671
5672 /* Separate the regexp pattern, compile it,
5673    and care for optional name and modifiers. */
5674 static void
5675 add_regex (char *regexp_pattern, language *lang)
5676 {
5677   static struct re_pattern_buffer zeropattern;
5678   char sep, *pat, *name, *modifiers;
5679   char empty = '\0';
5680   const char *err;
5681   struct re_pattern_buffer *patbuf;
5682   regexp *rp;
5683   bool
5684     force_explicit_name = TRUE, /* do not use implicit tag names */
5685     ignore_case = FALSE,        /* case is significant */
5686     multi_line = FALSE,         /* matches are done one line at a time */
5687     single_line = FALSE;        /* dot does not match newline */
5688
5689
5690   if (strlen (regexp_pattern) < 3)
5691     {
5692       error ("null regexp");
5693       return;
5694     }
5695   sep = regexp_pattern[0];
5696   name = scan_separators (regexp_pattern);
5697   if (name == NULL)
5698     {
5699       error ("%s: unterminated regexp", regexp_pattern);
5700       return;
5701     }
5702   if (name[1] == sep)
5703     {
5704       error ("null name for regexp \"%s\"", regexp_pattern);
5705       return;
5706     }
5707   modifiers = scan_separators (name);
5708   if (modifiers == NULL)        /* no terminating separator --> no name */
5709     {
5710       modifiers = name;
5711       name = &empty;
5712     }
5713   else
5714     modifiers += 1;             /* skip separator */
5715
5716   /* Parse regex modifiers. */
5717   for (; modifiers[0] != '\0'; modifiers++)
5718     switch (modifiers[0])
5719       {
5720       case 'N':
5721         if (modifiers == name)
5722           error ("forcing explicit tag name but no name, ignoring");
5723         force_explicit_name = TRUE;
5724         break;
5725       case 'i':
5726         ignore_case = TRUE;
5727         break;
5728       case 's':
5729         single_line = TRUE;
5730         /* FALLTHRU */
5731       case 'm':
5732         multi_line = TRUE;
5733         need_filebuf = TRUE;
5734         break;
5735       default:
5736         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5737         break;
5738       }
5739
5740   patbuf = xnew (1, struct re_pattern_buffer);
5741   *patbuf = zeropattern;
5742   if (ignore_case)
5743     {
5744       static char lc_trans[CHARS];
5745       int i;
5746       for (i = 0; i < CHARS; i++)
5747         lc_trans[i] = lowcase (i);
5748       patbuf->translate = lc_trans;     /* translation table to fold case  */
5749     }
5750
5751   if (multi_line)
5752     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5753   else
5754     pat = regexp_pattern;
5755
5756   if (single_line)
5757     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5758   else
5759     re_set_syntax (RE_SYNTAX_EMACS);
5760
5761   err = re_compile_pattern (pat, strlen (pat), patbuf);
5762   if (multi_line)
5763     free (pat);
5764   if (err != NULL)
5765     {
5766       error ("%s while compiling pattern", err);
5767       return;
5768     }
5769
5770   rp = p_head;
5771   p_head = xnew (1, regexp);
5772   p_head->pattern = savestr (regexp_pattern);
5773   p_head->p_next = rp;
5774   p_head->lang = lang;
5775   p_head->pat = patbuf;
5776   p_head->name = savestr (name);
5777   p_head->error_signaled = FALSE;
5778   p_head->force_explicit_name = force_explicit_name;
5779   p_head->ignore_case = ignore_case;
5780   p_head->multi_line = multi_line;
5781 }
5782
5783 /*
5784  * Do the substitutions indicated by the regular expression and
5785  * arguments.
5786  */
5787 static char *
5788 substitute (char *in, char *out, struct re_registers *regs)
5789 {
5790   char *result, *t;
5791   int size, dig, diglen;
5792
5793   result = NULL;
5794   size = strlen (out);
5795
5796   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5797   if (out[size - 1] == '\\')
5798     fatal ("pattern error in \"%s\"", out);
5799   for (t = etags_strchr (out, '\\');
5800        t != NULL;
5801        t = etags_strchr (t + 2, '\\'))
5802     if (ISDIGIT (t[1]))
5803       {
5804         dig = t[1] - '0';
5805         diglen = regs->end[dig] - regs->start[dig];
5806         size += diglen - 2;
5807       }
5808     else
5809       size -= 1;
5810
5811   /* Allocate space and do the substitutions. */
5812   assert (size >= 0);
5813   result = xnew (size + 1, char);
5814
5815   for (t = result; *out != '\0'; out++)
5816     if (*out == '\\' && ISDIGIT (*++out))
5817       {
5818         dig = *out - '0';
5819         diglen = regs->end[dig] - regs->start[dig];
5820         strncpy (t, in + regs->start[dig], diglen);
5821         t += diglen;
5822       }
5823     else
5824       *t++ = *out;
5825   *t = '\0';
5826
5827   assert (t <= result + size);
5828   assert (t - result == (int)strlen (result));
5829
5830   return result;
5831 }
5832
5833 /* Deallocate all regexps. */
5834 static void
5835 free_regexps (void)
5836 {
5837   regexp *rp;
5838   while (p_head != NULL)
5839     {
5840       rp = p_head->p_next;
5841       free (p_head->pattern);
5842       free (p_head->name);
5843       free (p_head);
5844       p_head = rp;
5845     }
5846   return;
5847 }
5848
5849 /*
5850  * Reads the whole file as a single string from `filebuf' and looks for
5851  * multi-line regular expressions, creating tags on matches.
5852  * readline already dealt with normal regexps.
5853  *
5854  * Idea by Ben Wing <ben@666.com> (2002).
5855  */
5856 static void
5857 regex_tag_multiline (void)
5858 {
5859   char *buffer = filebuf.buffer;
5860   regexp *rp;
5861   char *name;
5862
5863   for (rp = p_head; rp != NULL; rp = rp->p_next)
5864     {
5865       int match = 0;
5866
5867       if (!rp->multi_line)
5868         continue;               /* skip normal regexps */
5869
5870       /* Generic initializations before parsing file from memory. */
5871       lineno = 1;               /* reset global line number */
5872       charno = 0;               /* reset global char number */
5873       linecharno = 0;           /* reset global char number of line start */
5874
5875       /* Only use generic regexps or those for the current language. */
5876       if (rp->lang != NULL && rp->lang != curfdp->lang)
5877         continue;
5878
5879       while (match >= 0 && match < filebuf.len)
5880         {
5881           match = re_search (rp->pat, buffer, filebuf.len, charno,
5882                              filebuf.len - match, &rp->regs);
5883           switch (match)
5884             {
5885             case -2:
5886               /* Some error. */
5887               if (!rp->error_signaled)
5888                 {
5889                   error ("regexp stack overflow while matching \"%s\"",
5890                          rp->pattern);
5891                   rp->error_signaled = TRUE;
5892                 }
5893               break;
5894             case -1:
5895               /* No match. */
5896               break;
5897             default:
5898               if (match == rp->regs.end[0])
5899                 {
5900                   if (!rp->error_signaled)
5901                     {
5902                       error ("regexp matches the empty string: \"%s\"",
5903                              rp->pattern);
5904                       rp->error_signaled = TRUE;
5905                     }
5906                   match = -3;   /* exit from while loop */
5907                   break;
5908                 }
5909
5910               /* Match occurred.  Construct a tag. */
5911               while (charno < rp->regs.end[0])
5912                 if (buffer[charno++] == '\n')
5913                   lineno++, linecharno = charno;
5914               name = rp->name;
5915               if (name[0] == '\0')
5916                 name = NULL;
5917               else /* make a named tag */
5918                 name = substitute (buffer, rp->name, &rp->regs);
5919               if (rp->force_explicit_name)
5920                 /* Force explicit tag name, if a name is there. */
5921                 pfnote (name, TRUE, buffer + linecharno,
5922                         charno - linecharno + 1, lineno, linecharno);
5923               else
5924                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5925                           charno - linecharno + 1, lineno, linecharno);
5926               break;
5927             }
5928         }
5929     }
5930 }
5931
5932 \f
5933 static bool
5934 nocase_tail (const char *cp)
5935 {
5936   register int len = 0;
5937
5938   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5939     cp++, len++;
5940   if (*cp == '\0' && !intoken (dbp[len]))
5941     {
5942       dbp += len;
5943       return TRUE;
5944     }
5945   return FALSE;
5946 }
5947
5948 static void
5949 get_tag (register char *bp, char **namepp)
5950 {
5951   register char *cp = bp;
5952
5953   if (*bp != '\0')
5954     {
5955       /* Go till you get to white space or a syntactic break */
5956       for (cp = bp + 1; !notinname (*cp); cp++)
5957         continue;
5958       make_tag (bp, cp - bp, TRUE,
5959                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5960     }
5961
5962   if (namepp != NULL)
5963     *namepp = savenstr (bp, cp - bp);
5964 }
5965
5966 /*
5967  * Read a line of text from `stream' into `lbp', excluding the
5968  * newline or CR-NL, if any.  Return the number of characters read from
5969  * `stream', which is the length of the line including the newline.
5970  *
5971  * On DOS or Windows we do not count the CR character, if any before the
5972  * NL, in the returned length; this mirrors the behavior of Emacs on those
5973  * platforms (for text files, it translates CR-NL to NL as it reads in the
5974  * file).
5975  *
5976  * If multi-line regular expressions are requested, each line read is
5977  * appended to `filebuf'.
5978  */
5979 static long
5980 readline_internal (linebuffer *lbp, register FILE *stream)
5981 {
5982   char *buffer = lbp->buffer;
5983   register char *p = lbp->buffer;
5984   register char *pend;
5985   int chars_deleted;
5986
5987   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5988
5989   for (;;)
5990     {
5991       register int c = getc (stream);
5992       if (p == pend)
5993         {
5994           /* We're at the end of linebuffer: expand it. */
5995           lbp->size *= 2;
5996           xrnew (buffer, lbp->size, char);
5997           p += buffer - lbp->buffer;
5998           pend = buffer + lbp->size;
5999           lbp->buffer = buffer;
6000         }
6001       if (c == EOF)
6002         {
6003           *p = '\0';
6004           chars_deleted = 0;
6005           break;
6006         }
6007       if (c == '\n')
6008         {
6009           if (p > buffer && p[-1] == '\r')
6010             {
6011               p -= 1;
6012 #ifdef DOS_NT
6013              /* Assume CRLF->LF translation will be performed by Emacs
6014                 when loading this file, so CRs won't appear in the buffer.
6015                 It would be cleaner to compensate within Emacs;
6016                 however, Emacs does not know how many CRs were deleted
6017                 before any given point in the file.  */
6018               chars_deleted = 1;
6019 #else
6020               chars_deleted = 2;
6021 #endif
6022             }
6023           else
6024             {
6025               chars_deleted = 1;
6026             }
6027           *p = '\0';
6028           break;
6029         }
6030       *p++ = c;
6031     }
6032   lbp->len = p - buffer;
6033
6034   if (need_filebuf              /* we need filebuf for multi-line regexps */
6035       && chars_deleted > 0)     /* not at EOF */
6036     {
6037       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6038         {
6039           /* Expand filebuf. */
6040           filebuf.size *= 2;
6041           xrnew (filebuf.buffer, filebuf.size, char);
6042         }
6043       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6044       filebuf.len += lbp->len;
6045       filebuf.buffer[filebuf.len++] = '\n';
6046       filebuf.buffer[filebuf.len] = '\0';
6047     }
6048
6049   return lbp->len + chars_deleted;
6050 }
6051
6052 /*
6053  * Like readline_internal, above, but in addition try to match the
6054  * input line against relevant regular expressions and manage #line
6055  * directives.
6056  */
6057 static void
6058 readline (linebuffer *lbp, FILE *stream)
6059 {
6060   long result;
6061
6062   linecharno = charno;          /* update global char number of line start */
6063   result = readline_internal (lbp, stream); /* read line */
6064   lineno += 1;                  /* increment global line number */
6065   charno += result;             /* increment global char number */
6066
6067   /* Honor #line directives. */
6068   if (!no_line_directive)
6069     {
6070       static bool discard_until_line_directive;
6071
6072       /* Check whether this is a #line directive. */
6073       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6074         {
6075           unsigned int lno;
6076           int start = 0;
6077
6078           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6079               && start > 0)     /* double quote character found */
6080             {
6081               char *endp = lbp->buffer + start;
6082
6083               while ((endp = etags_strchr (endp, '"')) != NULL
6084                      && endp[-1] == '\\')
6085                 endp++;
6086               if (endp != NULL)
6087                 /* Ok, this is a real #line directive.  Let's deal with it. */
6088                 {
6089                   char *taggedabsname;  /* absolute name of original file */
6090                   char *taggedfname;    /* name of original file as given */
6091                   char *name;           /* temp var */
6092
6093                   discard_until_line_directive = FALSE; /* found it */
6094                   name = lbp->buffer + start;
6095                   *endp = '\0';
6096                   canonicalize_filename (name);
6097                   taggedabsname = absolute_filename (name, tagfiledir);
6098                   if (filename_is_absolute (name)
6099                       || filename_is_absolute (curfdp->infname))
6100                     taggedfname = savestr (taggedabsname);
6101                   else
6102                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6103
6104                   if (streq (curfdp->taggedfname, taggedfname))
6105                     /* The #line directive is only a line number change.  We
6106                        deal with this afterwards. */
6107                     free (taggedfname);
6108                   else
6109                     /* The tags following this #line directive should be
6110                        attributed to taggedfname.  In order to do this, set
6111                        curfdp accordingly. */
6112                     {
6113                       fdesc *fdp; /* file description pointer */
6114
6115                       /* Go look for a file description already set up for the
6116                          file indicated in the #line directive.  If there is
6117                          one, use it from now until the next #line
6118                          directive. */
6119                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6120                         if (streq (fdp->infname, curfdp->infname)
6121                             && streq (fdp->taggedfname, taggedfname))
6122                           /* If we remove the second test above (after the &&)
6123                              then all entries pertaining to the same file are
6124                              coalesced in the tags file.  If we use it, then
6125                              entries pertaining to the same file but generated
6126                              from different files (via #line directives) will
6127                              go into separate sections in the tags file.  These
6128                              alternatives look equivalent.  The first one
6129                              destroys some apparently useless information. */
6130                           {
6131                             curfdp = fdp;
6132                             free (taggedfname);
6133                             break;
6134                           }
6135                       /* Else, if we already tagged the real file, skip all
6136                          input lines until the next #line directive. */
6137                       if (fdp == NULL) /* not found */
6138                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6139                           if (streq (fdp->infabsname, taggedabsname))
6140                             {
6141                               discard_until_line_directive = TRUE;
6142                               free (taggedfname);
6143                               break;
6144                             }
6145                       /* Else create a new file description and use that from
6146                          now on, until the next #line directive. */
6147                       if (fdp == NULL) /* not found */
6148                         {
6149                           fdp = fdhead;
6150                           fdhead = xnew (1, fdesc);
6151                           *fdhead = *curfdp; /* copy curr. file description */
6152                           fdhead->next = fdp;
6153                           fdhead->infname = savestr (curfdp->infname);
6154                           fdhead->infabsname = savestr (curfdp->infabsname);
6155                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6156                           fdhead->taggedfname = taggedfname;
6157                           fdhead->usecharno = FALSE;
6158                           fdhead->prop = NULL;
6159                           fdhead->written = FALSE;
6160                           curfdp = fdhead;
6161                         }
6162                     }
6163                   free (taggedabsname);
6164                   lineno = lno - 1;
6165                   readline (lbp, stream);
6166                   return;
6167                 } /* if a real #line directive */
6168             } /* if #line is followed by a number */
6169         } /* if line begins with "#line " */
6170
6171       /* If we are here, no #line directive was found. */
6172       if (discard_until_line_directive)
6173         {
6174           if (result > 0)
6175             {
6176               /* Do a tail recursion on ourselves, thus discarding the contents
6177                  of the line buffer. */
6178               readline (lbp, stream);
6179               return;
6180             }
6181           /* End of file. */
6182           discard_until_line_directive = FALSE;
6183           return;
6184         }
6185     } /* if #line directives should be considered */
6186
6187   {
6188     int match;
6189     regexp *rp;
6190     char *name;
6191
6192     /* Match against relevant regexps. */
6193     if (lbp->len > 0)
6194       for (rp = p_head; rp != NULL; rp = rp->p_next)
6195         {
6196           /* Only use generic regexps or those for the current language.
6197              Also do not use multiline regexps, which is the job of
6198              regex_tag_multiline. */
6199           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6200               || rp->multi_line)
6201             continue;
6202
6203           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6204           switch (match)
6205             {
6206             case -2:
6207               /* Some error. */
6208               if (!rp->error_signaled)
6209                 {
6210                   error ("regexp stack overflow while matching \"%s\"",
6211                          rp->pattern);
6212                   rp->error_signaled = TRUE;
6213                 }
6214               break;
6215             case -1:
6216               /* No match. */
6217               break;
6218             case 0:
6219               /* Empty string matched. */
6220               if (!rp->error_signaled)
6221                 {
6222                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6223                   rp->error_signaled = TRUE;
6224                 }
6225               break;
6226             default:
6227               /* Match occurred.  Construct a tag. */
6228               name = rp->name;
6229               if (name[0] == '\0')
6230                 name = NULL;
6231               else /* make a named tag */
6232                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6233               if (rp->force_explicit_name)
6234                 /* Force explicit tag name, if a name is there. */
6235                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6236               else
6237                 make_tag (name, strlen (name), TRUE,
6238                           lbp->buffer, match, lineno, linecharno);
6239               break;
6240             }
6241         }
6242   }
6243 }
6244
6245 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The copy is produced by savenstr, asked for exactly strlen (CP)
 * characters, so the result is a newly allocated string with the
 * same contents as CP.
 */
static char *
savestr (const char *cp)
{
  const int length = strlen (cp);

  return savenstr (cp, length);
}
6255
6256 /*
6257  * Return a pointer to a space of size LEN+1 allocated with xnew where
6258  * the string CP has been copied for at most the first LEN characters.
6259  */
6260 static char *
6261 savenstr (const char *cp, int len)
6262 {
6263   register char *dp;
6264
6265   dp = xnew (len + 1, char);
6266   strncpy (dp, cp, len);
6267   dp[len] = '\0';
6268   return dp;
6269 }
6270
/*
 * Find the last occurrence of the character C in the string SP.
 *
 * Returns a pointer to that occurrence, or NULL when C does not
 * appear.  The terminating NUL takes part in the search, so looking
 * for '\0' yields a pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent match */
      if (*sp == '\0')
        break;                  /* terminator examined: scan is over */
    }

  return (char *) last;
}
6290
/*
 * Find the first occurrence of the character C in the string SP.
 *
 * Returns a pointer to that occurrence, or NULL when C does not
 * appear.  The terminating NUL takes part in the search, so looking
 * for '\0' yields a pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;     /* first match wins */
      if (*sp == '\0')
        return NULL;            /* reached the end without a match */
    }
}
6307
/* Advance CP over every leading character that iswhite accepts and
   return the new position.  The end of the string does not count as
   a space, so the scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6316
/* Advance CP up to the first character that iswhite accepts, or up to
   the end of the string, whichever comes first, and return the new
   position. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6325
/* Print an error message and exit with a failure status.
   S1 is handed to error as its printf-style format and S2 as the
   single argument for that format.  */
void
fatal (const char *s1, const char *s2)
{
  const char *format = s1;      /* message template given to error */

  error (format, s2);
  exit (EXIT_FAILURE);
}
6333
/* Report S1 through perror, which appends the system's message for
   the current errno, then exit with a failure status.  */
static void
pfatal (const char *s1)
{
  const int status = EXIT_FAILURE;

  perror (s1);
  exit (status);
}
6340
6341 static void
6342 suggest_asking_for_help (void)
6343 {
6344   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6345            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6346   exit (EXIT_FAILURE);
6347 }
6348
6349 /* Output a diagnostic with printf-style FORMAT and args.  */
6350 static void
6351 error (const char *format, ...)
6352 {
6353   va_list ap;
6354   va_start (ap, format);
6355   fprintf (stderr, "%s: ", progname);
6356   vfprintf (stderr, format, ap);
6357   fprintf (stderr, "\n");
6358   va_end (ap);
6359 }
6360
6361 /* Return a newly-allocated string whose contents
6362    concatenate those of s1, s2, s3.  */
6363 static char *
6364 concat (const char *s1, const char *s2, const char *s3)
6365 {
6366   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6367   char *result = xnew (len1 + len2 + len3 + 1, char);
6368
6369   strcpy (result, s1);
6370   strcpy (result + len1, s2);
6371   strcpy (result + len1 + len2, s3);
6372   result[len1 + len2 + len3] = '\0';
6373
6374   return result;
6375 }
6376
6377 \f
6378 /* Does the same work as the system V getcwd, but does not need to
6379    guess the buffer size in advance. */
6380 static char *
6381 etags_getcwd (void)
6382 {
6383 #ifdef HAVE_GETCWD
6384   int bufsize = 200;
6385   char *path = xnew (bufsize, char);
6386
6387   while (getcwd (path, bufsize) == NULL)
6388     {
6389       if (errno != ERANGE)
6390         pfatal ("getcwd");
6391       bufsize *= 2;
6392       free (path);
6393       path = xnew (bufsize, char);
6394     }
6395
6396   canonicalize_filename (path);
6397   return path;
6398
6399 #else /* not HAVE_GETCWD */
6400 #if MSDOS
6401
6402   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6403
6404   getwd (path);
6405
6406   for (p = path; *p != '\0'; p++)
6407     if (*p == '\\')
6408       *p = '/';
6409     else
6410       *p = lowcase (*p);
6411
6412   return strdup (path);
6413 #else /* not MSDOS */
6414   linebuffer path;
6415   FILE *pipe;
6416
6417   linebuffer_init (&path);
6418   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6419   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6420     pfatal ("pwd");
6421   pclose (pipe);
6422
6423   return path.buffer;
6424 #endif /* not MSDOS */
6425 #endif /* not HAVE_GETCWD */
6426 }
6427
6428 /* Return a newly allocated string containing the file name of FILE
6429    relative to the absolute directory DIR (which should end with a slash). */
6430 static char *
6431 relative_filename (char *file, char *dir)
6432 {
6433   char *fp, *dp, *afn, *res;
6434   int i;
6435
6436   /* Find the common root of file and dir (with a trailing slash). */
6437   afn = absolute_filename (file, cwd);
6438   fp = afn;
6439   dp = dir;
6440   while (*fp++ == *dp++)
6441     continue;
6442   fp--, dp--;                   /* back to the first differing char */
6443 #ifdef DOS_NT
6444   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6445     return afn;
6446 #endif
6447   do                            /* look at the equal chars until '/' */
6448     fp--, dp--;
6449   while (*fp != '/');
6450
6451   /* Build a sequence of "../" strings for the resulting relative file name. */
6452   i = 0;
6453   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6454     i += 1;
6455   res = xnew (3*i + strlen (fp + 1) + 1, char);
6456   res[0] = '\0';
6457   while (i-- > 0)
6458     strcat (res, "../");
6459
6460   /* Add the file name relative to the common root of file and dir. */
6461   strcat (res, fp + 1);
6462   free (afn);
6463
6464   return res;
6465 }
6466
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An already absolute FILE is copied as is; otherwise DIR and FILE
   are concatenated.  The "/dirname/.." and "/." substrings are then
   removed in place from the copy.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards from SLASHP for the closest position
                 where an absolute name starts, i.e. the beginning of
                 the component that ".." cancels.  The scan never
                 steps before RES, so no pointer outside the buffer is
                 ever formed. */
              char *prev = NULL;

              for (cp = slashp; cp > res; )
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    {
                      prev = cp;
                      break;
                    }
                }
              if (prev == NULL)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (prev[0] != '/')
                cp = slashp;
#endif
              else
                cp = prev;
              /* strlen (slashp + 2) is the length of what follows
                 "/.." plus one, so the terminating NUL moves too. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." and keep what follows, NUL included. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  /* The deletions can leave nothing at all (for instance "/a/.."
     collapses to the empty string): answer with the root then. */
  if (res[0] == '\0')
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6529
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash at all, the
   result is simply a copy of DIR. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily cut FILE right after its last slash so that only the
     directory part is made absolute, then put the overwritten
     character back. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6549
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = fn[0] == '/';

#ifdef DOS_NT
  if (!absolute)
    absolute = ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/';
#endif

  return absolute;
}
6561
/* Canonicalize the file name FN in place: under DOS_NT an upper case
   drive letter is downcased, and every run of one or more separator
   characters ('/', or '\\' under DOS_NT) becomes a single slash. */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads the name while FN writes the result; FN never gets
     ahead of SRC, so rewriting in place is safe. */
  src = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *fn++ = *src++;
      else
        {
          /* Emit one slash for the whole run of separators. */
          *fn++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *fn = '\0';
}
6591
6592 \f
6593 /* Initialize a linebuffer for use. */
6594 static void
6595 linebuffer_init (linebuffer *lbp)
6596 {
6597   lbp->size = (DEBUG) ? 3 : 200;
6598   lbp->buffer = xnew (lbp->size, char);
6599   lbp->buffer[0] = '\0';
6600   lbp->len = 0;
6601 }
6602
6603 /* Set the minimum size of a string contained in a linebuffer. */
6604 static void
6605 linebuffer_setlen (linebuffer *lbp, int toksize)
6606 {
6607   while (lbp->size <= toksize)
6608     {
6609       lbp->size *= 2;
6610       xrnew (lbp->buffer, lbp->size, char);
6611     }
6612   lbp->len = toksize;
6613 }
6614
/* Allocate SIZE bytes with malloc and return the block; when malloc
   returns NULL, report "virtual memory exhausted" through fatal
   instead. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6624
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block; when realloc returns NULL, report "virtual memory
   exhausted" through fatal instead. */
static void *
xrealloc (char *ptr, size_t size)
{
  void *grown = realloc (ptr, size);

  if (grown != NULL)
    return grown;

  fatal ("virtual memory exhausted", (char *)NULL);
  return grown;
}
6633
6634 /*
6635  * Local Variables:
6636  * indent-tabs-mode: t
6637  * tab-width: 8
6638  * fill-column: 79
6639  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6640  * c-file-style: "gnu"
6641  * End:
6642  */
6643
6644 /* etags.c ends here */