lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2013 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82 /* Boolean constants; DEBUG is normalized to TRUE or FALSE so that it can be tested with #if.  */
  83 #define TRUE    1
  84 #define FALSE   0
  85
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* not debugging: disable assert */
  92 #endif
  93
  94 #include <config.h>
  95
  96 #ifndef _GNU_SOURCE
  97 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  98 #endif
  99
 100 /* WIN32_NATIVE is for XEmacs.
 101    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 102 #ifdef WIN32_NATIVE
 103 # undef MSDOS
 104 # undef  WINDOWSNT
 105 # define WINDOWSNT
 106 #endif /* WIN32_NATIVE */
 107
 108 #ifdef MSDOS
 109 # undef MSDOS
 110 # define MSDOS TRUE
 111 # include <fcntl.h>
 112 # include <sys/param.h>
 113 # include <io.h>
 114 #else
 115 # define MSDOS FALSE
 116 #endif /* MSDOS */
 117
 118 #ifdef WINDOWSNT
 119 # include <fcntl.h>
 120 # include <direct.h>
 121 # include <io.h>
 122 # define MAXPATHLEN _MAX_PATH
 123 # undef HAVE_NTGUI
 124 # undef  DOS_NT
 125 # define DOS_NT
 126 #endif /* WINDOWSNT */
 127
 128 #include <unistd.h>
 129 #include <stdarg.h>
 130 #include <stdlib.h>
 131 #include <string.h>
 132 #include <stdio.h>
 133 #include <ctype.h>
 134 #include <errno.h>
 135 #include <sys/types.h>
 136 #include <sys/stat.h>
 137 #include <c-strcase.h>
 138
 139 #include <assert.h>
 140 #ifdef NDEBUG
 141 # undef  assert                 /* some systems have a buggy assert.h */
 142 # define assert(x) ((void) 0)
 143 #endif
 144
 145 #include <getopt.h>
 146 #include <regex.h>
 147
 148 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 149  Leave it undefined to make the program "etags", which makes emacs-style
 150  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 151 #ifdef CTAGS
 152 # undef  CTAGS
 153 # define CTAGS TRUE
 154 #else
 155 # define CTAGS FALSE
 156 #endif
 157 /* String equality predicates.  With assertions enabled each one checks that BOTH arguments are non-NULL before comparing (so the arguments are then evaluated twice).  */
 158 #define streq(s,t)      (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 159 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 160 #define strneq(s,t,n)   (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 161 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 162 /* Character classification.  CHAR reduces its argument to the range 0 .. CHARS-1 before it is used as a table index or passed to <ctype.h>.  */
 163 #define CHARS 256               /* number of char values: 2^CHAR_BIT, assuming 8-bit chars */
 164 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 165 #define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
 166 #define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
 167 #define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
 168 #define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
 169 #define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
 170 /* <ctype.h> wrappers: going through CHAR keeps the argument non-negative even if plain char is signed.  */
 171 #define ISALNUM(c)      isalnum (CHAR (c))
 172 #define ISALPHA(c)      isalpha (CHAR (c))
 173 #define ISDIGIT(c)      isdigit (CHAR (c))
 174 #define ISLOWER(c)      islower (CHAR (c))
 175
 176 #define lowcase(c)      tolower (CHAR (c))
 177
 178
 179 /*
 180  *      xnew, xrnew -- allocate, reallocate storage for N objects of type Type
 181  *      (the request is n * sizeof (Type); the macros do not check that product for overflow)
 182  * SYNOPSIS:    Type *xnew (int n, Type);
 183  *              void xrnew (OldPointer, int n, Type);   -- assigns the new block back to OldPointer
 184  */
 185 #if DEBUG                       /* debugging: allocate via trace_malloc/trace_realloc, passing __FILE__ and __LINE__ */
 186 # include "chkmalloc.h"
 187 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 188                                                   (n) * sizeof (Type)))
 189 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 190                                         (char *) (op), (n) * sizeof (Type)))
 191 #else                           /* normal build: allocate via xmalloc/xrealloc */
 192 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 193 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 194                                         (char *) (op), (n) * sizeof (Type)))
 195 #endif
 196 /* bool has to be plain int: addresses of bool flags (e.g. &packages_only) are stored in the `flag' member of struct option in longopts, which getopt_long declares as int *.  */
 197 #define bool int
 198 /* Type of a language's parse function (see the `function' member of the language struct below).  */
 199 typedef void Lang_function (FILE *);
 200 /* Associates a file name suffix with the command used to decompress such files.  */
 201 typedef struct
 202 {
 203   const char *suffix;           /* file name suffix for this compressor */
 204   const char *command;          /* command line; takes one arg and decompresses to stdout */
 205 } compressor;
 206 /* Description of one language: its name, help text, parse function, and the suffixes, file names and interpreters associated with it.  */
 207 typedef struct
 208 {
 209   const char *name;             /* language name */
 210   const char *help;             /* detailed help for the language */
 211   Lang_function *function;      /* parse function */
 212   const char **suffixes;        /* name suffixes of this language's files */
 213   const char **filenames;       /* names of this language's files */
 214   const char **interpreters;    /* interpreters for this language */
 215   bool metasource;              /* source used to generate other sources */
 216 } language;
 217 /* Description of one input file; descriptions are chained through `next' (the list head is fdhead).  */
 218 typedef struct fdesc
 219 {
 220   struct fdesc *next;           /* next description in the linked list */
 221   char *infname;                /* uncompressed input file name */
 222   char *infabsname;             /* absolute uncompressed input file name */
 223   char *infabsdir;              /* absolute dir of input file */
 224   char *taggedfname;            /* file name to write in tagfile */
 225   language *lang;               /* language of file */
 226   char *prop;                   /* file properties to write in tagfile */
 227   bool usecharno;               /* etags tags shall contain char number */
 228   bool written;                 /* entry written in the tags file */
 229 } fdesc;
 230 /* One tag.  Nodes are linked through `left' and `right' into the binary tree whose head is nodehead.  */
 231 typedef struct node_st
 232 {                               /* sorting structure */
 233   struct node_st *left, *right; /* left and right sons */
 234   fdesc *fdp;                   /* description of the file this tag belongs to */
 235   char *name;                   /* tag name */
 236   char *regex;                  /* search regexp */
 237   bool valid;                   /* write this tag on the tag file */
 238   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 239   bool been_warned;             /* warning already given for duplicated tag */
 240   int lno;                      /* line number tag is on */
 241   long cno;                     /* character number line starts on */
 242 } node;
 243
 244 /*
 245  * A `linebuffer' is a structure which holds a line of text.
 246  * `readline_internal' reads a line from a stream into a linebuffer
 247  * and works regardless of the length of the line.
 248  * SIZE is the size of BUFFER, LEN is the length of the string in
 249  * BUFFER after readline reads it.
 250  */
 251 typedef struct
 252 {
 253   long size;                    /* size of BUFFER */
 254   int len;                      /* length of the string currently in BUFFER */
 255   char *buffer;                 /* the text of the line */
 256 } linebuffer;
 257
 258 /* A classified command-line argument.  Used to support mixing of --lang and file names. */
 259 typedef struct
 260 {
 261   enum {
 262     at_language,                /* a language specification */
 263     at_regexp,                  /* a regular expression */
 264     at_filename,                /* a file name */
 265     at_stdin,                   /* read from stdin here */
 266     at_end                      /* stop parsing the list */
 267   } arg_type;                   /* argument type: which of the kinds above this entry is */
 268   language *lang;               /* language associated with the argument */
 269   char *what;                   /* the argument itself */
 270 } argument;
 271
 272 /* Structure defining a regular expression; all of them are chained through p_next, starting at p_head. */
 273 typedef struct regexp
 274 {
 275   struct regexp *p_next;        /* pointer to next in list */
 276   language *lang;               /* if set, use only for this language */
 277   char *pattern;                /* the regexp pattern */
 278   char *name;                   /* tag name */
 279   struct re_pattern_buffer *pat; /* the compiled pattern */
 280   struct re_registers regs;     /* re registers used with this pattern */
 281   bool error_signaled;          /* already signaled for this regexp */
 282   bool force_explicit_name;     /* do not allow implicit tag name */
 283   bool ignore_case;             /* ignore case when matching */
 284   bool multi_line;              /* do a multi-line match on the whole file */
 285 } regexp;
 286
 287
 288 /* Many compilers barf on this:
 289         Lang_function Ada_funcs;
 290    so let's write it this way */
 291 static void Ada_funcs (FILE *);
 292 static void Asm_labels (FILE *);
 293 static void C_entries (int c_ext, FILE *);
 294 static void default_C_entries (FILE *);
 295 static void plain_C_entries (FILE *);
 296 static void Cjava_entries (FILE *);
 297 static void Cobol_paragraphs (FILE *);
 298 static void Cplusplus_entries (FILE *);
 299 static void Cstar_entries (FILE *);
 300 static void Erlang_functions (FILE *);
 301 static void Forth_words (FILE *);
 302 static void Fortran_functions (FILE *);
 303 static void HTML_labels (FILE *);
 304 static void Lisp_functions (FILE *);
 305 static void Lua_functions (FILE *);
 306 static void Makefile_targets (FILE *);
 307 static void Pascal_functions (FILE *);
 308 static void Perl_functions (FILE *);
 309 static void PHP_functions (FILE *);
 310 static void PS_functions (FILE *);
 311 static void Prolog_functions (FILE *);
 312 static void Python_functions (FILE *);
 313 static void Scheme_functions (FILE *);
 314 static void TeX_commands (FILE *);
 315 static void Texinfo_nodes (FILE *);
 316 static void Yacc_entries (FILE *);
 317 static void just_read_file (FILE *);
 318
 319 static void print_language_names (void);
 320 static void print_version (void);
 321 static void print_help (argument *);
 322 int main (int, char **);
 323
 324 static compressor *get_compressor_from_suffix (char *, char **);
 325 static language *get_language_from_langname (const char *);
 326 static language *get_language_from_interpreter (char *);
 327 static language *get_language_from_filename (char *, bool);
 328 static void readline (linebuffer *, FILE *);
 329 static long readline_internal (linebuffer *, FILE *);
 330 static bool nocase_tail (const char *);
 331 static void get_tag (char *, char **);
 332
 333 static void analyse_regex (char *);
 334 static void free_regexps (void);
 335 static void regex_tag_multiline (void);
 336 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 337 static _Noreturn void suggest_asking_for_help (void);
 338 _Noreturn void fatal (const char *, const char *);
 339 static _Noreturn void pfatal (const char *);
 340 static void add_node (node *, node **);
 341
 342 static void init (void);
 343 static void process_file_name (char *, language *);
 344 static void process_file (FILE *, char *, language *);
 345 static void find_entries (FILE *);
 346 static void free_tree (node *);
 347 static void free_fdesc (fdesc *);
 348 static void pfnote (char *, bool, char *, int, int, long);
 349 static void make_tag (const char *, int, bool, char *, int, int, long);
 350 static void invalidate_nodes (fdesc *, node **);
 351 static void put_entries (node *);
 352
 353 static char *concat (const char *, const char *, const char *);
 354 static char *skip_spaces (char *);
 355 static char *skip_non_spaces (char *);
 356 static char *skip_name (char *);
 357 static char *savenstr (const char *, int);
 358 static char *savestr (const char *);
 359 static char *etags_strchr (const char *, int);
 360 static char *etags_strrchr (const char *, int);
 361 static char *etags_getcwd (void);
 362 static char *relative_filename (char *, char *);
 363 static char *absolute_filename (char *, char *);
 364 static char *absolute_dirname (char *, char *);
 365 static bool filename_is_absolute (char *f);
 366 static void canonicalize_filename (char *);
 367 static void linebuffer_init (linebuffer *);
 368 static void linebuffer_setlen (linebuffer *, int);
 369 static void *xmalloc (size_t);
 370 static void *xrealloc (char *, size_t);
 371
 372 \f
 373 static char searchar = '/';     /* use /.../ searches */
 374
 375 static char *tagfile;           /* output file */
 376 static char *progname;          /* name this program was invoked with */
 377 static char *cwd;               /* current working directory */
 378 static char *tagfiledir;        /* directory of tagfile */
 379 static FILE *tagf;              /* ioptr for tags file */
 380 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 381
 382 static fdesc *fdhead;           /* head of file description list */
 383 static fdesc *curfdp;           /* current file description */
 384 static int lineno;              /* line number of current line */
 385 static long charno;             /* current character number */
 386 static long linecharno;         /* charno of start of current line */
 387 static char *dbp;               /* pointer to start of current tag */
 388
 389 static const int invalidcharno = -1;
 390
 391 static node *nodehead;          /* the head of the binary tree of tags */
 392 static node *last_node;         /* the last node created */
 393
 394 static linebuffer lb;           /* the current line */
 395 static linebuffer filebuf;      /* a buffer containing the whole file */
 396 static linebuffer token_name;   /* a buffer containing a tag name */
 397
 398 /* boolean "functions" (see init)       */
 399 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 400 static const char
 401   /* white chars */
 402   *white = " \f\t\n\r\v",
 403   /* not in a name */
 404   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 405   /* token ending chars */
 406   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 407   /* token starting chars */
 408   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 409   /* valid in-token chars */
 410   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 411
 412 static bool append_to_tagfile;  /* -a: append to tags */
 413 /* The next five default to TRUE in C and derived languages.  */
 414 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 415 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 416                                 /* 0 struct/enum/union decls, and C++ */
 417                                 /* member functions. */
 418 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 419                                 /* constants and variables. */
 420                                 /* -D: opposite of -d.  Default under ctags. */
 421 static bool globals;            /* create tags for global variables */
 422 static bool members;            /* create tags for C member variables */
 423 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 424 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 425 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 426 static bool update;             /* -u: update tags */
 427 static bool vgrind_style;       /* -v: create vgrind style index output */
 428 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 429 static bool cxref_style;        /* -x: create cxref style output */
 430 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 431 static bool ignoreindent;       /* -I: ignore indentation in C */
 432 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 433
 434 /* STDIN is defined in LynxOS system headers */
 435 #ifdef STDIN
 436 # undef STDIN
 437 #endif
 438
 439 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 440 static bool parsing_stdin;      /* --parse-stdin used */
 441
 442 static regexp *p_head;          /* list of all regexps */
 443 static bool need_filebuf;       /* some regexes are multi-line */
 444 /* Long options for getopt_long.  An entry with a non-NULL flag stores its last field into that variable; otherwise the last field is the value getopt_long returns.  */
 445 static struct option longopts[] =
 446 {
 447   { "append",             no_argument,       NULL,               'a'   },
 448   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 449   { "c++",                no_argument,       NULL,               'C'   },
 450   { "declarations",       no_argument,       &declarations,      TRUE  },
 451   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 452   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 453   { "help",               no_argument,       NULL,               'h'   },
 454   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same name as the entry above, so this one looks unreachable -- confirm */
 455   { "ignore-indentation", no_argument,       NULL,               'I'   },
 456   { "language",           required_argument, NULL,               'l'   },
 457   { "members",            no_argument,       &members,           TRUE  },
 458   { "no-members",         no_argument,       &members,           FALSE },
 459   { "output",             required_argument, NULL,               'o'   },
 460   { "regex",              required_argument, NULL,               'r'   },
 461   { "no-regex",           no_argument,       NULL,               'R'   },
 462   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 463   { "parse-stdin",        required_argument, NULL,               STDIN },
 464   { "version",            no_argument,       NULL,               'V'   },
 465
 466 #if CTAGS /* Ctags options: only present when built as ctags */
 467   { "backward-search",    no_argument,       NULL,               'B'   },
 468   { "cxref",              no_argument,       NULL,               'x'   },
 469   { "defines",            no_argument,       NULL,               'd'   },
 470   { "globals",            no_argument,       &globals,           TRUE  },
 471   { "typedefs",           no_argument,       NULL,               't'   },
 472   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 473   { "update",             no_argument,       NULL,               'u'   },
 474   { "vgrind",             no_argument,       NULL,               'v'   },
 475   { "no-warn",            no_argument,       NULL,               'w'   },
 476
 477 #else /* Etags options: only present when built as etags */
 478   { "no-defines",         no_argument,       NULL,               'D'   },
 479   { "no-globals",         no_argument,       &globals,           FALSE },
 480   { "include",            required_argument, NULL,               'i'   },
 481 #endif
 482   { NULL }                      /* all-zero entry ends the table, as getopt_long requires */
 483 };
 484 /* Known compressed-file suffixes, each with the command that decompresses such a file to stdout.  */
 485 static compressor compressors[] =
 486 {
 487   { "z", "gzip -d -c"},
 488   { "Z", "gzip -d -c"},
 489   { "gz", "gzip -d -c"},
 490   { "GZ", "gzip -d -c"},
 491   { "bz2", "bzip2 -d -c" },
 492   { "xz", "xz -d -c" },
 493   { NULL }                      /* NULL suffix: presumably the end-of-table marker */
 494 };
 495
 496 /*
 497  * Language stuff.
 498  */
 499
 500 /* Ada code */
 501 static const char *Ada_suffixes [] =
 502   { "ads", "adb", "ada", NULL };
 503 static const char Ada_help [] =
 504 "In Ada code, functions, procedures, packages, tasks and types are\n\
 505 tags.  Use the `--packages-only' option to create tags for\n\
 506 packages only.\n\
 507 Ada tag names have suffixes indicating the type of entity:\n\
 508         Entity type:    Qualifier:\n\
 509         ------------    ----------\n\
 510         function        /f\n\
 511         procedure       /p\n\
 512         package spec    /s\n\
 513         package body    /b\n\
 514         type            /t\n\
 515         task            /k\n\
 516 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 517 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 518 will just search for any tag `bidule'.";
 519
 520 /* Assembly code */
 521 static const char *Asm_suffixes [] =
 522   { "a",        /* Unix assembler */
 523     "asm", /* Microcontroller assembly */
 524     "def", /* BSO/Tasking definition includes  */
 525     "inc", /* Microcontroller include files */
 526     "ins", /* Microcontroller include files */
 527     "s", "sa", /* Unix assembler */
 528     "S",   /* cpp-processed Unix assembler */
 529     "src", /* BSO/Tasking C compiler output */
 530     NULL
 531   };
 532 static const char Asm_help [] =
 533 "In assembler code, labels appearing at the beginning of a line,\n\
 534 followed by a colon, are tags.";
 535
 536
 537 /* Note that .c and .h can be considered C++, if the --c++ flag was
 538    given, or if the `class' or `template' keywords are met inside the file.
 539    That is why default_C_entries is called for these. */
 540 static const char *default_C_suffixes [] =
 541   { "c", "h", NULL };
 542 #if CTAGS                               /* C help for Ctags */
 543 static const char default_C_help [] =
 544 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 545 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 546 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 547 Use --globals to tag global variables.\n\
 548 You can tag function declarations and external variables by\n\
 549 using `--declarations', and struct members by using `--members'.";
 550 #else                                   /* C help for Etags */
 551 static const char default_C_help [] =
 552 "In C code, any C function or typedef is a tag, and so are\n\
 553 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 554 definitions and `enum' constants are tags unless you specify\n\
 555 `--no-defines'.  Global variables are tags unless you specify\n\
 556 `--no-globals' and so are struct members unless you specify\n\
 557 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 558 `--no-members' can make the tags table file much smaller.\n\
 559 You can tag function declarations and external variables by\n\
 560 using `--declarations'.";
 561 #endif  /* C help for Ctags and Etags */
 562
 563 static const char *Cplusplus_suffixes [] =
 564   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 565     "M",                        /* Objective C++ */
 566     "pdb",                      /* PostScript with C syntax */
 567     NULL };
 568 static const char Cplusplus_help [] =
 569 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 570 --help --lang=c --lang=c++ for full help.)\n\
 571 In addition to C tags, member functions are also recognized.  Member\n\
 572 variables are recognized unless you use the `--no-members' option.\n\
 573 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 574 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 575 `operator+'.";
 576 /* Java code.  The help text is read-only, like every other help string in this file.  */
 577 static const char *Cjava_suffixes [] =
 578   { "java", NULL };
 579 static const char Cjava_help [] =
 580 "In Java code, all the tags constructs of C and C++ code are\n\
 581 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 582
 583 /* Cobol code.  The help text is read-only, like every other help string in this file.  */
 584 static const char *Cobol_suffixes [] =
 585   { "COB", "cob", NULL };
 586 static const char Cobol_help [] =
 587 "In Cobol code, tags are paragraph names; that is, any word\n\
 588 starting in column 8 and followed by a period.";
 589
 590 static const char *Cstar_suffixes [] =
 591   { "cs", "hs", NULL };
 592
 593 static const char *Erlang_suffixes [] =
 594   { "erl", "hrl", NULL };
 595 static const char Erlang_help [] =
 596 "In Erlang code, the tags are the functions, records and macros\n\
 597 defined in the file.";
 598 /* Forth code.  File-local, like every other suffix table here.  */
 599 static const char *Forth_suffixes [] =
 600   { "fth", "tok", NULL };
 601 static const char Forth_help [] =
 602 "In Forth code, tags are words defined by `:',\n\
 603 constant, code, create, defer, value, variable, buffer:, field.";
 604
 605 static const char *Fortran_suffixes [] =
 606   { "F", "f", "f90", "for", NULL };
 607 static const char Fortran_help [] =
 608 "In Fortran code, functions, subroutines and block data are tags.";
 609
 610 static const char *HTML_suffixes [] =
 611   { "htm", "html", "shtml", NULL };
 612 static const char HTML_help [] =
 613 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 614 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 615 occurrences of `id='.";
 616
 617 static const char *Lisp_suffixes [] =
 618   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 619 static const char Lisp_help [] =
 620 "In Lisp code, any function defined with `defun', any variable\n\
 621 defined with `defvar' or `defconst', and in general the first\n\
 622 argument of any expression that starts with `(def' in column zero\n\
 623 is a tag.\n\
 624 The `--declarations' option tags \"(defvar foo)\" constructs too.";
 625
 626 static const char *Lua_suffixes [] =
 627   { "lua", "LUA", NULL };
 628 static const char Lua_help [] =
 629 "In Lua scripts, all functions are tags.";
 630
 631 static const char *Makefile_filenames [] =
 632   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 633 static const char Makefile_help [] =
 634 "In makefiles, targets are tags; additionally, variables are tags\n\
 635 unless you specify `--no-globals'.";
 636
 637 static const char *Objc_suffixes [] =
 638   { "lm",                       /* Objective lex file */
 639     "m",                        /* Objective C file */
 640      NULL };
 641 static const char Objc_help [] =
 642 "In Objective C code, tags include Objective C definitions for classes,\n\
 643 class categories, methods and protocols.  Tags for variables and\n\
 644 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 645 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 646
 647 static const char *Pascal_suffixes [] =
 648   { "p", "pas", NULL };
 649 static const char Pascal_help [] =
 650 "In Pascal code, the tags are the functions and procedures defined\n\
 651 in the file.";
 652 /* " // this is for working around an Emacs highlighting bug... */
 653
 654 static const char *Perl_suffixes [] =
 655   { "pl", "pm", NULL };
 656 static const char *Perl_interpreters [] =
 657   { "perl", "@PERL@", NULL };
 658 static const char Perl_help [] =
 659 "In Perl code, the tags are the packages, subroutines and variables\n\
 660 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 661 `--globals' if you want to tag global variables.  Tags for\n\
 662 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 663 defined in the default package is `main::SUB'.";
 664
 665 static const char *PHP_suffixes [] =
 666   { "php", "php3", "php4", NULL };
 667 static const char PHP_help [] =
 668 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 669 the `--no-members' option, vars are tags too.";
 670
 671 static const char *plain_C_suffixes [] =
 672   { "pc",                       /* Pro*C file */
 673      NULL };
 674
 675 static const char *PS_suffixes [] =
 676   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 677 static const char PS_help [] =
 678 "In PostScript code, the tags are the functions.";
 679
 680 static const char *Prolog_suffixes [] =
 681   { "prolog", NULL };
 682 static const char Prolog_help [] =
 683 "In Prolog code, tags are predicates and rules at the beginning of\n\
 684 line.";
 685
 686 static const char *Python_suffixes [] =
 687   { "py", NULL };
 688 static const char Python_help [] =
 689 "In Python code, `def' or `class' at the beginning of a line\n\
 690 generate a tag.";
 691
 692 /* Can't do the `SCM' or `scm' prefix with a version number. */
 693 static const char *Scheme_suffixes [] =
 694   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 695 static const char Scheme_help [] =
 696 "In Scheme code, tags include anything defined with `def' or with a\n\
 697 construct whose name starts with `def'.  They also include\n\
 698 variables set with `set!' at top level in the file.";
 699
 700 static const char *TeX_suffixes [] =
 701   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 702 static const char TeX_help [] =
 703 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 704 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 705 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 706 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 707 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 708 \n\
 709 Other commands can be specified by setting the environment variable\n\
 710 `TEXTAGS' to a colon-separated list like, for example,\n\
 711      TEXTAGS=\"mycommand:myothercommand\".";
 712
 713
 714 static const char *Texinfo_suffixes [] =
 715   { "texi", "texinfo", "txi", NULL };
 716 static const char Texinfo_help [] =
 717 "for texinfo files, lines starting with @node are tagged.";
 718
 719 static const char *Yacc_suffixes [] =
 720   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 721 static const char Yacc_help [] =
 722 "In Bison or Yacc input files, each rule defines as a tag the\n\
 723 nonterminal it constructs.  The portions of the file that contain\n\
 724 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 725 for full help).";
 726
 727 static const char auto_help [] =
 728 "`auto' is not a real language, it indicates to use\n\
 729 a default language for files base on file name suffix and file contents.";
 730
 731 static const char none_help [] =
 732 "`none' is not a real language, it indicates to only do\n\
 733 regexp processing on files.";
 734
 735 static const char no_lang_help [] =
 736 "No detailed help available for this language.";
 737
 738
 739 /*
 740  * Table of languages.
 741  *
 742  * It is ok for a given function to be listed under more than one
 743  * name.  I just didn't.
 744  */
 745
 746 static language lang_names [] =
 747 {
 748   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 749   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 750   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 751   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 752   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 753   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 754   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 755   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 756   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 757   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 758   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 759   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 760   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 761   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 762   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 763   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 764   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 765   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 766   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 767   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 768   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 769   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 770   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 771   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 772   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 773   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 774   { "auto",      auto_help },                      /* default guessing scheme */
 775   { "none",      none_help,      just_read_file }, /* regexp matching only */
 776   { NULL }                /* end of list */
 777 };
 778
 779 \f
 780 static void
 781 print_language_names (void)
 782 {
 783   language *lang;
 784   const char **name, **ext;
 785
 786   puts ("\nThese are the currently supported languages, along with the\n\
 787 default file names and dot suffixes:");
 788   for (lang = lang_names; lang->name != NULL; lang++)
 789     {
 790       printf ("  %-*s", 10, lang->name);
 791       if (lang->filenames != NULL)
 792         for (name = lang->filenames; *name != NULL; name++)
 793           printf (" %s", *name);
 794       if (lang->suffixes != NULL)
 795         for (ext = lang->suffixes; *ext != NULL; ext++)
 796           printf (" .%s", *ext);
 797       puts ("");
 798     }
 799   puts ("where `auto' means use default language for files based on file\n\
 800 name suffix, and `none' means only do regexp processing on files.\n\
 801 If no language is specified and no matching suffix is found,\n\
 802 the first line of the file is read for a sharp-bang (#!) sequence\n\
 803 followed by the name of an interpreter.  If no such sequence is found,\n\
 804 Fortran is tried first; if no tags are found, C is tried next.\n\
 805 When parsing any C file, a \"class\" or \"template\" keyword\n\
 806 switches to C++.");
 807   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 808 \n\
 809 For detailed help on a given language use, for example,\n\
 810 etags --help --lang=ada.");
 811 }
 812
 813 #ifndef EMACS_NAME
 814 # define EMACS_NAME "standalone"
 815 #endif
 816 #ifndef VERSION
 817 # define VERSION "17.38.1.4"
 818 #endif
 819 static void
 820 print_version (void)
 821 {
 822   char emacs_copyright[] = COPYRIGHT;
 823
 824   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 825   puts (emacs_copyright);
 826   puts ("This program is distributed under the terms in ETAGS.README");
 827
 828   exit (EXIT_SUCCESS);
 829 }
 830
 831 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 832 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 833 #endif
 834
 835 static void
 836 print_help (argument *argbuffer)
 837 {
 838   bool help_for_lang = FALSE;
 839
 840   for (; argbuffer->arg_type != at_end; argbuffer++)
 841     if (argbuffer->arg_type == at_language)
 842       {
 843         if (help_for_lang)
 844           puts ("");
 845         puts (argbuffer->lang->help);
 846         help_for_lang = TRUE;
 847       }
 848
 849   if (help_for_lang)
 850     exit (EXIT_SUCCESS);
 851
 852   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 853 \n\
 854 These are the options accepted by %s.\n", progname, progname);
 855   puts ("You may use unambiguous abbreviations for the long option names.");
 856   puts ("  A - as file name means read names from stdin (one per line).\n\
 857 Absolute names are stored in the output file as they are.\n\
 858 Relative ones are stored relative to the output file's directory.\n");
 859
 860   puts ("-a, --append\n\
 861         Append tag entries to existing tags file.");
 862
 863   puts ("--packages-only\n\
 864         For Ada files, only generate tags for packages.");
 865
 866   if (CTAGS)
 867     puts ("-B, --backward-search\n\
 868         Write the search commands for the tag entries using '?', the\n\
 869         backward-search command instead of '/', the forward-search command.");
 870
 871   /* This option is mostly obsolete, because etags can now automatically
 872      detect C++.  Retained for backward compatibility and for debugging and
 873      experimentation.  In principle, we could want to tag as C++ even
 874      before any "class" or "template" keyword.
 875   puts ("-C, --c++\n\
 876         Treat files whose name suffix defaults to C language as C++ files.");
 877   */
 878
 879   puts ("--declarations\n\
 880         In C and derived languages, create tags for function declarations,");
 881   if (CTAGS)
 882     puts ("\tand create tags for extern variables if --globals is used.");
 883   else
 884     puts
 885       ("\tand create tags for extern variables unless --no-globals is used.");
 886
 887   if (CTAGS)
 888     puts ("-d, --defines\n\
 889         Create tag entries for C #define constants and enum constants, too.");
 890   else
 891     puts ("-D, --no-defines\n\
 892         Don't create tag entries for C #define constants and enum constants.\n\
 893         This makes the tags file smaller.");
 894
 895   if (!CTAGS)
 896     puts ("-i FILE, --include=FILE\n\
 897         Include a note in tag file indicating that, when searching for\n\
 898         a tag, one should also consult the tags file FILE after\n\
 899         checking the current file.");
 900
 901   puts ("-l LANG, --language=LANG\n\
 902         Force the following files to be considered as written in the\n\
 903         named language up to the next --language=LANG option.");
 904
 905   if (CTAGS)
 906     puts ("--globals\n\
 907         Create tag entries for global variables in some languages.");
 908   else
 909     puts ("--no-globals\n\
 910         Do not create tag entries for global variables in some\n\
 911         languages.  This makes the tags file smaller.");
 912
 913   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 914     puts ("--no-line-directive\n\
 915         Ignore #line preprocessor directives in C and derived languages.");
 916
 917   if (CTAGS)
 918     puts ("--members\n\
 919         Create tag entries for members of structures in some languages.");
 920   else
 921     puts ("--no-members\n\
 922         Do not create tag entries for members of structures\n\
 923         in some languages.");
 924
 925   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 926         Make a tag for each line matching a regular expression pattern\n\
 927         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 928         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 929         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 930         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 931   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 932         For example Tcl named tags can be created with:\n\
 933           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 934         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 935         `m' means to allow multi-line matches, `s' implies `m' and\n\
 936         causes dot to match any character, including newline.");
 937
 938   puts ("-R, --no-regex\n\
 939         Don't create tags from regexps for the following files.");
 940
 941   puts ("-I, --ignore-indentation\n\
 942         In C and C++ do not assume that a closing brace in the first\n\
 943         column is the final brace of a function or structure definition.");
 944
 945   puts ("-o FILE, --output=FILE\n\
 946         Write the tags to FILE.");
 947
 948   puts ("--parse-stdin=NAME\n\
 949         Read from standard input and record tags as belonging to file NAME.");
 950
 951   if (CTAGS)
 952     {
 953       puts ("-t, --typedefs\n\
 954         Generate tag entries for C and Ada typedefs.");
 955       puts ("-T, --typedefs-and-c++\n\
 956         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 957         and C++ member functions.");
 958     }
 959
 960   if (CTAGS)
 961     puts ("-u, --update\n\
 962         Update the tag entries for the given files, leaving tag\n\
 963         entries for other files in place.  Currently, this is\n\
 964         implemented by deleting the existing entries for the given\n\
 965         files and then rewriting the new entries at the end of the\n\
 966         tags file.  It is often faster to simply rebuild the entire\n\
 967         tag file than to use this.");
 968
 969   if (CTAGS)
 970     {
 971       puts ("-v, --vgrind\n\
 972         Print on the standard output an index of items intended for\n\
 973         human consumption, similar to the output of vgrind.  The index\n\
 974         is sorted, and gives the page number of each item.");
 975
 976       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 977         puts ("-w, --no-duplicates\n\
 978         Do not create duplicate tag entries, for compatibility with\n\
 979         traditional ctags.");
 980
 981       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 982         puts ("-w, --no-warn\n\
 983         Suppress warning messages about duplicate tag entries.");
 984
 985       puts ("-x, --cxref\n\
 986         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 987         The output uses line numbers instead of page numbers, but\n\
 988         beyond that the differences are cosmetic; try both to see\n\
 989         which you like.");
 990     }
 991
 992   puts ("-V, --version\n\
 993         Print the version of the program.\n\
 994 -h, --help\n\
 995         Print this help message.\n\
 996         Followed by one or more `--language' options prints detailed\n\
 997         help about tag generation for the specified languages.");
 998
 999   print_language_names ();
1000
1001   puts ("");
1002   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1003
1004   exit (EXIT_SUCCESS);
1005 }
1006
1007 \f
1008 int
1009 main (int argc, char **argv)
1010 {
1011   int i;
1012   unsigned int nincluded_files;
1013   char **included_files;
1014   argument *argbuffer;
1015   int current_arg, file_count;
1016   linebuffer filename_lb;
1017   bool help_asked = FALSE;
1018   ptrdiff_t len;
1019  char *optstring;
1020  int opt;
1021
1022
1023 #ifdef DOS_NT
1024   _fmode = O_BINARY;   /* all of files are treated as binary files */
1025 #endif /* DOS_NT */
1026
1027   progname = argv[0];
1028   nincluded_files = 0;
1029   included_files = xnew (argc, char *);
1030   current_arg = 0;
1031   file_count = 0;
1032
1033   /* Allocate enough no matter what happens.  Overkill, but each one
1034      is small. */
1035   argbuffer = xnew (argc, argument);
1036
1037   /*
1038    * Always find typedefs and structure tags.
1039    * Also default to find macro constants, enum constants, struct
1040    * members and global variables.  Do it for both etags and ctags.
1041    */
1042   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1043   globals = members = TRUE;
1044
1045   /* When the optstring begins with a '-' getopt_long does not rearrange the
1046      non-options arguments to be at the end, but leaves them alone. */
1047   optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1048                       (CTAGS) ? "BxdtTuvw" : "Di:",
1049                       "");
1050
1051   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1052     switch (opt)
1053       {
1054       case 0:
1055         /* If getopt returns 0, then it has already processed a
1056            long-named option.  We should do nothing.  */
1057         break;
1058
1059       case 1:
1060         /* This means that a file name has been seen.  Record it. */
1061         argbuffer[current_arg].arg_type = at_filename;
1062         argbuffer[current_arg].what     = optarg;
1063         len = strlen (optarg);
1064         if (whatlen_max < len)
1065           whatlen_max = len;
1066         ++current_arg;
1067         ++file_count;
1068         break;
1069
1070       case STDIN:
1071         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1072         argbuffer[current_arg].arg_type = at_stdin;
1073         argbuffer[current_arg].what     = optarg;
1074         len = strlen (optarg);
1075         if (whatlen_max < len)
1076           whatlen_max = len;
1077         ++current_arg;
1078         ++file_count;
1079         if (parsing_stdin)
1080           fatal ("cannot parse standard input more than once", (char *)NULL);
1081         parsing_stdin = TRUE;
1082         break;
1083
1084         /* Common options. */
1085       case 'a': append_to_tagfile = TRUE;       break;
1086       case 'C': cplusplus = TRUE;               break;
1087       case 'f':         /* for compatibility with old makefiles */
1088       case 'o':
1089         if (tagfile)
1090           {
1091             error ("-o option may only be given once.");
1092             suggest_asking_for_help ();
1093             /* NOTREACHED */
1094           }
1095         tagfile = optarg;
1096         break;
1097       case 'I':
1098       case 'S':         /* for backward compatibility */
1099         ignoreindent = TRUE;
1100         break;
1101       case 'l':
1102         {
1103           language *lang = get_language_from_langname (optarg);
1104           if (lang != NULL)
1105             {
1106               argbuffer[current_arg].lang = lang;
1107               argbuffer[current_arg].arg_type = at_language;
1108               ++current_arg;
1109             }
1110         }
1111         break;
1112       case 'c':
1113         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1114         optarg = concat (optarg, "i", ""); /* memory leak here */
1115         /* FALLTHRU */
1116       case 'r':
1117         argbuffer[current_arg].arg_type = at_regexp;
1118         argbuffer[current_arg].what = optarg;
1119         len = strlen (optarg);
1120         if (whatlen_max < len)
1121           whatlen_max = len;
1122         ++current_arg;
1123         break;
1124       case 'R':
1125         argbuffer[current_arg].arg_type = at_regexp;
1126         argbuffer[current_arg].what = NULL;
1127         ++current_arg;
1128         break;
1129       case 'V':
1130         print_version ();
1131         break;
1132       case 'h':
1133       case 'H':
1134         help_asked = TRUE;
1135         break;
1136
1137         /* Etags options */
1138       case 'D': constantypedefs = FALSE;                        break;
1139       case 'i': included_files[nincluded_files++] = optarg;     break;
1140
1141         /* Ctags options. */
1142       case 'B': searchar = '?';                                 break;
1143       case 'd': constantypedefs = TRUE;                         break;
1144       case 't': typedefs = TRUE;                                break;
1145       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1146       case 'u': update = TRUE;                                  break;
1147       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1148       case 'x': cxref_style = TRUE;                             break;
1149       case 'w': no_warnings = TRUE;                             break;
1150       default:
1151         suggest_asking_for_help ();
1152         /* NOTREACHED */
1153       }
1154
1155   /* No more options.  Store the rest of arguments. */
1156   for (; optind < argc; optind++)
1157     {
1158       argbuffer[current_arg].arg_type = at_filename;
1159       argbuffer[current_arg].what = argv[optind];
1160       len = strlen (argv[optind]);
1161       if (whatlen_max < len)
1162         whatlen_max = len;
1163       ++current_arg;
1164       ++file_count;
1165     }
1166
1167   argbuffer[current_arg].arg_type = at_end;
1168
1169   if (help_asked)
1170     print_help (argbuffer);
1171     /* NOTREACHED */
1172
1173   if (nincluded_files == 0 && file_count == 0)
1174     {
1175       error ("no input files specified.");
1176       suggest_asking_for_help ();
1177       /* NOTREACHED */
1178     }
1179
1180   if (tagfile == NULL)
1181     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1182   cwd = etags_getcwd ();        /* the current working directory */
1183   if (cwd[strlen (cwd) - 1] != '/')
1184     {
1185       char *oldcwd = cwd;
1186       cwd = concat (oldcwd, "/", "");
1187       free (oldcwd);
1188     }
1189
1190   /* Compute base directory for relative file names. */
1191   if (streq (tagfile, "-")
1192       || strneq (tagfile, "/dev/", 5))
1193     tagfiledir = cwd;            /* relative file names are relative to cwd */
1194   else
1195     {
1196       canonicalize_filename (tagfile);
1197       tagfiledir = absolute_dirname (tagfile, cwd);
1198     }
1199
1200   init ();                      /* set up boolean "functions" */
1201
1202   linebuffer_init (&lb);
1203   linebuffer_init (&filename_lb);
1204   linebuffer_init (&filebuf);
1205   linebuffer_init (&token_name);
1206
1207   if (!CTAGS)
1208     {
1209       if (streq (tagfile, "-"))
1210         {
1211           tagf = stdout;
1212 #ifdef DOS_NT
1213           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1214              doesn't take effect until after `stdout' is already open). */
1215           if (!isatty (fileno (stdout)))
1216             setmode (fileno (stdout), O_BINARY);
1217 #endif /* DOS_NT */
1218         }
1219       else
1220         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1221       if (tagf == NULL)
1222         pfatal (tagfile);
1223     }
1224
1225   /*
1226    * Loop through files finding functions.
1227    */
1228   for (i = 0; i < current_arg; i++)
1229     {
1230       static language *lang;    /* non-NULL if language is forced */
1231       char *this_file;
1232
1233       switch (argbuffer[i].arg_type)
1234         {
1235         case at_language:
1236           lang = argbuffer[i].lang;
1237           break;
1238         case at_regexp:
1239           analyse_regex (argbuffer[i].what);
1240           break;
1241         case at_filename:
1242               this_file = argbuffer[i].what;
1243               /* Input file named "-" means read file names from stdin
1244                  (one per line) and use them. */
1245               if (streq (this_file, "-"))
1246                 {
1247                   if (parsing_stdin)
1248                     fatal ("cannot parse standard input AND read file names from it",
1249                            (char *)NULL);
1250                   while (readline_internal (&filename_lb, stdin) > 0)
1251                     process_file_name (filename_lb.buffer, lang);
1252                 }
1253               else
1254                 process_file_name (this_file, lang);
1255           break;
1256         case at_stdin:
1257           this_file = argbuffer[i].what;
1258           process_file (stdin, this_file, lang);
1259           break;
1260         }
1261     }
1262
1263   free_regexps ();
1264   free (lb.buffer);
1265   free (filebuf.buffer);
1266   free (token_name.buffer);
1267
1268   if (!CTAGS || cxref_style)
1269     {
1270       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1271       put_entries (nodehead);
1272       free_tree (nodehead);
1273       nodehead = NULL;
1274       if (!CTAGS)
1275         {
1276           fdesc *fdp;
1277
1278           /* Output file entries that have no tags. */
1279           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1280             if (!fdp->written)
1281               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1282
1283           while (nincluded_files-- > 0)
1284             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1285
1286           if (fclose (tagf) == EOF)
1287             pfatal (tagfile);
1288         }
1289
1290       exit (EXIT_SUCCESS);
1291     }
1292
1293   /* From here on, we are in (CTAGS && !cxref_style) */
1294   if (update)
1295     {
1296       char *cmd =
1297         xmalloc (strlen (tagfile) + whatlen_max +
1298                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1299       for (i = 0; i < current_arg; ++i)
1300         {
1301           switch (argbuffer[i].arg_type)
1302             {
1303             case at_filename:
1304             case at_stdin:
1305               break;
1306             default:
1307               continue;         /* the for loop */
1308             }
1309           strcpy (cmd, "mv ");
1310           strcat (cmd, tagfile);
1311           strcat (cmd, " OTAGS;fgrep -v '\t");
1312           strcat (cmd, argbuffer[i].what);
1313           strcat (cmd, "\t' OTAGS >");
1314           strcat (cmd, tagfile);
1315           strcat (cmd, ";rm OTAGS");
1316           if (system (cmd) != EXIT_SUCCESS)
1317             fatal ("failed to execute shell command", (char *)NULL);
1318         }
1319       free (cmd);
1320       append_to_tagfile = TRUE;
1321     }
1322
1323   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1324   if (tagf == NULL)
1325     pfatal (tagfile);
1326   put_entries (nodehead);       /* write all the tags (CTAGS) */
1327   free_tree (nodehead);
1328   nodehead = NULL;
1329   if (fclose (tagf) == EOF)
1330     pfatal (tagfile);
1331
1332   if (CTAGS)
1333     if (append_to_tagfile || update)
1334       {
1335         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1336         /* Maybe these should be used:
1337            setenv ("LC_COLLATE", "C", 1);
1338            setenv ("LC_ALL", "C", 1); */
1339         strcpy (cmd, "sort -u -o ");
1340         strcat (cmd, tagfile);
1341         strcat (cmd, " ");
1342         strcat (cmd, tagfile);
1343         exit (system (cmd));
1344       }
1345   return EXIT_SUCCESS;
1346 }
1347
1348
1349 /*
1350  * Return a compressor given the file name.  If EXTPTR is non-zero,
1351  * return a pointer into FILE where the compressor-specific
1352  * extension begins.  If no compressor is found, NULL is returned
1353  * and EXTPTR is not significant.
1354  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1355  */
1356 static compressor *
1357 get_compressor_from_suffix (char *file, char **extptr)
1358 {
1359   compressor *compr;
1360   char *slash, *suffix;
1361
1362   /* File has been processed by canonicalize_filename,
1363      so we don't need to consider backslashes on DOS_NT.  */
1364   slash = etags_strrchr (file, '/');
1365   suffix = etags_strrchr (file, '.');
1366   if (suffix == NULL || suffix < slash)
1367     return NULL;
1368   if (extptr != NULL)
1369     *extptr = suffix;
1370   suffix += 1;
1371   /* Let those poor souls who live with DOS 8+3 file name limits get
1372      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1373      Only the first do loop is run if not MSDOS */
1374   do
1375     {
1376       for (compr = compressors; compr->suffix != NULL; compr++)
1377         if (streq (compr->suffix, suffix))
1378           return compr;
1379       if (!MSDOS)
1380         break;                  /* do it only once: not really a loop */
1381       if (extptr != NULL)
1382         *extptr = ++suffix;
1383     } while (*suffix != '\0');
1384   return NULL;
1385 }
1386
1387
1388
1389 /*
1390  * Return a language given the name.
1391  */
1392 static language *
1393 get_language_from_langname (const char *name)
1394 {
1395   language *lang;
1396
1397   if (name == NULL)
1398     error ("empty language name");
1399   else
1400     {
1401       for (lang = lang_names; lang->name != NULL; lang++)
1402         if (streq (name, lang->name))
1403           return lang;
1404       error ("unknown language \"%s\"", name);
1405     }
1406
1407   return NULL;
1408 }
1409
1410
1411 /*
1412  * Return a language given the interpreter name.
1413  */
1414 static language *
1415 get_language_from_interpreter (char *interpreter)
1416 {
1417   language *lang;
1418   const char **iname;
1419
1420   if (interpreter == NULL)
1421     return NULL;
1422   for (lang = lang_names; lang->name != NULL; lang++)
1423     if (lang->interpreters != NULL)
1424       for (iname = lang->interpreters; *iname != NULL; iname++)
1425         if (streq (*iname, interpreter))
1426             return lang;
1427
1428   return NULL;
1429 }
1430
1431
1432
1433 /*
1434  * Return a language given the file name.
1435  */
1436 static language *
1437 get_language_from_filename (char *file, int case_sensitive)
1438 {
1439   language *lang;
1440   const char **name, **ext, *suffix;
1441
1442   /* Try whole file name first. */
1443   for (lang = lang_names; lang->name != NULL; lang++)
1444     if (lang->filenames != NULL)
1445       for (name = lang->filenames; *name != NULL; name++)
1446         if ((case_sensitive)
1447             ? streq (*name, file)
1448             : strcaseeq (*name, file))
1449           return lang;
1450
1451   /* If not found, try suffix after last dot. */
1452   suffix = etags_strrchr (file, '.');
1453   if (suffix == NULL)
1454     return NULL;
1455   suffix += 1;
1456   for (lang = lang_names; lang->name != NULL; lang++)
1457     if (lang->suffixes != NULL)
1458       for (ext = lang->suffixes; *ext != NULL; ext++)
1459         if ((case_sensitive)
1460             ? streq (*ext, suffix)
1461             : strcaseeq (*ext, suffix))
1462           return lang;
1463   return NULL;
1464 }
1465
1466 \f
1467 /*
1468  * This routine is called on each file argument.
1469  */
1470 static void
1471 process_file_name (char *file, language *lang)
1472 {
1473   struct stat stat_buf;
1474   FILE *inf;
1475   fdesc *fdp;
1476   compressor *compr;
1477   char *compressed_name, *uncompressed_name;
1478   char *ext, *real_name;
1479   int retval;
1480
1481   canonicalize_filename (file);
1482   if (streq (file, tagfile) && !streq (tagfile, "-"))
1483     {
1484       error ("skipping inclusion of %s in self.", file);
1485       return;
1486     }
1487   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1488     {
1489       compressed_name = NULL;
1490       real_name = uncompressed_name = savestr (file);
1491     }
1492   else
1493     {
1494       real_name = compressed_name = savestr (file);
1495       uncompressed_name = savenstr (file, ext - file);
1496     }
1497
1498   /* If the canonicalized uncompressed name
1499      has already been dealt with, skip it silently. */
1500   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1501     {
1502       assert (fdp->infname != NULL);
1503       if (streq (uncompressed_name, fdp->infname))
1504         goto cleanup;
1505     }
1506
1507   if (stat (real_name, &stat_buf) != 0)
1508     {
1509       /* Reset real_name and try with a different name. */
1510       real_name = NULL;
1511       if (compressed_name != NULL) /* try with the given suffix */
1512         {
1513           if (stat (uncompressed_name, &stat_buf) == 0)
1514             real_name = uncompressed_name;
1515         }
1516       else                      /* try all possible suffixes */
1517         {
1518           for (compr = compressors; compr->suffix != NULL; compr++)
1519             {
1520               compressed_name = concat (file, ".", compr->suffix);
1521               if (stat (compressed_name, &stat_buf) != 0)
1522                 {
1523                   if (MSDOS)
1524                     {
1525                       char *suf = compressed_name + strlen (file);
1526                       size_t suflen = strlen (compr->suffix) + 1;
1527                       for ( ; suf[1]; suf++, suflen--)
1528                         {
1529                           memmove (suf, suf + 1, suflen);
1530                           if (stat (compressed_name, &stat_buf) == 0)
1531                             {
1532                               real_name = compressed_name;
1533                               break;
1534                             }
1535                         }
1536                       if (real_name != NULL)
1537                         break;
1538                     } /* MSDOS */
1539                   free (compressed_name);
1540                   compressed_name = NULL;
1541                 }
1542               else
1543                 {
1544                   real_name = compressed_name;
1545                   break;
1546                 }
1547             }
1548         }
1549       if (real_name == NULL)
1550         {
1551           perror (file);
1552           goto cleanup;
1553         }
1554     } /* try with a different name */
1555
1556   if (!S_ISREG (stat_buf.st_mode))
1557     {
1558       error ("skipping %s: it is not a regular file.", real_name);
1559       goto cleanup;
1560     }
1561   if (real_name == compressed_name)
1562     {
1563       char *cmd = concat (compr->command, " ", real_name);
1564       inf = (FILE *) popen (cmd, "r");
1565       free (cmd);
1566     }
1567   else
1568     inf = fopen (real_name, "r");
1569   if (inf == NULL)
1570     {
1571       perror (real_name);
1572       goto cleanup;
1573     }
1574
1575   process_file (inf, uncompressed_name, lang);
1576
1577   if (real_name == compressed_name)
1578     retval = pclose (inf);
1579   else
1580     retval = fclose (inf);
1581   if (retval < 0)
1582     pfatal (file);
1583
1584  cleanup:
1585   free (compressed_name);
1586   free (uncompressed_name);
1587   last_node = NULL;
1588   curfdp = NULL;
1589   return;
1590 }
1591
1592 static void
1593 process_file (FILE *fh, char *fn, language *lang)
1594 {
1595   static const fdesc emptyfdesc;
1596   fdesc *fdp;
1597
1598   /* Create a new input file description entry. */
1599   fdp = xnew (1, fdesc);
1600   *fdp = emptyfdesc;
1601   fdp->next = fdhead;
1602   fdp->infname = savestr (fn);
1603   fdp->lang = lang;
1604   fdp->infabsname = absolute_filename (fn, cwd);
1605   fdp->infabsdir = absolute_dirname (fn, cwd);
1606   if (filename_is_absolute (fn))
1607     {
1608       /* An absolute file name.  Canonicalize it. */
1609       fdp->taggedfname = absolute_filename (fn, NULL);
1610     }
1611   else
1612     {
1613       /* A file name relative to cwd.  Make it relative
1614          to the directory of the tags file. */
1615       fdp->taggedfname = relative_filename (fn, tagfiledir);
1616     }
1617   fdp->usecharno = TRUE;        /* use char position when making tags */
1618   fdp->prop = NULL;
1619   fdp->written = FALSE;         /* not written on tags file yet */
1620
1621   fdhead = fdp;
1622   curfdp = fdhead;              /* the current file description */
1623
1624   find_entries (fh);
1625
1626   /* If not Ctags, and if this is not metasource and if it contained no #line
1627      directives, we can write the tags and free all nodes pointing to
1628      curfdp. */
1629   if (!CTAGS
1630       && curfdp->usecharno      /* no #line directives in this file */
1631       && !curfdp->lang->metasource)
1632     {
1633       node *np, *prev;
1634
1635       /* Look for the head of the sublist relative to this file.  See add_node
1636          for the structure of the node tree. */
1637       prev = NULL;
1638       for (np = nodehead; np != NULL; prev = np, np = np->left)
1639         if (np->fdp == curfdp)
1640           break;
1641
1642       /* If we generated tags for this file, write and delete them. */
1643       if (np != NULL)
1644         {
1645           /* This is the head of the last sublist, if any.  The following
1646              instructions depend on this being true. */
1647           assert (np->left == NULL);
1648
1649           assert (fdhead == curfdp);
1650           assert (last_node->fdp == curfdp);
1651           put_entries (np);     /* write tags for file curfdp->taggedfname */
1652           free_tree (np);       /* remove the written nodes */
1653           if (prev == NULL)
1654             nodehead = NULL;    /* no nodes left */
1655           else
1656             prev->left = NULL;  /* delete the pointer to the sublist */
1657         }
1658     }
1659 }
1660
1661 /*
1662  * This routine sets up the boolean pseudo-functions which work
1663  * by setting boolean flags dependent upon the corresponding character.
1664  * Every char which is NOT in that string is not a white char.  Therefore,
1665  * all of the array "_wht" is set to FALSE, and then the elements
1666  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1667  * of a char is TRUE if it is the string "white", else FALSE.
1668  */
1669 static void
1670 init (void)
1671 {
1672   register const char *sp;
1673   register int i;
1674
1675   for (i = 0; i < CHARS; i++)
1676     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1677   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1678   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1679   notinname ('\0') = notinname ('\n');
1680   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1681   begtoken ('\0') = begtoken ('\n');
1682   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1683   intoken ('\0') = intoken ('\n');
1684   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1685   endtoken ('\0') = endtoken ('\n');
1686 }
1687
1688 /*
1689  * This routine opens the specified file and calls the function
1690  * which finds the function and type definitions.
1691  */
1692 static void
1693 find_entries (FILE *inf)
1694 {
1695   char *cp;
1696   language *lang = curfdp->lang;
1697   Lang_function *parser = NULL;
1698
1699   /* If user specified a language, use it. */
1700   if (lang != NULL && lang->function != NULL)
1701     {
1702       parser = lang->function;
1703     }
1704
1705   /* Else try to guess the language given the file name. */
1706   if (parser == NULL)
1707     {
1708       lang = get_language_from_filename (curfdp->infname, TRUE);
1709       if (lang != NULL && lang->function != NULL)
1710         {
1711           curfdp->lang = lang;
1712           parser = lang->function;
1713         }
1714     }
1715
1716   /* Else look for sharp-bang as the first two characters. */
1717   if (parser == NULL
1718       && readline_internal (&lb, inf) > 0
1719       && lb.len >= 2
1720       && lb.buffer[0] == '#'
1721       && lb.buffer[1] == '!')
1722     {
1723       char *lp;
1724
1725       /* Set lp to point at the first char after the last slash in the
1726          line or, if no slashes, at the first nonblank.  Then set cp to
1727          the first successive blank and terminate the string. */
1728       lp = etags_strrchr (lb.buffer+2, '/');
1729       if (lp != NULL)
1730         lp += 1;
1731       else
1732         lp = skip_spaces (lb.buffer + 2);
1733       cp = skip_non_spaces (lp);
1734       *cp = '\0';
1735
1736       if (strlen (lp) > 0)
1737         {
1738           lang = get_language_from_interpreter (lp);
1739           if (lang != NULL && lang->function != NULL)
1740             {
1741               curfdp->lang = lang;
1742               parser = lang->function;
1743             }
1744         }
1745     }
1746
1747   /* We rewind here, even if inf may be a pipe.  We fail if the
1748      length of the first line is longer than the pipe block size,
1749      which is unlikely. */
1750   rewind (inf);
1751
1752   /* Else try to guess the language given the case insensitive file name. */
1753   if (parser == NULL)
1754     {
1755       lang = get_language_from_filename (curfdp->infname, FALSE);
1756       if (lang != NULL && lang->function != NULL)
1757         {
1758           curfdp->lang = lang;
1759           parser = lang->function;
1760         }
1761     }
1762
1763   /* Else try Fortran or C. */
1764   if (parser == NULL)
1765     {
1766       node *old_last_node = last_node;
1767
1768       curfdp->lang = get_language_from_langname ("fortran");
1769       find_entries (inf);
1770
1771       if (old_last_node == last_node)
1772         /* No Fortran entries found.  Try C. */
1773         {
1774           /* We do not tag if rewind fails.
1775              Only the file name will be recorded in the tags file. */
1776           rewind (inf);
1777           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1778           find_entries (inf);
1779         }
1780       return;
1781     }
1782
1783   if (!no_line_directive
1784       && curfdp->lang != NULL && curfdp->lang->metasource)
1785     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1786        file, or anyway we parsed a file that is automatically generated from
1787        this one.  If this is the case, the bingo.c file contained #line
1788        directives that generated tags pointing to this file.  Let's delete
1789        them all before parsing this file, which is the real source. */
1790     {
1791       fdesc **fdpp = &fdhead;
1792       while (*fdpp != NULL)
1793         if (*fdpp != curfdp
1794             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1795           /* We found one of those!  We must delete both the file description
1796              and all tags referring to it. */
1797           {
1798             fdesc *badfdp = *fdpp;
1799
1800             /* Delete the tags referring to badfdp->taggedfname
1801                that were obtained from badfdp->infname. */
1802             invalidate_nodes (badfdp, &nodehead);
1803
1804             *fdpp = badfdp->next; /* remove the bad description from the list */
1805             free_fdesc (badfdp);
1806           }
1807         else
1808           fdpp = &(*fdpp)->next; /* advance the list pointer */
1809     }
1810
1811   assert (parser != NULL);
1812
1813   /* Generic initializations before reading from file. */
1814   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1815
1816   /* Generic initializations before parsing file with readline. */
1817   lineno = 0;                  /* reset global line number */
1818   charno = 0;                  /* reset global char number */
1819   linecharno = 0;              /* reset global char number of line start */
1820
1821   parser (inf);
1822
1823   regex_tag_multiline ();
1824 }
1825
1826 \f
1827 /*
1828  * Check whether an implicitly named tag should be created,
1829  * then call `pfnote'.
1830  * NAME is a string that is internally copied by this function.
1831  *
1832  * TAGS format specification
1833  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1834  * The following is explained in some more detail in etc/ETAGS.EBNF.
1835  *
1836  * make_tag creates tags with "implicit tag names" (unnamed tags)
1837  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1838  *  1. NAME does not contain any of the characters in NONAM;
1839  *  2. LINESTART contains name as either a rightmost, or rightmost but
1840  *     one character, substring;
1841  *  3. the character, if any, immediately before NAME in LINESTART must
1842  *     be a character in NONAM;
1843  *  4. the character, if any, immediately after NAME in LINESTART must
1844  *     also be a character in NONAM.
1845  *
1846  * The implementation uses the notinname() macro, which recognizes the
1847  * characters stored in the string `nonam'.
1848  * etags.el needs to use the same characters that are in NONAM.
1849  */
1850 static void
1851 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1852           int namelen,          /* tag length */
1853           int is_func,          /* tag is a function */
1854           char *linestart,      /* start of the line where tag is */
1855           int linelen,          /* length of the line where tag is */
1856           int lno,              /* line number */
1857           long int cno)         /* character number */
1858 {
1859   bool named = (name != NULL && namelen > 0);
1860   char *nname = NULL;
1861
1862   if (!CTAGS && named)          /* maybe set named to false */
1863     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1864        such that etags.el can guess a name from it. */
1865     {
1866       int i;
1867       register const char *cp = name;
1868
1869       for (i = 0; i < namelen; i++)
1870         if (notinname (*cp++))
1871           break;
1872       if (i == namelen)                         /* rule #1 */
1873         {
1874           cp = linestart + linelen - namelen;
1875           if (notinname (linestart[linelen-1]))
1876             cp -= 1;                            /* rule #4 */
1877           if (cp >= linestart                   /* rule #2 */
1878               && (cp == linestart
1879                   || notinname (cp[-1]))        /* rule #3 */
1880               && strneq (name, cp, namelen))    /* rule #2 */
1881             named = FALSE;      /* use implicit tag name */
1882         }
1883     }
1884
1885   if (named)
1886     nname = savenstr (name, namelen);
1887
1888   pfnote (nname, is_func, linestart, linelen, lno, cno);
1889 }
1890
1891 /* Record a tag. */
1892 static void
1893 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1894                                 /* tag name, or NULL if unnamed */
1895                                 /* tag is a function */
1896                                 /* start of the line where tag is */
1897                                 /* length of the line where tag is */
1898                                 /* line number */
1899                                 /* character number */
1900 {
1901   register node *np;
1902
1903   assert (name == NULL || name[0] != '\0');
1904   if (CTAGS && name == NULL)
1905     return;
1906
1907   np = xnew (1, node);
1908
1909   /* If ctags mode, change name "main" to M<thisfilename>. */
1910   if (CTAGS && !cxref_style && streq (name, "main"))
1911     {
1912       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1913       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1914       fp = etags_strrchr (np->name, '.');
1915       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1916         fp[0] = '\0';
1917     }
1918   else
1919     np->name = name;
1920   np->valid = TRUE;
1921   np->been_warned = FALSE;
1922   np->fdp = curfdp;
1923   np->is_func = is_func;
1924   np->lno = lno;
1925   if (np->fdp->usecharno)
1926     /* Our char numbers are 0-base, because of C language tradition?
1927        ctags compatibility?  old versions compatibility?   I don't know.
1928        Anyway, since emacs's are 1-base we expect etags.el to take care
1929        of the difference.  If we wanted to have 1-based numbers, we would
1930        uncomment the +1 below. */
1931     np->cno = cno /* + 1 */ ;
1932   else
1933     np->cno = invalidcharno;
1934   np->left = np->right = NULL;
1935   if (CTAGS && !cxref_style)
1936     {
1937       if (strlen (linestart) < 50)
1938         np->regex = concat (linestart, "$", "");
1939       else
1940         np->regex = savenstr (linestart, 50);
1941     }
1942   else
1943     np->regex = savenstr (linestart, linelen);
1944
1945   add_node (np, &nodehead);
1946 }
1947
1948 /*
1949  * free_tree ()
1950  *      recurse on left children, iterate on right children.
1951  */
1952 static void
1953 free_tree (register node *np)
1954 {
1955   while (np)
1956     {
1957       register node *node_right = np->right;
1958       free_tree (np->left);
1959       free (np->name);
1960       free (np->regex);
1961       free (np);
1962       np = node_right;
1963     }
1964 }
1965
1966 /*
1967  * free_fdesc ()
1968  *      delete a file description
1969  */
1970 static void
1971 free_fdesc (register fdesc *fdp)
1972 {
1973   free (fdp->infname);
1974   free (fdp->infabsname);
1975   free (fdp->infabsdir);
1976   free (fdp->taggedfname);
1977   free (fdp->prop);
1978   free (fdp);
1979 }
1980
1981 /*
1982  * add_node ()
1983  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1984  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1985  *      balancing.
1986  *
1987  *      add_node is the only function allowed to add nodes, so it can
1988  *      maintain state.
1989  */
1990 static void
1991 add_node (node *np, node **cur_node_p)
1992 {
1993   register int dif;
1994   register node *cur_node = *cur_node_p;
1995
1996   if (cur_node == NULL)
1997     {
1998       *cur_node_p = np;
1999       last_node = np;
2000       return;
2001     }
2002
2003   if (!CTAGS)
2004     /* Etags Mode */
2005     {
2006       /* For each file name, tags are in a linked sublist on the right
2007          pointer.  The first tags of different files are a linked list
2008          on the left pointer.  last_node points to the end of the last
2009          used sublist. */
2010       if (last_node != NULL && last_node->fdp == np->fdp)
2011         {
2012           /* Let's use the same sublist as the last added node. */
2013           assert (last_node->right == NULL);
2014           last_node->right = np;
2015           last_node = np;
2016         }
2017       else if (cur_node->fdp == np->fdp)
2018         {
2019           /* Scanning the list we found the head of a sublist which is
2020              good for us.  Let's scan this sublist. */
2021           add_node (np, &cur_node->right);
2022         }
2023       else
2024         /* The head of this sublist is not good for us.  Let's try the
2025            next one. */
2026         add_node (np, &cur_node->left);
2027     } /* if ETAGS mode */
2028
2029   else
2030     {
2031       /* Ctags Mode */
2032       dif = strcmp (np->name, cur_node->name);
2033
2034       /*
2035        * If this tag name matches an existing one, then
2036        * do not add the node, but maybe print a warning.
2037        */
2038       if (no_duplicates && !dif)
2039         {
2040           if (np->fdp == cur_node->fdp)
2041             {
2042               if (!no_warnings)
2043                 {
2044                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2045                            np->fdp->infname, lineno, np->name);
2046                   fprintf (stderr, "Second entry ignored\n");
2047                 }
2048             }
2049           else if (!cur_node->been_warned && !no_warnings)
2050             {
2051               fprintf
2052                 (stderr,
2053                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2054                  np->fdp->infname, cur_node->fdp->infname, np->name);
2055               cur_node->been_warned = TRUE;
2056             }
2057           return;
2058         }
2059
2060       /* Actually add the node */
2061       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2062     } /* if CTAGS mode */
2063 }
2064
2065 /*
2066  * invalidate_nodes ()
2067  *      Scan the node tree and invalidate all nodes pointing to the
2068  *      given file description (CTAGS case) or free them (ETAGS case).
2069  */
2070 static void
2071 invalidate_nodes (fdesc *badfdp, node **npp)
2072 {
2073   node *np = *npp;
2074
2075   if (np == NULL)
2076     return;
2077
2078   if (CTAGS)
2079     {
2080       if (np->left != NULL)
2081         invalidate_nodes (badfdp, &np->left);
2082       if (np->fdp == badfdp)
2083         np->valid = FALSE;
2084       if (np->right != NULL)
2085         invalidate_nodes (badfdp, &np->right);
2086     }
2087   else
2088     {
2089       assert (np->fdp != NULL);
2090       if (np->fdp == badfdp)
2091         {
2092           *npp = np->left;      /* detach the sublist from the list */
2093           np->left = NULL;      /* isolate it */
2094           free_tree (np);       /* free it */
2095           invalidate_nodes (badfdp, npp);
2096         }
2097       else
2098         invalidate_nodes (badfdp, &np->left);
2099     }
2100 }
2101
2102 \f
/* Forward declarations of the two helpers that compute the size written
   by put_entries in the header line of each file section. */
2103 static int total_size_of_entries (node *);
2104 static int number_len (long) ATTRIBUTE_CONST;
2105
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  Zero takes one digit. */
static int
number_len (long int num)
{
  int digits = 1;

  /* Each division by ten that leaves something behind is one more digit. */
  for (num /= 10; num > 0; num /= 10)
    digits++;
  return digits;
}
2115
2116 /*
2117  * Return total number of characters that put_entries will output for
2118  * the nodes in the linked list at the right of the specified node.
2119  * This count is irrelevant with etags.el since emacs 19.34 at least,
2120  * but is still supplied for backward compatibility.
2121  */
2122 static int
2123 total_size_of_entries (register node *np)
2124 {
2125   register int total = 0;
2126
2127   for (; np != NULL; np = np->right)
2128     if (np->valid)
2129       {
2130         total += strlen (np->regex) + 1;                /* pat\177 */
2131         if (np->name != NULL)
2132           total += strlen (np->name) + 1;               /* name\001 */
2133         total += number_len ((long) np->lno) + 1;       /* lno, */
2134         if (np->cno != invalidcharno)                   /* cno */
2135           total += number_len (np->cno);
2136         total += 1;                                     /* newline */
2137       }
2138
2139   return total;
2140 }
2141
2142 static void
2143 put_entries (register node *np)
2144 {
2145   register char *sp;
2146   static fdesc *fdp = NULL;
2147
2148   if (np == NULL)
2149     return;
2150
2151   /* Output subentries that precede this one */
2152   if (CTAGS)
2153     put_entries (np->left);
2154
2155   /* Output this entry */
2156   if (np->valid)
2157     {
2158       if (!CTAGS)
2159         {
2160           /* Etags mode */
2161           if (fdp != np->fdp)
2162             {
2163               fdp = np->fdp;
2164               fprintf (tagf, "\f\n%s,%d\n",
2165                        fdp->taggedfname, total_size_of_entries (np));
2166               fdp->written = TRUE;
2167             }
2168           fputs (np->regex, tagf);
2169           fputc ('\177', tagf);
2170           if (np->name != NULL)
2171             {
2172               fputs (np->name, tagf);
2173               fputc ('\001', tagf);
2174             }
2175           fprintf (tagf, "%d,", np->lno);
2176           if (np->cno != invalidcharno)
2177             fprintf (tagf, "%ld", np->cno);
2178           fputs ("\n", tagf);
2179         }
2180       else
2181         {
2182           /* Ctags mode */
2183           if (np->name == NULL)
2184             error ("internal error: NULL name in ctags mode.");
2185
2186           if (cxref_style)
2187             {
2188               if (vgrind_style)
2189                 fprintf (stdout, "%s %s %d\n",
2190                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2191               else
2192                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2193                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2194             }
2195           else
2196             {
2197               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2198
2199               if (np->is_func)
2200                 {               /* function or #define macro with args */
2201                   putc (searchar, tagf);
2202                   putc ('^', tagf);
2203
2204                   for (sp = np->regex; *sp; sp++)
2205                     {
2206                       if (*sp == '\\' || *sp == searchar)
2207                         putc ('\\', tagf);
2208                       putc (*sp, tagf);
2209                     }
2210                   putc (searchar, tagf);
2211                 }
2212               else
2213                 {               /* anything else; text pattern inadequate */
2214                   fprintf (tagf, "%d", np->lno);
2215                 }
2216               putc ('\n', tagf);
2217             }
2218         }
2219     } /* if this node contains a valid tag */
2220
2221   /* Output subentries that follow this one */
2222   put_entries (np->right);
2223   if (!CTAGS)
2224     put_entries (np->left);
2225 }
2226
2227 \f
2228 /* C extensions. */
/* These values are what the c_ext fields of the keyword table below and
   the c_ext argument of C_symtype are built from; C_symtype accepts a
   restricted keyword only when the two have a bit in common.
   C_STAR and C_JAVA both contain the C_PLPL bit, which is why the table
   writes (C_JAVA & ~C_PLPL) for keywords that are Java-only.
   C_AUTO and YACC lie outside the C_EXT mask. */
2229 #define C_EXT   0x00fff         /* C extensions */
2230 #define C_PLAIN 0x00000         /* C */
2231 #define C_PLPL  0x00001         /* C++ */
2232 #define C_STAR  0x00003         /* C* */
2233 #define C_JAVA  0x00005         /* JAVA */
2234 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
2235 #define YACC    0x10000         /* yacc file */
2236
2237 /*
2238  * The C symbol tables.
 *
 * enum sym_type is the class that the keyword table below assigns to
 * each word it knows:
 *   st_none        returned by C_symtype for anything else
 *   st_C_objprot   @interface, @protocol
 *   st_C_objimpl   @implementation
 *   st_C_objend    @end
 *   st_C_gnumacro  DEFUN, SYSCALL, ENTRY, PSEUDO
 *   st_C_ignore    if, for, while, switch, return, import, package, friend
 *   st_C_attribute __attribute__, GTY
 *   st_C_javastruct extends, implements
 *   st_C_operator  operator
 *   st_C_class     class
 *   st_C_template  template
 *   st_C_struct    struct, union, namespace, domain, interface
 *   st_C_extern    extern
 *   st_C_enum      enum
 *   st_C_define    define, undef
 *   st_C_typedef   typedef
2239  */
2240 enum sym_type
2241 {
2242   st_none,
2243   st_C_objprot, st_C_objimpl, st_C_objend,
2244   st_C_gnumacro,
2245   st_C_ignore, st_C_attribute,
2246   st_C_javastruct,
2247   st_C_operator,
2248   st_C_class, st_C_template,
2249   st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
2250 };
2251
/* Forward declarations: hash and in_word_set are the gperf-generated
   keyword lookup below; C_symtype is the wrapper that filters the result
   by language extension. */
2252 static unsigned int hash (const char *, unsigned int);
2253 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2254 static enum sym_type C_symtype (char *, int, int);
2255
2256 /* Feed stuff between (but not including) %[ and %] lines to:
2257      gperf -m 5
2258 %[
2259 %compare-strncmp
2260 %enum
2261 %struct-type
2262 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2263 %%
2264 if,             0,                      st_C_ignore
2265 for,            0,                      st_C_ignore
2266 while,          0,                      st_C_ignore
2267 switch,         0,                      st_C_ignore
2268 return,         0,                      st_C_ignore
2269 __attribute__,  0,                      st_C_attribute
2270 GTY,            0,                      st_C_attribute
2271 @interface,     0,                      st_C_objprot
2272 @protocol,      0,                      st_C_objprot
2273 @implementation,0,                      st_C_objimpl
2274 @end,           0,                      st_C_objend
2275 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2276 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2277 friend,         C_PLPL,                 st_C_ignore
2278 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2279 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2280 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2281 class,          0,                      st_C_class
2282 namespace,      C_PLPL,                 st_C_struct
2283 domain,         C_STAR,                 st_C_struct
2284 union,          0,                      st_C_struct
2285 struct,         0,                      st_C_struct
2286 extern,         0,                      st_C_extern
2287 enum,           0,                      st_C_enum
2288 typedef,        0,                      st_C_typedef
2289 define,         0,                      st_C_define
2290 undef,          0,                      st_C_define
2291 operator,       C_PLPL,                 st_C_operator
2292 template,       0,                      st_C_template
2293 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2294 DEFUN,          0,                      st_C_gnumacro
2295 SYSCALL,        0,                      st_C_gnumacro
2296 ENTRY,          0,                      st_C_gnumacro
2297 PSEUDO,         0,                      st_C_gnumacro
2298 # These are defined inside C functions, so currently they are not met.
2299 # EXFUN used in glibc, DEFVAR_* in emacs.
2300 #EXFUN,         0,                      st_C_gnumacro
2301 #DEFVAR_,       0,                      st_C_gnumacro
2302 %]
2303 and replace lines between %< and %> with its output, then:
2304  - remove the #if characterset check
2305  - make in_word_set static and not inline. */
2306 /*%<*/
2307 /* C code produced by gperf version 3.0.1 */
2308 /* Command-line: gperf -m 5  */
2309 /* Computed positions: -k'2-3' */
2310
/* One keyword of the table: its spelling, the C_* extension bits that
   restrict it (0 means unrestricted, see C_symtype) and its class. */
2311 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2312 /* maximum key range = 33, duplicates = 0 */
2313
/* Hash function produced by gperf for the keyword table.  The result is
   LEN plus the association value of str[1] and, unless LEN is 2, that of
   str[2] as well.  str[1] is always read, so LEN must be at least 2;
   in_word_set checks this before calling. */
2314 static inline unsigned int
2315 hash (register const char *str, register unsigned int len)
2316 {
2317   static unsigned char asso_values[] =
2318     {
2319       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2320       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2321       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2322       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2323       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2324       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2325       35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
2326       26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2327       35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
2328       35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
2329       23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
2330        0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
2331        4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
2332       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2333       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2334       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2335       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2336       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2337       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2338       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2339       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2340       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2341       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2342       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2343       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2344       35, 35, 35, 35, 35, 35
2345     };
2346   register int hval = len;
2347
2348   switch (hval)
2349     {
2350       default:
2351         hval += asso_values[(unsigned char)str[2]];
2352       /*FALLTHROUGH*/
2353       case 2:
2354         hval += asso_values[(unsigned char)str[1]];
2355         break;
2356     }
2357   return hval;
2358 }
2359
/* Look up the word made of the first LEN characters of STR in the
   keyword table.  Return a pointer to its entry, or 0 if LEN is outside
   MIN_WORD_LENGTH..MAX_WORD_LENGTH, if the hash falls outside the table,
   or if the entry at the hashed position is not exactly that word.
   The {""} entries fill hash values that no keyword maps to. */
2360 static struct C_stab_entry *
2361 in_word_set (register const char *str, register unsigned int len)
2362 {
2363   enum
2364     {
2365       TOTAL_KEYWORDS = 33,
2366       MIN_WORD_LENGTH = 2,
2367       MAX_WORD_LENGTH = 15,
2368       MIN_HASH_VALUE = 2,
2369       MAX_HASH_VALUE = 34
2370     };
2371
2372   static struct C_stab_entry wordlist[] =
2373     {
2374       {""}, {""},
2375       {"if",            0,                      st_C_ignore},
2376       {"GTY",           0,                      st_C_attribute},
2377       {"@end",          0,                      st_C_objend},
2378       {"union",         0,                      st_C_struct},
2379       {"define",                0,                      st_C_define},
2380       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2381       {"template",      0,                      st_C_template},
2382       {"operator",      C_PLPL,                 st_C_operator},
2383       {"@interface",    0,                      st_C_objprot},
2384       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2385       {"friend",                C_PLPL,                 st_C_ignore},
2386       {"typedef",       0,                      st_C_typedef},
2387       {"return",                0,                      st_C_ignore},
2388       {"@implementation",0,                     st_C_objimpl},
2389       {"@protocol",     0,                      st_C_objprot},
2390       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2391       {"extern",                0,                      st_C_extern},
2392       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2393       {"struct",                0,                      st_C_struct},
2394       {"domain",                C_STAR,                 st_C_struct},
2395       {"switch",                0,                      st_C_ignore},
2396       {"enum",          0,                      st_C_enum},
2397       {"for",           0,                      st_C_ignore},
2398       {"namespace",     C_PLPL,                 st_C_struct},
2399       {"class",         0,                      st_C_class},
2400       {"while",         0,                      st_C_ignore},
2401       {"undef",         0,                      st_C_define},
2402       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2403       {"__attribute__", 0,                      st_C_attribute},
2404       {"SYSCALL",       0,                      st_C_gnumacro},
2405       {"ENTRY",         0,                      st_C_gnumacro},
2406       {"PSEUDO",                0,                      st_C_gnumacro},
2407       {"DEFUN",         0,                      st_C_gnumacro}
2408     };
2409
2410   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2411     {
2412       register int key = hash (str, len);
2413
2414       if (key <= MAX_HASH_VALUE && key >= 0)
2415         {
2416           register const char *s = wordlist[key].name;
2417
          /* Equal over LEN characters, and the keyword ends right there. */
2418           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2419             return &wordlist[key];
2420         }
2421     }
2422   return 0;
2423 }
2424 /*%>*/
2425
2426 static enum sym_type
2427 C_symtype (char *str, int len, int c_ext)
2428 {
2429   register struct C_stab_entry *se = in_word_set (str, len);
2430
2431   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2432     return st_none;
2433   return se->type;
2434 }
2435
2436 \f
2437 /*
2438  * Ignoring __attribute__ ((list))
 *
 * The flag has static storage, so it is false until something sets it.
2439  */
2440 static bool inattribute;        /* looking at an __attribute__ construct */
2441
2442 /*
2443  * C functions and variables are recognized using a simple
2444  * finite automaton.  fvdef is its state variable.
 *
 * fvnone is the first enumerator and fvdef has static storage,
 * so the automaton starts in the "nothing seen" state.
2445  */
2446 static enum
2447 {
2448   fvnone,                       /* nothing seen */
2449   fdefunkey,                    /* Emacs DEFUN keyword seen */
2450   fdefunname,                   /* Emacs DEFUN name seen */
2451   foperator,                    /* func: operator keyword seen (cplpl) */
2452   fvnameseen,                   /* function or variable name seen */
2453   fstartlist,                   /* func: just after open parenthesis */
2454   finlist,                      /* func: in parameter list */
2455   flistseen,                    /* func: after parameter list */
2456   fignore,                      /* func: before open brace */
2457   vignore                       /* var-like: ignore until ';' */
2458 } fvdef;
2459
2460 static bool fvextern;           /* func or var: extern keyword seen; */
2461
2462 /*
2463  * typedefs are recognized using a simple finite automaton.
2464  * typdef is its state variable.  consider_token moves it from tnone
      * to tkeyseen only when the `typedefs' flag is set.
2465  */
2466 static enum
2467 {
2468   tnone,                        /* nothing seen */
2469   tkeyseen,                     /* typedef keyword seen */
2470   ttypeseen,                    /* defined type seen */
2471   tinbody,                      /* inside typedef body */
2472   tend,                         /* just before typedef tag */
2473   tignore                       /* junk after typedef tag */
2474 } typdef;
2475
2476 /*
2477  * struct-like structures (enum, struct and union) are recognized
2478  * using another simple finite automaton.  `structdef' is its state
2479  * variable.  consider_token also enters skeyseen for tokens whose
      * type is st_C_class, such as the `class' keyword.
2480  */
2481 static enum
2482 {
2483   snone,                        /* nothing seen yet,
2484                                    or in struct body if bracelev > 0 */
2485   skeyseen,                     /* struct-like keyword seen */
2486   stagseen,                     /* struct-like tag seen */
2487   scolonseen                    /* colon seen after struct-like tag */
2488 } structdef;
2489
2490 /*
2491  * When objdef is different from onone, objtag is the name of the class.
      * consider_token stores the result of savenstr on the class token here
      * when it is called in the oimplementation or oprotocol state.
2492  */
2493 static const char *objtag = "<uninited>";
2494
2495 /*
2496  * Yet another little state machine to deal with preprocessor lines.
      * The CNL macro puts it back to dnone.
2497  */
2498 static enum
2499 {
2500   dnone,                        /* nothing seen */
2501   dsharpseen,                   /* '#' seen as first char on line */
2502   ddefineseen,                  /* '#' and 'define' seen */
2503   dignorerest                   /* ignore rest of line */
2504 } definedef;
2505
2506 /*
2507  * State machine for Objective C protocols and implementations.
2508  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
      * NOTE(review): consider_token never assigns oparenseen, omethodsign,
      * omethodcolon or oignore; presumably C_entries sets them -- confirm.
2509  */
2510 static enum
2511 {
2512   onone,                        /* nothing seen */
2513   oprotocol,                    /* @interface or @protocol seen */
2514   oimplementation,              /* @implementation seen */
2515   otagseen,                     /* class name seen */
2516   oparenseen,                   /* parenthesis before category seen */
2517   ocatseen,                     /* category name seen */
2518   oinbody,                      /* in @implementation body */
2519   omethodsign,                  /* in @implementation body, after +/- */
2520   omethodtag,                   /* after method name */
2521   omethodcolon,                 /* after method colon */
2522   omethodparm,                  /* after method parameter */
2523   oignore                       /* wait for @end */
2524 } objdef;
2525
2526
2527 /*
2528  * Use this structure to keep info about the token read, and how it
2529  * should be tagged.  Used by the make_C_tag function to build a tag.
2530  */
2531 static struct tok
2532 {
2533   char *line;                   /* string containing the token */
2534   int offset;                   /* where the token starts in LINE */
2535   int length;                   /* token length */
2536   /*
2537     The previous members can be used to pass strings around for generic
2538     purposes.  The following ones specifically refer to creating tags.  In this
2539     case the token contained here is the pattern that will be used to create a
2540     tag.
2541   */
2542   bool valid;                   /* make_C_tag creates a tag only if set;
2543                                    the token should be invalidated whenever
2544                                    a state machine is reset prematurely */
2545   bool named;                   /* create a named tag */
2546   int lineno;                   /* source line number of tag */
2547   long linepos;                 /* source char number of tag */
2548 } token;                        /* latest token read */
2549
2550 /*
2551  * Variables and functions for dealing with nested structures.
2552  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2553  */
2554 static void pushclass_above (int, char *, int);
2555 static void popclass_above (int);
2556 static void write_classname (linebuffer *, const char *qualifier);
2557 /* cname and bracelev are parallel arrays of `size' elements each. */
2558 static struct {
2559   char **cname;                 /* nested class names */
2560   int *bracelev;                /* nested class brace level */
2561   int nl;                       /* class nesting level (elements used) */
2562   int size;                     /* length of the array */
2563 } cstack;                       /* stack for nested declaration tags */
2564 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2565 #define nestlev         (cstack.nl)
2566 /* After struct keyword or in struct body, not inside a nested function.
        The expansion uses `bracelev', which is not a file-level name: it
        must be in scope wherever the macro is used. */
2567 #define instruct        (structdef == snone && nestlev > 0                      \
2568                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2569
2570 static void
2571 pushclass_above (int bracelev, char *str, int len)
2572 {
2573   int nl;
2574
2575   popclass_above (bracelev);
2576   nl = cstack.nl;
2577   if (nl >= cstack.size)
2578     {
2579       int size = cstack.size *= 2;
2580       xrnew (cstack.cname, size, char *);
2581       xrnew (cstack.bracelev, size, int);
2582     }
2583   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2584   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2585   cstack.bracelev[nl] = bracelev;
2586   cstack.nl = nl + 1;
2587 }
2588
2589 static void
2590 popclass_above (int bracelev)
2591 {
2592   int nl;
2593
2594   for (nl = cstack.nl - 1;
2595        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2596        nl--)
2597     {
2598       free (cstack.cname[nl]);
2599       cstack.nl = nl;
2600     }
2601 }
2602
2603 static void
2604 write_classname (linebuffer *cn, const char *qualifier)
2605 {
2606   int i, len;
2607   int qlen = strlen (qualifier);
2608
2609   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2610     {
2611       len = 0;
2612       cn->len = 0;
2613       cn->buffer[0] = '\0';
2614     }
2615   else
2616     {
2617       len = strlen (cstack.cname[0]);
2618       linebuffer_setlen (cn, len);
2619       strcpy (cn->buffer, cstack.cname[0]);
2620     }
2621   for (i = 1; i < cstack.nl; i++)
2622     {
2623       char *s = cstack.cname[i];
2624       if (s == NULL)
2625         continue;
2626       linebuffer_setlen (cn, len + qlen + strlen (s));
2627       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2628     }
2629 }
2630
2631 \f
2632 static bool consider_token (char *, int, int, int *, int, int, bool *);
2633 static void make_C_tag (bool);
2634
2635 /*
2636  * consider_token ()
2637  *      checks to see if the current token is at the start of a
2638  *      function or variable, or corresponds to a typedef, or
2639  *      is a struct/union/enum tag, or #define, or an enum constant.
      *      Returns TRUE if the token is such a candidate; C_entries then
      *      builds a tag name for it.
2640  *
2641  *      *IS_FUNC_OR_VAR is assigned only on some of the TRUE paths: it
2642  *      gets (C == '(') for a #define name, and TRUE for Objective C
      *      class and category names, operators, GNU macro names and
      *      function or variable names.  C_EXTP points to which language
      *      we are looking at; C_AUTO is replaced there by C_PLPL when a
      *      class or template keyword is met at brace level 0 with all
      *      automata idle, or when a name ends in "::operator".
      *
      *      NOTE(review): the prototype above declares the last parameter
      *      as `bool *' while the definition below uses `int *'; presumably
      *      bool is an alias for int in this file -- confirm.
2643  *
2644  * Globals
2645  *      fvdef                   IN OUT
2646  *      structdef               IN OUT
2647  *      definedef               IN OUT
2648  *      typdef                  IN OUT
2649  *      objdef                  IN OUT
      *      token_name              IN OUT (Objective C method names)
      *      fvextern                OUT
      *      inattribute             OUT
      *      objtag                  OUT
2650  */
2651
2652 static bool
2653 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2654                                 /* IN: token pointer */
2655                                 /* IN: token length */
2656                                 /* IN: first char after the token */
2657                                 /* IN, OUT: C extensions mask */
2658                                 /* IN: brace level */
2659                                 /* IN: parenthesis level */
2660                                 /* OUT: function or variable found */
2661 {
2662   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2663      structtype is the type of the preceding struct-like keyword, and
2664      structbracelev is the brace level where it has been seen. */
2665   static enum sym_type structtype;
2666   static int structbracelev;
2667   static enum sym_type toktype;  /* keyword class of STR; assigned on every call */
2668
2669
2670   toktype = C_symtype (str, len, *c_extp);
2671
2672   /*
2673    * Skip __attribute__
2674    */
2675   if (toktype == st_C_attribute)
2676     {
2677       inattribute = TRUE;
2678       return FALSE;
2679      }
2680
2681    /*
2682     * Advance the definedef state machine.
2683     */
2684    switch (definedef)
2685      {
2686      case dnone:
2687        /* We're not on a preprocessor line. */
2688        if (toktype == st_C_gnumacro)
2689          {
2690            fvdef = fdefunkey;
2691            return FALSE;
2692          }
2693        break;
2694      case dsharpseen:
2695        if (toktype == st_C_define)
2696          {
2697            definedef = ddefineseen;
2698          }
2699        else
2700          {
2701            definedef = dignorerest;
2702          }
2703        return FALSE;
2704      case ddefineseen:
2705        /*
2706         * Make a tag for any macro, unless it is a constant
2707         * and constantypedefs is FALSE.
2708         */
2709        definedef = dignorerest;
2710        *is_func_or_var = (c == '(');
2711        if (!*is_func_or_var && !constantypedefs)
2712          return FALSE;
2713        else
2714          return TRUE;
2715      case dignorerest:
2716        return FALSE;
2717      default:
2718        error ("internal error: definedef value.");
2719      }
2720
2721    /*
2722     * Now typedefs
2723     */
2724    switch (typdef)
2725      {
2726      case tnone:
2727        if (toktype == st_C_typedef)
2728          {
2729            if (typedefs)
2730              typdef = tkeyseen;
2731            fvextern = FALSE;
2732            fvdef = fvnone;
2733            return FALSE;
2734          }
2735        break;
2736      case tkeyseen:
2737        switch (toktype)
2738          {
2739          case st_none:
2740          case st_C_class:
2741          case st_C_struct:
2742          case st_C_enum:
2743            typdef = ttypeseen;
2744          }
2745        break;
2746      case ttypeseen:
2747        if (structdef == snone && fvdef == fvnone)
2748          {
2749            fvdef = fvnameseen;
2750            return TRUE;
2751          }
2752        break;
2753      case tend:
2754        switch (toktype)
2755          {
2756          case st_C_class:
2757          case st_C_struct:
2758          case st_C_enum:
2759            return FALSE;
2760          }
2761        return TRUE;
2762      }
2763
2764    switch (toktype)
2765      {
2766      case st_C_javastruct:
2767        if (structdef == stagseen)
2768          structdef = scolonseen;
2769        return FALSE;
2770      case st_C_template:
2771      case st_C_class:
2772        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2773            && bracelev == 0
2774            && definedef == dnone && structdef == snone
2775            && typdef == tnone && fvdef == fvnone)
2776          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2777        if (toktype == st_C_template)
2778          break;
2779        /* FALLTHRU */
2780      case st_C_struct:
2781      case st_C_enum:
2782        if (parlev == 0
2783            && fvdef != vignore
2784            && (typdef == tkeyseen
2785                || (typedefs_or_cplusplus && structdef == snone)))
2786          {
2787            structdef = skeyseen;
2788            structtype = toktype;
2789            structbracelev = bracelev;
2790            if (fvdef == fvnameseen)
2791              fvdef = fvnone;
2792          }
2793        return FALSE;
2794      }
2795    /* The first token after a struct-like keyword is taken as its tag. */
2796    if (structdef == skeyseen)
2797      {
2798        structdef = stagseen;
2799        return TRUE;
2800      }
2801
2802    if (typdef != tnone)
2803      definedef = dnone;
2804
2805    /* Detect Objective C constructs. */
2806    switch (objdef)
2807      {
2808      case onone:
2809        switch (toktype)
2810          {
2811          case st_C_objprot:
2812            objdef = oprotocol;
2813            return FALSE;
2814          case st_C_objimpl:
2815            objdef = oimplementation;
2816            return FALSE;
2817          }
2818        break;
2819      case oimplementation:
2820        /* Save the class tag for functions or variables defined inside. */
2821        objtag = savenstr (str, len);
2822        objdef = oinbody;
2823        return FALSE;
2824      case oprotocol:
2825        /* Save the class tag for categories. */
2826        objtag = savenstr (str, len);
2827        objdef = otagseen;
2828        *is_func_or_var = TRUE;
2829        return TRUE;
2830      case oparenseen:
2831        objdef = ocatseen;
2832        *is_func_or_var = TRUE;
2833        return TRUE;
2834      case oinbody:
2835        break;
2836      case omethodsign:
2837        if (parlev == 0)
2838          {
2839            fvdef = fvnone;
2840            objdef = omethodtag;
2841            linebuffer_setlen (&token_name, len);
2842            memcpy (token_name.buffer, str, len);
2843            token_name.buffer[len] = '\0';
2844            return TRUE;
2845          }
2846        return FALSE;
2847      case omethodcolon:
2848        if (parlev == 0)
2849          objdef = omethodparm;
2850        return FALSE;
2851      case omethodparm:
2852        if (parlev == 0)
2853          {
2854            int oldlen = token_name.len;
2855            fvdef = fvnone;
2856            objdef = omethodtag;
2857            linebuffer_setlen (&token_name, oldlen + len);
2858            memcpy (token_name.buffer + oldlen, str, len);
2859            token_name.buffer[oldlen + len] = '\0';
2860            return TRUE;
2861          }
2862        return FALSE;
2863      case oignore:
2864        if (toktype == st_C_objend)
2865          {
2866            /* Memory leakage here: the string pointed by objtag is
2867               never released, because many tests would be needed to
2868               avoid breaking on incorrect input code.  The amount of
2869               memory leaked here is the sum of the lengths of the
2870               class tags.
2871            free (objtag); */
2872            objdef = onone;
2873          }
2874        return FALSE;
2875      }
2876
2877    /* A function, variable or enum constant? */
2878    switch (toktype)
2879      {
2880      case st_C_extern:
2881        fvextern = TRUE;
2882        switch  (fvdef)
2883          {
2884          case finlist:
2885          case flistseen:
2886          case fignore:
2887          case vignore:
2888            break;
2889          default:
2890            fvdef = fvnone;
2891          }
2892        return FALSE;
2893      case st_C_ignore:
2894        fvextern = FALSE;
2895        fvdef = vignore;
2896        return FALSE;
2897      case st_C_operator:
2898        fvdef = foperator;
2899        *is_func_or_var = TRUE;
2900        return TRUE;
2901      case st_none:
2902        if (constantypedefs
2903            && structdef == snone
2904            && structtype == st_C_enum && bracelev > structbracelev)
2905          return TRUE;           /* enum constant */
2906        switch (fvdef)
2907          {
2908          case fdefunkey:
2909            if (bracelev > 0)
2910              break;
2911            fvdef = fdefunname;  /* GNU macro */
2912            *is_func_or_var = TRUE;
2913            return TRUE;
2914          case fvnone:
2915            switch (typdef)
2916              {
2917              case ttypeseen:
2918                return FALSE;
2919              case tnone:
2920                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2921                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2922                  {
2923                    fvdef = vignore;
2924                    return FALSE;
2925                  }
2926                break;
2927              }
2928           /* FALLTHRU */
2929           case fvnameseen:
2930           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2931             {
2932               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2933                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2934               fvdef = foperator;
2935               *is_func_or_var = TRUE;
2936               return TRUE;
2937             }
2938           if (bracelev > 0 && !instruct)
2939             break;
2940           fvdef = fvnameseen;   /* function or variable */
2941           *is_func_or_var = TRUE;
2942           return TRUE;
2943         }
2944       break;
2945     }
2946
2947   return FALSE;
2948 }
2949
2950 \f
2951 /*
2952  * C_entries often keeps pointers to tokens or lines which are older than
2953  * the line currently read.  By keeping two line buffers, and switching
2954  * them at end of line, it is possible to use those pointers.
      *
      * The macros below are not self-contained: they use curndx, newndx,
      * c_ext, charno, inf, lp, quotednl and savetoken, which must be in
      * scope at the place where they are expanded.
2955  */
2956 static struct
2957 {
2958   long linepos;                 /* charno as it was just before reading the line */
2959   linebuffer lb;                /* the line itself */
2960 } lbs[2];
2961
2962 #define current_lb_is_new (newndx == curndx)
2963 #define switch_line_buffers() (curndx = 1 - curndx)
2964 /* Buffer and position for the current index and for the newest index. */
2965 #define curlb (lbs[curndx].lb)
2966 #define newlb (lbs[newndx].lb)
2967 #define curlinepos (lbs[curndx].linepos)
2968 #define newlinepos (lbs[newndx].linepos)
2969 /* Tests on the bits of the c_ext mask. */
2970 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2971 #define cplpl (c_ext & C_PLPL)
2972 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2973 /* Read the next line into curlb; definedef and the token are untouched. */
2974 #define CNL_SAVE_DEFINEDEF()                                            \
2975 do {                                                                    \
2976   curlinepos = charno;                                                  \
2977   readline (&curlb, inf);                                               \
2978   lp = curlb.buffer;                                                    \
2979   quotednl = FALSE;                                                     \
2980   newndx = curndx;                                                      \
2981 } while (0)
2982 /* Same, then take back a valid savetoken and reset definedef to dnone. */
2983 #define CNL()                                                           \
2984 do {                                                                    \
2985   CNL_SAVE_DEFINEDEF();                                                 \
2986   if (savetoken.valid)                                                  \
2987     {                                                                   \
2988       token = savetoken;                                                \
2989       savetoken.valid = FALSE;                                          \
2990     }                                                                   \
2991   definedef = dnone;                                                    \
2992 } while (0)
2993
2994
2995 static void
2996 make_C_tag (int isfun)
2997 {
2998   /* This function is never called when token.valid is FALSE, but
2999      we must protect against invalid input or internal errors. */
3000   if (token.valid)
3001     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3002               token.offset+token.length+1, token.lineno, token.linepos);
3003   else if (DEBUG)
3004     {                             /* this branch is optimized away if !DEBUG */
3005       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3006                 token_name.len + 17, isfun, token.line,
3007                 token.offset+token.length+1, token.lineno, token.linepos);
3008       error ("INVALID TOKEN");
3009     }
3010
3011   token.valid = FALSE;
3012 }
3013
3014
3015 /*
3016  * C_entries ()
3017  *      This routine finds functions, variables, typedefs,
3018  *      #define's, enum constants and struct/union/enum definitions in
3019  *      C syntax and adds them to the list.
3020  */
3021 static void
3022 C_entries (int c_ext, FILE *inf)
3023                                 /* extension of C */
3024                                 /* input file */
3025 {
3026   register char c;              /* latest char read; '\0' for end of line */
3027   register char *lp;            /* pointer one beyond the character `c' */
3028   int curndx, newndx;           /* indices for current and new lb */
3029   register int tokoff;          /* offset in line of start of current token */
3030   register int toklen;          /* length of current token */
3031   const char *qualifier;        /* string used to qualify names */
3032   int qlen;                     /* length of qualifier */
3033   int bracelev;                 /* current brace level */
3034   int bracketlev;               /* current bracket level */
3035   int parlev;                   /* current parenthesis level */
3036   int attrparlev;               /* __attribute__ parenthesis level */
3037   int templatelev;              /* current template level */
3038   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3039   bool incomm, inquote, inchar, quotednl, midtoken;
3040   bool yacc_rules;              /* in the rules part of a yacc file */
3041   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3042
3043
3044   linebuffer_init (&lbs[0].lb);
3045   linebuffer_init (&lbs[1].lb);
3046   if (cstack.size == 0)
3047     {
3048       cstack.size = (DEBUG) ? 1 : 4;
3049       cstack.nl = 0;
3050       cstack.cname = xnew (cstack.size, char *);
3051       cstack.bracelev = xnew (cstack.size, int);
3052     }
3053
3054   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3055   curndx = newndx = 0;
3056   lp = curlb.buffer;
3057   *lp = 0;
3058
3059   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3060   structdef = snone; definedef = dnone; objdef = onone;
3061   yacc_rules = FALSE;
3062   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3063   token.valid = savetoken.valid = FALSE;
3064   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3065   if (cjava)
3066     { qualifier = "."; qlen = 1; }
3067   else
3068     { qualifier = "::"; qlen = 2; }
3069
3070
3071   while (!feof (inf))
3072     {
3073       c = *lp++;
3074       if (c == '\\')
3075         {
3076           /* If we are at the end of the line, the next character is a
3077              '\0'; do not skip it, because it is what tells us
3078              to read the next line.  */
3079           if (*lp == '\0')
3080             {
3081               quotednl = TRUE;
3082               continue;
3083             }
3084           lp++;
3085           c = ' ';
3086         }
3087       else if (incomm)
3088         {
3089           switch (c)
3090             {
3091             case '*':
3092               if (*lp == '/')
3093                 {
3094                   c = *lp++;
3095                   incomm = FALSE;
3096                 }
3097               break;
3098             case '\0':
3099               /* Newlines inside comments do not end macro definitions in
3100                  traditional cpp. */
3101               CNL_SAVE_DEFINEDEF ();
3102               break;
3103             }
3104           continue;
3105         }
3106       else if (inquote)
3107         {
3108           switch (c)
3109             {
3110             case '"':
3111               inquote = FALSE;
3112               break;
3113             case '\0':
3114               /* Newlines inside strings do not end macro definitions
3115                  in traditional cpp, even though compilers don't
3116                  usually accept them. */
3117               CNL_SAVE_DEFINEDEF ();
3118               break;
3119             }
3120           continue;
3121         }
3122       else if (inchar)
3123         {
3124           switch (c)
3125             {
3126             case '\0':
3127               /* Hmmm, something went wrong. */
3128               CNL ();
3129               /* FALLTHRU */
3130             case '\'':
3131               inchar = FALSE;
3132               break;
3133             }
3134           continue;
3135         }
3136       else switch (c)
3137         {
3138         case '"':
3139           inquote = TRUE;
3140           if (bracketlev > 0)
3141             continue;
3142           if (inattribute)
3143             break;
3144           switch (fvdef)
3145             {
3146             case fdefunkey:
3147             case fstartlist:
3148             case finlist:
3149             case fignore:
3150             case vignore:
3151               break;
3152             default:
3153               fvextern = FALSE;
3154               fvdef = fvnone;
3155             }
3156           continue;
3157         case '\'':
3158           inchar = TRUE;
3159           if (bracketlev > 0)
3160             continue;
3161           if (inattribute)
3162             break;
3163           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3164             {
3165               fvextern = FALSE;
3166               fvdef = fvnone;
3167             }
3168           continue;
3169         case '/':
3170           if (*lp == '*')
3171             {
3172               incomm = TRUE;
3173               lp++;
3174               c = ' ';
3175               if (bracketlev > 0)
3176                 continue;
3177             }
3178           else if (/* cplpl && */ *lp == '/')
3179             {
3180               c = '\0';
3181             }
3182           break;
3183         case '%':
3184           if ((c_ext & YACC) && *lp == '%')
3185             {
3186               /* Entering or exiting rules section in yacc file. */
3187               lp++;
3188               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3189               typdef = tnone; structdef = snone;
3190               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3191               bracelev = 0;
3192               yacc_rules = !yacc_rules;
3193               continue;
3194             }
3195           else
3196             break;
3197         case '#':
3198           if (definedef == dnone)
3199             {
3200               char *cp;
3201               bool cpptoken = TRUE;
3202
3203               /* Look back on this line.  If all blanks, or nonblanks
3204                  followed by an end of comment, this is a preprocessor
3205                  token. */
3206               for (cp = newlb.buffer; cp < lp-1; cp++)
3207                 if (!iswhite (*cp))
3208                   {
3209                     if (*cp == '*' && cp[1] == '/')
3210                       {
3211                         cp++;
3212                         cpptoken = TRUE;
3213                       }
3214                     else
3215                       cpptoken = FALSE;
3216                   }
3217               if (cpptoken)
3218                 definedef = dsharpseen;
3219             } /* if (definedef == dnone) */
3220           continue;
3221         case '[':
3222           bracketlev++;
3223           continue;
3224         default:
3225           if (bracketlev > 0)
3226             {
3227               if (c == ']')
3228                 --bracketlev;
3229               else if (c == '\0')
3230                 CNL_SAVE_DEFINEDEF ();
3231               continue;
3232             }
3233           break;
3234         } /* switch (c) */
3235
3236
3237       /* Consider token only if some involved conditions are satisfied. */
3238       if (typdef != tignore
3239           && definedef != dignorerest
3240           && fvdef != finlist
3241           && templatelev == 0
3242           && (definedef != dnone
3243               || structdef != scolonseen)
3244           && !inattribute)
3245         {
3246           if (midtoken)
3247             {
3248               if (endtoken (c))
3249                 {
3250                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3251                     /* This handles :: in the middle,
3252                        but not at the beginning of an identifier.
3253                        Also, space-separated :: is not recognized. */
3254                     {
3255                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3256                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3257                       lp += 2;
3258                       toklen += 2;
3259                       c = lp[-1];
3260                       goto still_in_token;
3261                     }
3262                   else
3263                     {
3264                       bool funorvar = FALSE;
3265
3266                       if (yacc_rules
3267                           || consider_token (newlb.buffer + tokoff, toklen, c,
3268                                              &c_ext, bracelev, parlev,
3269                                              &funorvar))
3270                         {
3271                           if (fvdef == foperator)
3272                             {
3273                               char *oldlp = lp;
3274                               lp = skip_spaces (lp-1);
3275                               if (*lp != '\0')
3276                                 lp += 1;
3277                               while (*lp != '\0'
3278                                      && !iswhite (*lp) && *lp != '(')
3279                                 lp += 1;
3280                               c = *lp++;
3281                               toklen += lp - oldlp;
3282                             }
3283                           token.named = FALSE;
3284                           if (!plainc
3285                               && nestlev > 0 && definedef == dnone)
3286                             /* in struct body */
3287                             {
3288                               int len;
3289                               write_classname (&token_name, qualifier);
3290                               len = token_name.len;
3291                               linebuffer_setlen (&token_name, len+qlen+toklen);
3292                               sprintf (token_name.buffer + len, "%s%.*s",
3293                                        qualifier, toklen, newlb.buffer + tokoff);
3294                               token.named = TRUE;
3295                             }
3296                           else if (objdef == ocatseen)
3297                             /* Objective C category */
3298                             {
3299                               int len = strlen (objtag) + 2 + toklen;
3300                               linebuffer_setlen (&token_name, len);
3301                               sprintf (token_name.buffer, "%s(%.*s)",
3302                                        objtag, toklen, newlb.buffer + tokoff);
3303                               token.named = TRUE;
3304                             }
3305                           else if (objdef == omethodtag
3306                                    || objdef == omethodparm)
3307                             /* Objective C method */
3308                             {
3309                               token.named = TRUE;
3310                             }
3311                           else if (fvdef == fdefunname)
3312                             /* GNU DEFUN and similar macros */
3313                             {
3314                               bool defun = (newlb.buffer[tokoff] == 'F');
3315                               int off = tokoff;
3316                               int len = toklen;
3317
3318                               /* Rewrite the tag so that emacs lisp DEFUNs
3319                                  can be found by their elisp name */
3320                               if (defun)
3321                                 {
3322                                   off += 1;
3323                                   len -= 1;
3324                                 }
3325                               linebuffer_setlen (&token_name, len);
3326                               memcpy (token_name.buffer,
3327                                       newlb.buffer + off, len);
3328                               token_name.buffer[len] = '\0';
3329                               if (defun)
3330                                 while (--len >= 0)
3331                                   if (token_name.buffer[len] == '_')
3332                                     token_name.buffer[len] = '-';
3333                               token.named = defun;
3334                             }
3335                           else
3336                             {
3337                               linebuffer_setlen (&token_name, toklen);
3338                               memcpy (token_name.buffer,
3339                                       newlb.buffer + tokoff, toklen);
3340                               token_name.buffer[toklen] = '\0';
3341                               /* Name macros and members. */
3342                               token.named = (structdef == stagseen
3343                                              || typdef == ttypeseen
3344                                              || typdef == tend
3345                                              || (funorvar
3346                                                  && definedef == dignorerest)
3347                                              || (funorvar
3348                                                  && definedef == dnone
3349                                                  && structdef == snone
3350                                                  && bracelev > 0));
3351                             }
3352                           token.lineno = lineno;
3353                           token.offset = tokoff;
3354                           token.length = toklen;
3355                           token.line = newlb.buffer;
3356                           token.linepos = newlinepos;
3357                           token.valid = TRUE;
3358
3359                           if (definedef == dnone
3360                               && (fvdef == fvnameseen
3361                                   || fvdef == foperator
3362                                   || structdef == stagseen
3363                                   || typdef == tend
3364                                   || typdef == ttypeseen
3365                                   || objdef != onone))
3366                             {
3367                               if (current_lb_is_new)
3368                                 switch_line_buffers ();
3369                             }
3370                           else if (definedef != dnone
3371                                    || fvdef == fdefunname
3372                                    || instruct)
3373                             make_C_tag (funorvar);
3374                         }
3375                       else /* not yacc and consider_token failed */
3376                         {
3377                           if (inattribute && fvdef == fignore)
3378                             {
3379                               /* We have just met __attribute__ after a
3380                                  function parameter list: do not tag the
3381                                  function again. */
3382                               fvdef = fvnone;
3383                             }
3384                         }
3385                       midtoken = FALSE;
3386                     }
3387                 } /* if (endtoken (c)) */
3388               else if (intoken (c))
3389                 still_in_token:
3390                 {
3391                   toklen++;
3392                   continue;
3393                 }
3394             } /* if (midtoken) */
3395           else if (begtoken (c))
3396             {
3397               switch (definedef)
3398                 {
3399                 case dnone:
3400                   switch (fvdef)
3401                     {
3402                     case fstartlist:
3403                       /* This prevents tagging fb in
3404                          void (__attribute__((noreturn)) *fb) (void);
3405                          Fixing this is not easy and not very important. */
3406                       fvdef = finlist;
3407                       continue;
3408                     case flistseen:
3409                       if (plainc || declarations)
3410                         {
3411                           make_C_tag (TRUE); /* a function */
3412                           fvdef = fignore;
3413                         }
3414                       break;
3415                     }
3416                   if (structdef == stagseen && !cjava)
3417                     {
3418                       popclass_above (bracelev);
3419                       structdef = snone;
3420                     }
3421                   break;
3422                 case dsharpseen:
3423                   savetoken = token;
3424                   break;
3425                 }
3426               if (!yacc_rules || lp == newlb.buffer + 1)
3427                 {
3428                   tokoff = lp - 1 - newlb.buffer;
3429                   toklen = 1;
3430                   midtoken = TRUE;
3431                 }
3432               continue;
3433             } /* if (begtoken) */
3434         } /* if must look at token */
3435
3436
3437       /* Detect end of line, colon, comma, semicolon and various braces
3438          after having handled a token.*/
3439       switch (c)
3440         {
3441         case ':':
3442           if (inattribute)
3443             break;
3444           if (yacc_rules && token.offset == 0 && token.valid)
3445             {
3446               make_C_tag (FALSE); /* a yacc function */
3447               break;
3448             }
3449           if (definedef != dnone)
3450             break;
3451           switch (objdef)
3452             {
3453             case  otagseen:
3454               objdef = oignore;
3455               make_C_tag (TRUE); /* an Objective C class */
3456               break;
3457             case omethodtag:
3458             case omethodparm:
3459               objdef = omethodcolon;
3460               linebuffer_setlen (&token_name, token_name.len + 1);
3461               strcat (token_name.buffer, ":");
3462               break;
3463             }
3464           if (structdef == stagseen)
3465             {
3466               structdef = scolonseen;
3467               break;
3468             }
3469           /* Should be useless, but may work as a safety net. */
3470           if (cplpl && fvdef == flistseen)
3471             {
3472               make_C_tag (TRUE); /* a function */
3473               fvdef = fignore;
3474               break;
3475             }
3476           break;
3477         case ';':
3478           if (definedef != dnone || inattribute)
3479             break;
3480           switch (typdef)
3481             {
3482             case tend:
3483             case ttypeseen:
3484               make_C_tag (FALSE); /* a typedef */
3485               typdef = tnone;
3486               fvdef = fvnone;
3487               break;
3488             case tnone:
3489             case tinbody:
3490             case tignore:
3491               switch (fvdef)
3492                 {
3493                 case fignore:
3494                   if (typdef == tignore || cplpl)
3495                     fvdef = fvnone;
3496                   break;
3497                 case fvnameseen:
3498                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3499                       || (members && instruct))
3500                     make_C_tag (FALSE); /* a variable */
3501                   fvextern = FALSE;
3502                   fvdef = fvnone;
3503                   token.valid = FALSE;
3504                   break;
3505                 case flistseen:
3506                   if ((declarations
3507                        && (cplpl || !instruct)
3508                        && (typdef == tnone || (typdef != tignore && instruct)))
3509                       || (members
3510                           && plainc && instruct))
3511                     make_C_tag (TRUE);  /* a function */
3512                   /* FALLTHRU */
3513                 default:
3514                   fvextern = FALSE;
3515                   fvdef = fvnone;
3516                   if (declarations
3517                        && cplpl && structdef == stagseen)
3518                     make_C_tag (FALSE); /* forward declaration */
3519                   else
3520                     token.valid = FALSE;
3521                 } /* switch (fvdef) */
3522               /* FALLTHRU */
3523             default:
3524               if (!instruct)
3525                 typdef = tnone;
3526             }
3527           if (structdef == stagseen)
3528             structdef = snone;
3529           break;
3530         case ',':
3531           if (definedef != dnone || inattribute)
3532             break;
3533           switch (objdef)
3534             {
3535             case omethodtag:
3536             case omethodparm:
3537               make_C_tag (TRUE); /* an Objective C method */
3538               objdef = oinbody;
3539               break;
3540             }
3541           switch (fvdef)
3542             {
3543             case fdefunkey:
3544             case foperator:
3545             case fstartlist:
3546             case finlist:
3547             case fignore:
3548             case vignore:
3549               break;
3550             case fdefunname:
3551               fvdef = fignore;
3552               break;
3553             case fvnameseen:
3554               if (parlev == 0
3555                   && ((globals
3556                        && bracelev == 0
3557                        && templatelev == 0
3558                        && (!fvextern || declarations))
3559                       || (members && instruct)))
3560                   make_C_tag (FALSE); /* a variable */
3561               break;
3562             case flistseen:
3563               if ((declarations && typdef == tnone && !instruct)
3564                   || (members && typdef != tignore && instruct))
3565                 {
3566                   make_C_tag (TRUE); /* a function */
3567                   fvdef = fvnameseen;
3568                 }
3569               else if (!declarations)
3570                 fvdef = fvnone;
3571               token.valid = FALSE;
3572               break;
3573             default:
3574               fvdef = fvnone;
3575             }
3576           if (structdef == stagseen)
3577             structdef = snone;
3578           break;
3579         case ']':
3580           if (definedef != dnone || inattribute)
3581             break;
3582           if (structdef == stagseen)
3583             structdef = snone;
3584           switch (typdef)
3585             {
3586             case ttypeseen:
3587             case tend:
3588               typdef = tignore;
3589               make_C_tag (FALSE);       /* a typedef */
3590               break;
3591             case tnone:
3592             case tinbody:
3593               switch (fvdef)
3594                 {
3595                 case foperator:
3596                 case finlist:
3597                 case fignore:
3598                 case vignore:
3599                   break;
3600                 case fvnameseen:
3601                   if ((members && bracelev == 1)
3602                       || (globals && bracelev == 0
3603                           && (!fvextern || declarations)))
3604                     make_C_tag (FALSE); /* a variable */
3605                   /* FALLTHRU */
3606                 default:
3607                   fvdef = fvnone;
3608                 }
3609               break;
3610             }
3611           break;
3612         case '(':
3613           if (inattribute)
3614             {
3615               attrparlev++;
3616               break;
3617             }
3618           if (definedef != dnone)
3619             break;
3620           if (objdef == otagseen && parlev == 0)
3621             objdef = oparenseen;
3622           switch (fvdef)
3623             {
3624             case fvnameseen:
3625               if (typdef == ttypeseen
3626                   && *lp != '*'
3627                   && !instruct)
3628                 {
3629                   /* This handles constructs like:
3630                      typedef void OperatorFun (int fun); */
3631                   make_C_tag (FALSE);
3632                   typdef = tignore;
3633                   fvdef = fignore;
3634                   break;
3635                 }
3636               /* FALLTHRU */
3637             case foperator:
3638               fvdef = fstartlist;
3639               break;
3640             case flistseen:
3641               fvdef = finlist;
3642               break;
3643             }
3644           parlev++;
3645           break;
3646         case ')':
3647           if (inattribute)
3648             {
3649               if (--attrparlev == 0)
3650                 inattribute = FALSE;
3651               break;
3652             }
3653           if (definedef != dnone)
3654             break;
3655           if (objdef == ocatseen && parlev == 1)
3656             {
3657               make_C_tag (TRUE); /* an Objective C category */
3658               objdef = oignore;
3659             }
3660           if (--parlev == 0)
3661             {
3662               switch (fvdef)
3663                 {
3664                 case fstartlist:
3665                 case finlist:
3666                   fvdef = flistseen;
3667                   break;
3668                 }
3669               if (!instruct
3670                   && (typdef == tend
3671                       || typdef == ttypeseen))
3672                 {
3673                   typdef = tignore;
3674                   make_C_tag (FALSE); /* a typedef */
3675                 }
3676             }
3677           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3678             parlev = 0;
3679           break;
3680         case '{':
3681           if (definedef != dnone)
3682             break;
3683           if (typdef == ttypeseen)
3684             {
3685               /* Whenever typdef is set to tinbody (currently only
3686                  here), typdefbracelev should be set to bracelev. */
3687               typdef = tinbody;
3688               typdefbracelev = bracelev;
3689             }
3690           switch (fvdef)
3691             {
3692             case flistseen:
3693               make_C_tag (TRUE);    /* a function */
3694               /* FALLTHRU */
3695             case fignore:
3696               fvdef = fvnone;
3697               break;
3698             case fvnone:
3699               switch (objdef)
3700                 {
3701                 case otagseen:
3702                   make_C_tag (TRUE); /* an Objective C class */
3703                   objdef = oignore;
3704                   break;
3705                 case omethodtag:
3706                 case omethodparm:
3707                   make_C_tag (TRUE); /* an Objective C method */
3708                   objdef = oinbody;
3709                   break;
3710                 default:
3711                   /* Neutralize `extern "C" {' grot. */
3712                   if (bracelev == 0 && structdef == snone && nestlev == 0
3713                       && typdef == tnone)
3714                     bracelev = -1;
3715                 }
3716               break;
3717             }
3718           switch (structdef)
3719             {
3720             case skeyseen:         /* unnamed struct */
3721               pushclass_above (bracelev, NULL, 0);
3722               structdef = snone;
3723               break;
3724             case stagseen:         /* named struct or enum */
3725             case scolonseen:       /* a class */
3726               pushclass_above (bracelev,token.line+token.offset, token.length);
3727               structdef = snone;
3728               make_C_tag (FALSE);  /* a struct or enum */
3729               break;
3730             }
3731           bracelev += 1;
3732           break;
3733         case '*':
3734           if (definedef != dnone)
3735             break;
3736           if (fvdef == fstartlist)
3737             {
3738               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3739               token.valid = FALSE;
3740             }
3741           break;
3742         case '}':
3743           if (definedef != dnone)
3744             break;
3745           bracelev -= 1;
3746           if (!ignoreindent && lp == newlb.buffer + 1)
3747             {
3748               if (bracelev != 0)
3749                 token.valid = FALSE; /* unexpected value, token unreliable */
3750               bracelev = 0;     /* reset brace level if first column */
3751               parlev = 0;       /* also reset paren level, just in case... */
3752             }
3753           else if (bracelev < 0)
3754             {
3755               token.valid = FALSE; /* something gone amiss, token unreliable */
3756               bracelev = 0;
3757             }
3758           if (bracelev == 0 && fvdef == vignore)
3759             fvdef = fvnone;             /* end of function */
3760           popclass_above (bracelev);
3761           structdef = snone;
3762           /* Only if typdef == tinbody is typdefbracelev significant. */
3763           if (typdef == tinbody && bracelev <= typdefbracelev)
3764             {
3765               assert (bracelev == typdefbracelev);
3766               typdef = tend;
3767             }
3768           break;
3769         case '=':
3770           if (definedef != dnone)
3771             break;
3772           switch (fvdef)
3773             {
3774             case foperator:
3775             case finlist:
3776             case fignore:
3777             case vignore:
3778               break;
3779             case fvnameseen:
3780               if ((members && bracelev == 1)
3781                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3782                 make_C_tag (FALSE); /* a variable */
3783               /* FALLTHRU */
3784             default:
3785               fvdef = vignore;
3786             }
3787           break;
3788         case '<':
3789           if (cplpl
3790               && (structdef == stagseen || fvdef == fvnameseen))
3791             {
3792               templatelev++;
3793               break;
3794             }
3795           goto resetfvdef;
3796         case '>':
3797           if (templatelev > 0)
3798             {
3799               templatelev--;
3800               break;
3801             }
3802           goto resetfvdef;
3803         case '+':
3804         case '-':
3805           if (objdef == oinbody && bracelev == 0)
3806             {
3807               objdef = omethodsign;
3808               break;
3809             }
3810           /* FALLTHRU */
3811         resetfvdef:
3812         case '#': case '~': case '&': case '%': case '/':
3813         case '|': case '^': case '!': case '.': case '?':
3814           if (definedef != dnone)
3815             break;
3816           /* These surely cannot follow a function tag in C. */
3817           switch (fvdef)
3818             {
3819             case foperator:
3820             case finlist:
3821             case fignore:
3822             case vignore:
3823               break;
3824             default:
3825               fvdef = fvnone;
3826             }
3827           break;
3828         case '\0':
3829           if (objdef == otagseen)
3830             {
3831               make_C_tag (TRUE); /* an Objective C class */
3832               objdef = oignore;
3833             }
3834           /* If a macro spans multiple lines don't reset its state. */
3835           if (quotednl)
3836             CNL_SAVE_DEFINEDEF ();
3837           else
3838             CNL ();
3839           break;
3840         } /* switch (c) */
3841
3842     } /* while not eof */
3843
3844   free (lbs[0].lb.buffer);
3845   free (lbs[1].lb.buffer);
3846 }
3847
3848 /*
3849  * Process either a C++ file or a C file depending on the setting
3850  * of a global flag: INF is handed to C_entries with C_PLPL when
      * `cplusplus' is nonzero and with C_AUTO otherwise.
3851  */
3852 static void
3853 default_C_entries (FILE *inf)
3854 {
3855   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3856 }
3857
3858 /* Always do plain C: run C_entries on INF with a literal 0 as its
        first argument, where the sibling wrappers pass a named constant.  */
3859 static void
3860 plain_C_entries (FILE *inf)
3861 {
3862   C_entries (0, inf);
3863 }
3864
3865 /* Always do C++: run C_entries on INF with C_PLPL.  */
3866 static void
3867 Cplusplus_entries (FILE *inf)
3868 {
3869   C_entries (C_PLPL, inf);
3870 }
3871
3872 /* Always do Java: run C_entries on INF with C_JAVA.  */
3873 static void
3874 Cjava_entries (FILE *inf)
3875 {
3876   C_entries (C_JAVA, inf);
3877 }
3878
3879 /* Always do C*: run C_entries on INF with C_STAR.  */
3880 static void
3881 Cstar_entries (FILE *inf)
3882 {
3883   C_entries (C_STAR, inf);
3884 }
3885
3886 /* Always do Yacc: run C_entries on INF with YACC.  */
3887 static void
3888 Yacc_entries (FILE *inf)
3889 {
3890   C_entries (YACC, inf);
3891 }
3892
3893 \f
3894 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to a complete `for' header, so it must
        be followed by the statement to run for each line.  Before every
        iteration it tests feof (FILE_POINTER); when that is false it
        calls readline (&LINE_BUFFER, FILE_POINTER) and then sets
        CHAR_POINTER to LINE_BUFFER.buffer.  The header has no increment
        clause, so a `continue' in the body goes straight to that test
        and to reading the next line.  */
3895 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
3896   for (;                        /* loop initialization */               \
3897        !feof (file_pointer)     /* loop test */                         \
3898        &&                       /* instructions at start of loop */     \
3899           (readline (&line_buffer, file_pointer),                       \
3900            char_pointer = line_buffer.buffer,                           \
3901            TRUE);                                                       \
3902       )
3903
     /* Test whether CP points at the keyword KW followed by a character
        for which notinname holds.  On success, and only then, CP is
        reassigned to skip_spaces of the position just past KW; on
        failure CP is left untouched.  The value of the macro on success
        is that assignment, so it relies on skip_spaces returning a
        non-null pointer.  CP must be an lvalue and is evaluated several
        times.  */
3904 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
3905   ((assert ("" kw), TRUE)   /* syntax error if not a literal string */  \
3906    && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
3907    && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
3908    && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3909
3910 /* Similar to LOOKING_AT but compares with strncaseeq, does not use
        notinname (so nothing is checked about the character after KW)
        and does not skip spaces: on success CP is only advanced past KW,
        on failure it is left untouched.  */
3911 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3912   ((assert ("" kw), TRUE) /* syntax error if not a literal string */    \
3913    && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
3914    && ((cp) += sizeof (kw)-1))                  /* skip kw */
3915
3916 /*
3917  * Consume INF line by line through readline into `lb' and do nothing
3918  * else.  This serves regexp matching on files with no language defined.
3919  */
3920 static void
3921 just_read_file (FILE *inf)
3922 {
3923   for (; !feof (inf); readline (&lb, inf))
3924     continue;
3925 }
3926
3927 \f
3928 /* Fortran parsing */
3929
3930 static void F_takeprec (void);
3931 static void F_getit (FILE *);
3932
3933 /* Step dbp over an optional `*(*)' or `*' plus digits, with
3934    skip_spaces applied before and after the `*'.  Without a `*' dbp
3935    stays where skip_spaces left it; a bad suffix backs dbp up by one.  */
3936 static void
3937 F_takeprec (void)
3938 {
3939   dbp = skip_spaces (dbp);
3940   if (*dbp != '*')
3941     return;
3942
3943   dbp = skip_spaces (dbp + 1);
3944   if (strneq (dbp, "(*)", 3))
3945     dbp += 3;
3946   else if (ISDIGIT (*dbp))
3947     {
3948       /* Swallow the whole run of digits.  */
3949       for (dbp++; ISDIGIT (*dbp); dbp++)
3950         continue;
3951     }
3952   else
3953     dbp--;                      /* force failure */
3954 }
3955
3956 static void
3957 F_getit (FILE *inf)
3958 {
3959   register char *cp;
3960
3961   dbp = skip_spaces (dbp);
3962   if (*dbp == '\0')
3963     {
3964       readline (&lb, inf);
3965       dbp = lb.buffer;
3966       if (dbp[5] != '&')
3967         return;
3968       dbp += 6;
3969       dbp = skip_spaces (dbp);
3970     }
3971   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3972     return;
3973   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3974     continue;
3975   make_tag (dbp, cp-dbp, TRUE,
3976             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3977 }
3978
3979
     /* Tag Fortran program units found in INF.  Each line is examined on
        its own: an optional leading `%', the optional prefixes
        `recursive', `pure' and `elemental' (tried once each, in that
        order) and an optional type keyword are stepped over; what
        follows is tagged through F_getit when it is `function',
        `subroutine', `entry' or a `blockdata'/`block data' with text
        after it.  A block data with nothing after it is tagged as
        "blockdata".  Keywords are matched with nocase_tail, which is
        defined elsewhere; presumably it advances dbp past the keyword
        when it matches -- confirm there.  */
3980 static void
3981 Fortran_functions (FILE *inf)
3982 {
3983   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3984     {
3985       if (*dbp == '%')
3986         dbp++;                  /* Ratfor escape to fortran */
3987       dbp = skip_spaces (dbp);
3988       if (*dbp == '\0')
3989         continue;
3990
               /* LOOKING_AT_NOCASE only steps over the keyword itself, hence
                  the explicit skip_spaces after each successful match.  */
3991       if (LOOKING_AT_NOCASE (dbp, "recursive"))
3992         dbp = skip_spaces (dbp);
3993
3994       if (LOOKING_AT_NOCASE (dbp, "pure"))
3995         dbp = skip_spaces (dbp);
3996
3997       if (LOOKING_AT_NOCASE (dbp, "elemental"))
3998         dbp = skip_spaces (dbp);
3999
               /* Optional type specification.  In this switch `break' goes on
                  to the keyword test below, while `continue' abandons the
                  line and lets LOOP_ON_INPUT_LINES read the next one.  */
4000       switch (lowcase (*dbp))
4001         {
4002         case 'i':
4003           if (nocase_tail ("integer"))
4004             F_takeprec ();
4005           break;
4006         case 'r':
4007           if (nocase_tail ("real"))
4008             F_takeprec ();
4009           break;
4010         case 'l':
4011           if (nocase_tail ("logical"))
4012             F_takeprec ();
4013           break;
4014         case 'c':
4015           if (nocase_tail ("complex") || nocase_tail ("character"))
4016             F_takeprec ();
4017           break;
4018         case 'd':
                 /* `double' only counts when `precision' follows it on the
                    same line; otherwise the line is dropped.  */
4019           if (nocase_tail ("double"))
4020             {
4021               dbp = skip_spaces (dbp);
4022               if (*dbp == '\0')
4023                 continue;
4024               if (nocase_tail ("precision"))
4025                 break;
4026               continue;
4027             }
4028           break;
4029         }
4030       dbp = skip_spaces (dbp);
4031       if (*dbp == '\0')
4032         continue;
               /* Keyword introducing something to tag.  Every case ends with
                  `continue', whether or not its keyword matched.  */
4033       switch (lowcase (*dbp))
4034         {
4035         case 'f':
4036           if (nocase_tail ("function"))
4037             F_getit (inf);
4038           continue;
4039         case 's':
4040           if (nocase_tail ("subroutine"))
4041             F_getit (inf);
4042           continue;
4043         case 'e':
4044           if (nocase_tail ("entry"))
4045             F_getit (inf);
4046           continue;
4047         case 'b':
4048           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4049             {
4050               dbp = skip_spaces (dbp);
4051               if (*dbp == '\0') /* assume un-named */
4052                 make_tag ("blockdata", 9, TRUE,
4053                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4054               else
4055                 F_getit (inf);  /* look for name */
4056             }
4057           continue;
4058         }
4059     }
4060 }
4061
4062 \f
4063 /*
4064  * Ada parsing
4065  * Original code by
4066  * Philippe Waroquiers (1998)
4067  */
4068
4069 /* Once we are positioned after an "interesting" keyword, let's get
4070    the real tag value necessary.
        The name is searched for from dbp onwards; when the current line
        is used up or goes on with a `--' comment, the next line of INF
        is read.  The words `body' and `type' are passed over (assuming
        nocase_tail advances dbp on a match), and `body' also replaces
        NAME_QUALIFIER with "/b".  The name is either everything up to
        the closing `"' when it starts with a double quote, or a run of
        letters, digits, `_' and `.'.  The tag made is that name with
        NAME_QUALIFIER appended.  No tag is made when no name character
        is found, or when feof (INF) becomes true before a name is
        reached.  */
4071 static void
4072 Ada_getit (FILE *inf, const char *name_qualifier)
4073 {
4074   register char *cp;
4075   char *name;
4076   char c;
4077
4078   while (!feof (inf))
4079     {
4080       dbp = skip_spaces (dbp);
4081       if (*dbp == '\0'
4082           || (dbp[0] == '-' && dbp[1] == '-'))
4083         {
4084           readline (&lb, inf);
4085           dbp = lb.buffer;
4086         }
4087       switch (lowcase (*dbp))
4088         {
4089         case 'b':
4090           if (nocase_tail ("body"))
4091             {
4092               /* Skipping body of   procedure body   or   package body or ....
4093                  resetting qualifier to body instead of spec. */
4094               name_qualifier = "/b";
4095               continue;
4096             }
4097           break;
4098         case 't':
4099           /* Skipping type of   task type   or   protected type ... */
4100           if (nocase_tail ("type"))
4101             continue;
4102           break;
4103         }
4104       if (*dbp == '"')
4105         {
                 /* Quoted name: it runs up to the next double quote, or to
                    the end of the line when there is none.  */
4106           dbp += 1;
4107           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4108             continue;
4109         }
4110       else
4111         {
4112           dbp = skip_spaces (dbp);
4113           for (cp = dbp;
4114                (*cp != '\0'
4115                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4116                cp++)
4117             continue;
4118           if (cp == dbp)
4119             return;
4120         }
               /* Cut the line at the end of the name just long enough for
                  concat to copy it, then put the saved character back.  The
                  string concat returns is released with free below.  */
4121       c = *cp;
4122       *cp = '\0';
4123       name = concat (dbp, name_qualifier, "");
4124       *cp = c;
4125       make_tag (name, strlen (name), TRUE,
4126                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4127       free (name);
               /* Only after a quoted name is dbp moved, to just past the
                  closing quote; otherwise it still points at the name.  */
4128       if (c == '"')
4129         dbp = cp + 1;
4130       return;
4131     }
4132 }
4133
     /* Scan INF for Ada declarations and hand each one to Ada_getit with
        a qualifier naming its kind: "/f" for `function', "/p" for
        `procedure', "/s" for `package', "/t" for `protected' and `type',
        "/k" for `task'.  String literals, `--' comments and single-quoted
        characters are stepped over.  When packages_only is set, only
        `package' and `protected' are looked for; `type' is looked for
        only when typedefs is set.  */
4134 static void
4135 Ada_funcs (FILE *inf)
4136 {
         /* inquote stays TRUE across lines while a string literal opened
            on an earlier line has not been closed yet.  */
4137   bool inquote = FALSE;
4138   bool skip_till_semicolumn = FALSE;
4139
4140   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4141     {
4142       while (*dbp != '\0')
4143         {
4144           /* Skip a string i.e. "abcd". */
4145           if (inquote || (*dbp == '"'))
4146             {
                     /* When the opening quote is at dbp, search after it;
                        when the string began on an earlier line, search
                        from dbp itself.  */
4147               dbp = etags_strchr (dbp + !inquote, '"');
4148               if (dbp != NULL)
4149                 {
4150                   inquote = FALSE;
4151                   dbp += 1;
4152                   continue;     /* advance char */
4153                 }
4154               else
4155                 {
                         /* dbp is NULL here; it is set again by
                            LOOP_ON_INPUT_LINES for the next line.  */
4156                   inquote = TRUE;
4157                   break;        /* advance line */
4158                 }
4159             }
4160
4161           /* Skip comments. */
4162           if (dbp[0] == '-' && dbp[1] == '-')
4163             break;              /* advance line */
4164
4165           /* Skip character enclosed in single quote i.e. 'a'
4166              and skip single quote starting an attribute i.e. 'Image. */
4167           if (*dbp == '\'')
4168             {
4169               dbp++ ;
4170               if (*dbp != '\0')
4171                 dbp++;
4172               continue;
4173             }
4174
4175           if (skip_till_semicolumn)
4176             {
4177               if (*dbp == ';')
4178                 skip_till_semicolumn = FALSE;
4179               dbp++;
4180               continue;         /* advance char */
4181             }
4182
4183           /* Search for beginning of a token.  */
4184           if (!begtoken (*dbp))
4185             {
4186               dbp++;
4187               continue;         /* advance char */
4188             }
4189
4190           /* We are at the beginning of a token. */
                 /* In each case below, a keyword that matched is handled and
                    then `continue' resumes the character loop; when nothing
                    matched, `break' leaves the switch and the token is
                    skipped by the loop that follows the switch.  */
4191           switch (lowcase (*dbp))
4192             {
4193             case 'f':
4194               if (!packages_only && nocase_tail ("function"))
4195                 Ada_getit (inf, "/f");
4196               else
4197                 break;          /* from switch */
4198               continue;         /* advance char */
4199             case 'p':
4200               if (!packages_only && nocase_tail ("procedure"))
4201                 Ada_getit (inf, "/p");
4202               else if (nocase_tail ("package"))
4203                 Ada_getit (inf, "/s");
4204               else if (nocase_tail ("protected")) /* protected type */
4205                 Ada_getit (inf, "/t");
4206               else
4207                 break;          /* from switch */
4208               continue;         /* advance char */
4209
4210             case 'u':
4211               if (typedefs && !packages_only && nocase_tail ("use"))
4212                 {
4213                   /* when tagging types, avoid tagging  use type Pack.Typename;
4214                      for this, we will skip everything till a ; */
4215                   skip_till_semicolumn = TRUE;
4216                   continue;     /* advance char */
4217                 }
                     /* No `break' here: control falls into case 't'.
                        NOTE(review): this looks harmless, since the `task'
                        and `type' tests there should not match a token that
                        starts with `u', so the switch is left through that
                        case's `break' -- confirm against nocase_tail.  */
4218
4219             case 't':
4220               if (!packages_only && nocase_tail ("task"))
4221                 Ada_getit (inf, "/k");
4222               else if (typedefs && !packages_only && nocase_tail ("type"))
4223                 {
4224                   Ada_getit (inf, "/t");
                         /* Nothing else is looked for on the line that
                            holds the type name.  */
4225                   while (*dbp != '\0')
4226                     dbp += 1;
4227                 }
4228               else
4229                 break;          /* from switch */
4230               continue;         /* advance char */
4231             }
4232
4233           /* Look for the end of the token. */
4234           while (!endtoken (*dbp))
4235             dbp++;
4236
4237         } /* advance char */
4238     } /* advance line */
4239 }
4240
4241 \f
4242 /*
4243  * Unix and microcontroller assembly tag handling
 * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4245  * Idea by Bob Weiner, Motorola Inc. (1994)
4246  */
4247 static void
4248 Asm_labels (FILE *inf)
4249 {
4250   register char *cp;
4251
4252   LOOP_ON_INPUT_LINES (inf, lb, cp)
4253     {
4254       /* If first char is alphabetic or one of [_.$], test for colon
4255          following identifier. */
4256       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4257         {
4258           /* Read past label. */
4259           cp++;
4260           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4261             cp++;
4262           if (*cp == ':' || iswhite (*cp))
4263             /* Found end of label, so copy it and add it to the table. */
4264             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4265                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4266         }
4267     }
4268 }
4269
4270 \f
4271 /*
4272  * Perl support
4273  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4274  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4275  * Perl variable names: /^(my|local).../
4276  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4277  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4278  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4279  */
4280 static void
4281 Perl_functions (FILE *inf)
4282 {
4283   char *package = savestr ("main"); /* current package name */
4284   register char *cp;
4285
4286   LOOP_ON_INPUT_LINES (inf, lb, cp)
4287     {
4288       cp = skip_spaces (cp);
4289
4290       if (LOOKING_AT (cp, "package"))
4291         {
4292           free (package);
4293           get_tag (cp, &package);
4294         }
4295       else if (LOOKING_AT (cp, "sub"))
4296         {
4297           char *pos, *sp;
4298
4299         subr:
4300           sp = cp;
4301           while (!notinname (*cp))
4302             cp++;
4303           if (cp == sp)
4304             continue;           /* nothing found */
4305           if ((pos = etags_strchr (sp, ':')) != NULL
4306               && pos < cp && pos[1] == ':')
4307             /* The name is already qualified. */
4308             make_tag (sp, cp - sp, TRUE,
4309                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4310           else
4311             /* Qualify it. */
4312             {
4313               char savechar, *name;
4314
4315               savechar = *cp;
4316               *cp = '\0';
4317               name = concat (package, "::", sp);
4318               *cp = savechar;
4319               make_tag (name, strlen (name), TRUE,
4320                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4321               free (name);
4322             }
4323         }
4324       else if (LOOKING_AT (cp, "use constant")
4325                || LOOKING_AT (cp, "use constant::defer"))
4326         {
4327           /* For hash style multi-constant like
4328                 use constant { FOO => 123,
4329                                BAR => 456 };
4330              only the first FOO is picked up.  Parsing across the value
4331              expressions would be difficult in general, due to possible nested
4332              hashes, here-documents, etc.  */
4333           if (*cp == '{')
4334             cp = skip_spaces (cp+1);
4335           goto subr;
4336         }
4337       else if (globals) /* only if we are tagging global vars */
4338         {
4339           /* Skip a qualifier, if any. */
4340           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4341           /* After "my" or "local", but before any following paren or space. */
4342           char *varstart = cp;
4343
4344           if (qual              /* should this be removed?  If yes, how? */
4345               && (*cp == '$' || *cp == '@' || *cp == '%'))
4346             {
4347               varstart += 1;
4348               do
4349                 cp++;
4350               while (ISALNUM (*cp) || *cp == '_');
4351             }
4352           else if (qual)
4353             {
4354               /* Should be examining a variable list at this point;
4355                  could insist on seeing an open parenthesis. */
4356               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4357                 cp++;
4358             }
4359           else
4360             continue;
4361
4362           make_tag (varstart, cp - varstart, FALSE,
4363                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4364         }
4365     }
4366   free (package);
4367 }
4368
4369
4370 /*
4371  * Python support
4372  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4373  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4374  * More ideas by seb bacon <seb@jamkit.com> (2002)
4375  */
4376 static void
4377 Python_functions (FILE *inf)
4378 {
4379   register char *cp;
4380
4381   LOOP_ON_INPUT_LINES (inf, lb, cp)
4382     {
4383       cp = skip_spaces (cp);
4384       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4385         {
4386           char *name = cp;
4387           while (!notinname (*cp) && *cp != ':')
4388             cp++;
4389           make_tag (name, cp - name, TRUE,
4390                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4391         }
4392     }
4393 }
4394
4395 \f
4396 /*
4397  * PHP support
4398  * Look for:
4399  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4400  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4401  *  - /^[ \t]*define\(\"[^\"]+/
4402  * Only with --members:
4403  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4404  * Idea by Diez B. Roggisch (2001)
4405  */
4406 static void
4407 PHP_functions (FILE *inf)
4408 {
4409   register char *cp, *name;
4410   bool search_identifier = FALSE;
4411
4412   LOOP_ON_INPUT_LINES (inf, lb, cp)
4413     {
4414       cp = skip_spaces (cp);
4415       name = cp;
4416       if (search_identifier
4417           && *cp != '\0')
4418         {
4419           while (!notinname (*cp))
4420             cp++;
4421           make_tag (name, cp - name, TRUE,
4422                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4423           search_identifier = FALSE;
4424         }
4425       else if (LOOKING_AT (cp, "function"))
4426         {
4427           if (*cp == '&')
4428             cp = skip_spaces (cp+1);
4429           if (*cp != '\0')
4430             {
4431               name = cp;
4432               while (!notinname (*cp))
4433                 cp++;
4434               make_tag (name, cp - name, TRUE,
4435                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4436             }
4437           else
4438             search_identifier = TRUE;
4439         }
4440       else if (LOOKING_AT (cp, "class"))
4441         {
4442           if (*cp != '\0')
4443             {
4444               name = cp;
4445               while (*cp != '\0' && !iswhite (*cp))
4446                 cp++;
4447               make_tag (name, cp - name, FALSE,
4448                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4449             }
4450           else
4451             search_identifier = TRUE;
4452         }
4453       else if (strneq (cp, "define", 6)
4454                && (cp = skip_spaces (cp+6))
4455                && *cp++ == '('
4456                && (*cp == '"' || *cp == '\''))
4457         {
4458           char quote = *cp++;
4459           name = cp;
4460           while (*cp != quote && *cp != '\0')
4461             cp++;
4462           make_tag (name, cp - name, FALSE,
4463                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4464         }
4465       else if (members
4466                && LOOKING_AT (cp, "var")
4467                && *cp == '$')
4468         {
4469           name = cp;
4470           while (!notinname (*cp))
4471             cp++;
4472           make_tag (name, cp - name, FALSE,
4473                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4474         }
4475     }
4476 }
4477
4478 \f
4479 /*
4480  * Cobol tag functions
4481  * We could look for anything that could be a paragraph name.
4482  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4483  * Idea by Corny de Souza (1993)
4484  */
4485 static void
4486 Cobol_paragraphs (FILE *inf)
4487 {
4488   register char *bp, *ep;
4489
4490   LOOP_ON_INPUT_LINES (inf, lb, bp)
4491     {
4492       if (lb.len < 9)
4493         continue;
4494       bp += 8;
4495
4496       /* If eoln, compiler option or comment ignore whole line. */
4497       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4498         continue;
4499
4500       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4501         continue;
4502       if (*ep++ == '.')
4503         make_tag (bp, ep - bp, TRUE,
4504                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4505     }
4506 }
4507
4508 \f
4509 /*
4510  * Makefile support
4511  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4512  */
4513 static void
4514 Makefile_targets (FILE *inf)
4515 {
4516   register char *bp;
4517
4518   LOOP_ON_INPUT_LINES (inf, lb, bp)
4519     {
4520       if (*bp == '\t' || *bp == '#')
4521         continue;
4522       while (*bp != '\0' && *bp != '=' && *bp != ':')
4523         bp++;
4524       if (*bp == ':' || (globals && *bp == '='))
4525         {
4526           /* We should detect if there is more than one tag, but we do not.
4527              We just skip initial and final spaces. */
4528           char * namestart = skip_spaces (lb.buffer);
4529           while (--bp > namestart)
4530             if (!notinname (*bp))
4531               break;
4532           make_tag (namestart, bp - namestart + 1, TRUE,
4533                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4534         }
4535     }
4536 }
4537
4538 \f
4539 /*
4540  * Pascal parsing
4541  * Original code by Mosur K. Mohan (1989)
4542  *
4543  *  Locates tags for procedures & functions.  Doesn't do any type- or
4544  *  var-definitions.  It does look for the keyword "extern" or
4545  *  "forward" immediately following the procedure statement; if found,
4546  *  the tag is skipped.
4547  */
/* Scan INF one character at a time through the global pointer DBP,
   refilling LB with readline whenever the end of a line is reached.
   A name following "procedure" or "function" is remembered (together
   with a copy of its line in TLINE) and is tagged only once the token
   after the terminating ';' turns out to be neither "extern" nor
   "forward".  Text inside { }, (* *) and '..' is ignored.  */
static void
Pascal_functions (FILE *inf)
{
  linebuffer tline;             /* mostly copied from C_entries */
  long save_lcno;
  int save_lineno, namelen, taglen;
  char c, *name;

  bool                          /* each of these flags is TRUE if: */
    incomment,                  /* point is inside a comment */
    inquote,                    /* point is inside '..' string */
    get_tagname,                /* point is after PROCEDURE/FUNCTION
                                   keyword, so next item = potential tag */
    found_tag,                  /* point is after a potential tag */
    inparms,                    /* point is within parameter-list */
    verify_tag;                 /* point has passed the parm-list, so the
                                   next token will determine whether this
                                   is a FORWARD/EXTERN to be ignored, or
                                   whether it is a real tag */

  save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
  name = NULL;                  /* keep compiler quiet */
  /* Start with an empty line so that the first iteration reads one.  */
  dbp = lb.buffer;
  *dbp = '\0';
  linebuffer_init (&tline);

  incomment = inquote = FALSE;
  found_tag = FALSE;            /* have a proc name; check if extern */
  get_tagname = FALSE;          /* found "procedure" keyword         */
  inparms = FALSE;              /* found '(' after "proc"            */
  verify_tag = FALSE;           /* check if "extern" is ahead        */


  while (!feof (inf))           /* long main loop to get next char */
    {
      c = *dbp++;
      if (c == '\0')            /* if end of line */
        {
          readline (&lb, inf);
          dbp = lb.buffer;
          if (*dbp == '\0')
            continue;
          if (!((found_tag && verify_tag)
                || get_tagname))
            c = *dbp++;         /* only if don't need *dbp pointing
                                   to the beginning of the name of
                                   the procedure or function */
        }
      if (incomment)
        {
          if (c == '}')         /* within { } comments */
            incomment = FALSE;
          else if (c == '*' && *dbp == ')') /* within (* *) comments */
            {
              dbp++;
              incomment = FALSE;
            }
          continue;
        }
      else if (inquote)
        {
          if (c == '\'')
            inquote = FALSE;
          continue;
        }
      else
        switch (c)
          {
          case '\'':
            inquote = TRUE;     /* found first quote */
            continue;
          case '{':             /* found open { comment */
            incomment = TRUE;
            continue;
          case '(':
            if (*dbp == '*')    /* found open (* comment */
              {
                incomment = TRUE;
                dbp++;
              }
            else if (found_tag) /* found '(' after tag, i.e., parm-list */
              inparms = TRUE;
            continue;
          case ')':             /* end of parms list */
            if (inparms)
              inparms = FALSE;
            continue;
          case ';':
            if (found_tag && !inparms) /* end of proc or fn stmt */
              {
                verify_tag = TRUE;
                break;          /* leave the switch, not the loop, so
                                   that the verification below runs */
              }
            continue;
          }
      /* Verification is postponed while the next character is a
         blank, and resumes on a later iteration.  */
      if (found_tag && verify_tag && (*dbp != ' '))
        {
          /* Check if this is an "extern" declaration. */
          if (*dbp == '\0')
            continue;
          if (lowcase (*dbp) == 'e')
            {
              if (nocase_tail ("extern")) /* superfluous, really! */
                {
                  found_tag = FALSE;
                  verify_tag = FALSE;
                }
            }
          else if (lowcase (*dbp) == 'f')
            {
              if (nocase_tail ("forward")) /* check for forward reference */
                {
                  found_tag = FALSE;
                  verify_tag = FALSE;
                }
            }
          if (found_tag && verify_tag) /* not external proc, so make tag */
            {
              found_tag = FALSE;
              verify_tag = FALSE;
              /* NAME points into TLINE, the copy of the line on which
                 the name was found.  */
              make_tag (name, namelen, TRUE,
                        tline.buffer, taglen, save_lineno, save_lcno);
              continue;
            }
        }
      if (get_tagname)          /* grab name of proc or fn */
        {
          char *cp;

          if (*dbp == '\0')
            continue;

          /* Find block name. */
          for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
            continue;

          /* Save all values for later tagging. */
          linebuffer_setlen (&tline, lb.len);
          strcpy (tline.buffer, lb.buffer);
          save_lineno = lineno;
          save_lcno = linecharno;
          name = tline.buffer + (dbp - lb.buffer);
          namelen = cp - dbp;
          taglen = cp - lb.buffer + 1;

          dbp = cp;             /* set dbp to e-o-token */
          get_tagname = FALSE;
          found_tag = TRUE;
          continue;

          /* And proceed to check for "extern". */
        }
      else if (!incomment && !inquote && !found_tag)
        {
          /* Check for proc/fn keywords. */
          switch (lowcase (c))
            {
            case 'p':
              if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
                get_tagname = TRUE;
              continue;
            case 'f':
              if (nocase_tail ("unction")) /* c = 'f', dbp has advanced */
                get_tagname = TRUE;
              continue;
            }
        }
    } /* while not eof */

  free (tline.buffer);
}
4719
4720 \f
4721 /*
4722  * Lisp tag functions
4723  *  look for (def or (DEF, quote or QUOTE
4724  */
4725
4726 static void L_getit (void);
4727
4728 static void
4729 L_getit (void)
4730 {
4731   if (*dbp == '\'')             /* Skip prefix quote */
4732     dbp++;
4733   else if (*dbp == '(')
4734   {
4735     dbp++;
4736     /* Try to skip "(quote " */
4737     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4738       /* Ok, then skip "(" before name in (defstruct (foo)) */
4739       dbp = skip_spaces (dbp);
4740   }
4741   get_tag (dbp, NULL);
4742 }
4743
4744 static void
4745 Lisp_functions (FILE *inf)
4746 {
4747   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4748     {
4749       if (dbp[0] != '(')
4750         continue;
4751
4752       /* "(defvar foo)" is a declaration rather than a definition.  */
4753       if (! declarations)
4754         {
4755           char *p = dbp + 1;
4756           if (LOOKING_AT (p, "defvar"))
4757             {
4758               p = skip_name (p); /* past var name */
4759               p = skip_spaces (p);
4760               if (*p == ')')
4761                 continue;
4762             }
4763         }
4764
4765       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4766         {
4767           dbp = skip_non_spaces (dbp);
4768           dbp = skip_spaces (dbp);
4769           L_getit ();
4770         }
4771       else
4772         {
4773           /* Check for (foo::defmumble name-defined ... */
4774           do
4775             dbp++;
4776           while (!notinname (*dbp) && *dbp != ':');
4777           if (*dbp == ':')
4778             {
4779               do
4780                 dbp++;
4781               while (*dbp == ':');
4782
4783               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4784                 {
4785                   dbp = skip_non_spaces (dbp);
4786                   dbp = skip_spaces (dbp);
4787                   L_getit ();
4788                 }
4789             }
4790         }
4791     }
4792 }
4793
4794 \f
4795 /*
4796  * Lua script language parsing
4797  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4798  *
4799  *  "function" and "local function" are tags if they start at column 1.
4800  */
4801 static void
4802 Lua_functions (FILE *inf)
4803 {
4804   register char *bp;
4805
4806   LOOP_ON_INPUT_LINES (inf, lb, bp)
4807     {
4808       if (bp[0] != 'f' && bp[0] != 'l')
4809         continue;
4810
4811       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4812
4813       if (LOOKING_AT (bp, "function"))
4814         get_tag (bp, NULL);
4815     }
4816 }
4817
4818 \f
4819 /*
4820  * PostScript tags
4821  * Just look for lines where the first character is '/'
4822  * Also look at "defineps" for PSWrap
4823  * Ideas by:
4824  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4825  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4826  */
4827 static void
4828 PS_functions (FILE *inf)
4829 {
4830   register char *bp, *ep;
4831
4832   LOOP_ON_INPUT_LINES (inf, lb, bp)
4833     {
4834       if (bp[0] == '/')
4835         {
4836           for (ep = bp+1;
4837                *ep != '\0' && *ep != ' ' && *ep != '{';
4838                ep++)
4839             continue;
4840           make_tag (bp, ep - bp, TRUE,
4841                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4842         }
4843       else if (LOOKING_AT (bp, "defineps"))
4844         get_tag (bp, NULL);
4845     }
4846 }
4847
4848 \f
4849 /*
4850  * Forth tags
4851  * Ignore anything after \ followed by space or in ( )
4852  * Look for words defined by :
4853  * Look for constant, code, create, defer, value, and variable
4854  * OBP extensions:  Look for buffer:, field,
4855  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4856  */
4857 static void
4858 Forth_words (FILE *inf)
4859 {
4860   register char *bp;
4861
4862   LOOP_ON_INPUT_LINES (inf, lb, bp)
4863     while ((bp = skip_spaces (bp))[0] != '\0')
4864       if (bp[0] == '\\' && iswhite (bp[1]))
4865         break;                  /* read next line */
4866       else if (bp[0] == '(' && iswhite (bp[1]))
4867         do                      /* skip to ) or eol */
4868           bp++;
4869         while (*bp != ')' && *bp != '\0');
4870       else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4871                || LOOKING_AT_NOCASE (bp, "constant")
4872                || LOOKING_AT_NOCASE (bp, "code")
4873                || LOOKING_AT_NOCASE (bp, "create")
4874                || LOOKING_AT_NOCASE (bp, "defer")
4875                || LOOKING_AT_NOCASE (bp, "value")
4876                || LOOKING_AT_NOCASE (bp, "variable")
4877                || LOOKING_AT_NOCASE (bp, "buffer:")
4878                || LOOKING_AT_NOCASE (bp, "field"))
4879         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4880       else
4881         bp = skip_non_spaces (bp);
4882 }
4883
4884 \f
4885 /*
4886  * Scheme tag functions
4887  * look for (def... xyzzy
4888  *          (def... (xyzzy
4889  *          (def ... ((...(xyzzy ....
4890  *          (set! xyzzy
4891  * Original code by Ken Haase (1985?)
4892  */
4893 static void
4894 Scheme_functions (FILE *inf)
4895 {
4896   register char *bp;
4897
4898   LOOP_ON_INPUT_LINES (inf, lb, bp)
4899     {
4900       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4901         {
4902           bp = skip_non_spaces (bp+4);
4903           /* Skip over open parens and white space.  Don't continue past
4904              '\0'. */
4905           while (*bp && notinname (*bp))
4906             bp++;
4907           get_tag (bp, NULL);
4908         }
4909       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4910         get_tag (bp, NULL);
4911     }
4912 }
4913
4914 \f
4915 /* Find tags in TeX and LaTeX input files.  */
4916
4917 /* TEX_toktab is a table of TeX control sequences that define tags.
4918  * Each entry records one such control sequence.
4919  *
4920  * Original code from who knows whom.
4921  * Ideas by:
4922  *   Stefan Monnier (2002)
4923  */
4924
/* Table with tag tokens.  It is NULL until TeX_commands has it built
   by TEX_decode_env; TeX_commands scans it up to the first entry whose
   buffer is NULL.  */
static linebuffer *TEX_toktab = NULL;

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are separated by colons.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape and grouping characters used by TeX_commands.  TEX_mode
   overwrites all three each time it is called.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4940
4941 /*
4942  * TeX/LaTeX scanning loop.
4943  */
4944 static void
4945 TeX_commands (FILE *inf)
4946 {
4947   char *cp;
4948   linebuffer *key;
4949
4950   /* Select either \ or ! as escape character.  */
4951   TEX_mode (inf);
4952
4953   /* Initialize token table once from environment. */
4954   if (TEX_toktab == NULL)
4955     TEX_decode_env ("TEXTAGS", TEX_defenv);
4956
4957   LOOP_ON_INPUT_LINES (inf, lb, cp)
4958     {
4959       /* Look at each TEX keyword in line. */
4960       for (;;)
4961         {
4962           /* Look for a TEX escape. */
4963           while (*cp++ != TEX_esc)
4964             if (cp[-1] == '\0' || cp[-1] == '%')
4965               goto tex_next_line;
4966
4967           for (key = TEX_toktab; key->buffer != NULL; key++)
4968             if (strneq (cp, key->buffer, key->len))
4969               {
4970                 register char *p;
4971                 int namelen, linelen;
4972                 bool opgrp = FALSE;
4973
4974                 cp = skip_spaces (cp + key->len);
4975                 if (*cp == TEX_opgrp)
4976                   {
4977                     opgrp = TRUE;
4978                     cp++;
4979                   }
4980                 for (p = cp;
4981                      (!iswhite (*p) && *p != '#' &&
4982                       *p != TEX_opgrp && *p != TEX_clgrp);
4983                      p++)
4984                   continue;
4985                 namelen = p - cp;
4986                 linelen = lb.len;
4987                 if (!opgrp || *p == TEX_clgrp)
4988                   {
4989                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4990                       p++;
4991                     linelen = p - lb.buffer + 1;
4992                   }
4993                 make_tag (cp, namelen, TRUE,
4994                           lb.buffer, linelen, lineno, linecharno);
4995                 goto tex_next_line; /* We only tag a line once */
4996               }
4997         }
4998     tex_next_line:
4999       ;
5000     }
5001 }
5002
5003 #define TEX_LESC '\\'
5004 #define TEX_SESC '!'
5005
5006 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5007    chars accordingly. */
5008 static void
5009 TEX_mode (FILE *inf)
5010 {
5011   int c;
5012
5013   while ((c = getc (inf)) != EOF)
5014     {
5015       /* Skip to next line if we hit the TeX comment char. */
5016       if (c == '%')
5017         while (c != '\n' && c != EOF)
5018           c = getc (inf);
5019       else if (c == TEX_LESC || c == TEX_SESC )
5020         break;
5021     }
5022
5023   if (c == TEX_LESC)
5024     {
5025       TEX_esc = TEX_LESC;
5026       TEX_opgrp = '{';
5027       TEX_clgrp = '}';
5028     }
5029   else
5030     {
5031       TEX_esc = TEX_SESC;
5032       TEX_opgrp = '<';
5033       TEX_clgrp = '>';
5034     }
5035   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5036      No attempt is made to correct the situation. */
5037   rewind (inf);
5038 }
5039
5040 /* Read environment and prepend it to the default string.
5041    Build token table. */
5042 static void
5043 TEX_decode_env (const char *evarname, const char *defenv)
5044 {
5045   register const char *env, *p;
5046   int i, len;
5047
5048   /* Append default string to environment. */
5049   env = getenv (evarname);
5050   if (!env)
5051     env = defenv;
5052   else
5053     env = concat (env, defenv, "");
5054
5055   /* Allocate a token table */
5056   for (len = 1, p = env; p;)
5057     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5058       len++;
5059   TEX_toktab = xnew (len, linebuffer);
5060
5061   /* Unpack environment string into token table. Be careful about */
5062   /* zero-length strings (leading ':', "::" and trailing ':') */
5063   for (i = 0; *env != '\0';)
5064     {
5065       p = etags_strchr (env, ':');
5066       if (!p)                   /* End of environment string. */
5067         p = env + strlen (env);
5068       if (p - env > 0)
5069         {                       /* Only non-zero strings. */
5070           TEX_toktab[i].buffer = savenstr (env, p - env);
5071           TEX_toktab[i].len = p - env;
5072           i++;
5073         }
5074       if (*p)
5075         env = p + 1;
5076       else
5077         {
5078           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5079           TEX_toktab[i].len = 0;
5080           break;
5081         }
5082     }
5083 }
5084
5085 \f
5086 /* Texinfo support.  Dave Love, Mar. 2000.  */
5087 static void
5088 Texinfo_nodes (FILE *inf)
5089 {
5090   char *cp, *start;
5091   LOOP_ON_INPUT_LINES (inf, lb, cp)
5092     if (LOOKING_AT (cp, "@node"))
5093       {
5094         start = cp;
5095         while (*cp != '\0' && *cp != ',')
5096           cp++;
5097         make_tag (start, cp - start, TRUE,
5098                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5099       }
5100 }
5101
5102 \f
5103 /*
5104  * HTML support.
5105  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5106  * Contents of <a name=xxx> are tags with name xxx.
5107  *
5108  * Francesco Potortì, 2002.
5109  */
/* Scan INF with a small state machine driven by the four flags below.
   The flags keep their values from one input line to the next, so an
   HTML tag or the text to be grabbed may span lines.  The global
   TOKEN_NAME holds the value of the last name= or id= attribute seen
   and is used as the name of the next tag made; it is emptied after
   each tag.  */
static void
HTML_labels (FILE *inf)
{
  bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
  bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
  bool intag = FALSE;           /* inside an html tag, looking for ID= */
  bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = FALSE;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag) /* look for "name=" or "id=" */
          {
            /* Stop at the first character that could start "name=" or
               "id=", at the end of the HTML tag, or at end of line.  */
            while (*dbp != '\0' && *dbp != '>'
                   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = FALSE;
                continue;       /* look on the same line */
              }
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                /* Only double quotes are recognized as delimiters.  */
                bool quoted = (dbp[0] == '"');

                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                /* Save the attribute value as the name of the tag.  */
                linebuffer_setlen (&token_name, end - dbp);
                memcpy (token_name.buffer, dbp, end - dbp);
                token_name.buffer[end - dbp] = '\0';

                dbp = end;
                intag = FALSE;  /* we found what we looked for */
                skiptag = TRUE; /* skip to the end of the tag */
                getnext = TRUE; /* then grab the text */
                continue;       /* look on the same line */
              }
            dbp += 1;
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text; examine it
                   first, GETNEXT stays set.  */
                intag = TRUE;
                inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The tagged text runs up to the next '<' or end of line
               and starts at DBP rather than at the start of the line.  */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, TRUE,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = FALSE;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = TRUE;
            /* NOTE(review): INANCHOR is only ever set to TRUE in this
               branch, so it keeps a TRUE value left by an earlier
               anchor when this tag is not one -- confirm this is
               intended.  */
            if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = TRUE;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                intag = FALSE;
                getnext = TRUE;
                continue;       /* look on the same line */
              }
            dbp += 1;
          }
      }
}
5221
5222 \f
5223 /*
5224  * Prolog support
5225  *
5226  * Assumes that the predicate or rule starts at column 0.
5227  * Only the first clause of a predicate or rule is added.
5228  * Original code by Sunichirou Sugou (1989)
5229  * Rewritten by Anders Lindgren (1996)
5230  */
5231 static size_t prolog_pr (char *, char *);
5232 static void prolog_skip_comment (linebuffer *, FILE *);
5233 static size_t prolog_atom (char *, size_t);
5234
5235 static void
5236 Prolog_functions (FILE *inf)
5237 {
5238   char *cp, *last;
5239   size_t len;
5240   size_t allocated;
5241
5242   allocated = 0;
5243   len = 0;
5244   last = NULL;
5245
5246   LOOP_ON_INPUT_LINES (inf, lb, cp)
5247     {
5248       if (cp[0] == '\0')        /* Empty line */
5249         continue;
5250       else if (iswhite (cp[0])) /* Not a predicate */
5251         continue;
5252       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5253         prolog_skip_comment (&lb, inf);
5254       else if ((len = prolog_pr (cp, last)) > 0)
5255         {
5256           /* Predicate or rule.  Store the function name so that we
5257              only generate a tag for the first clause.  */
5258           if (last == NULL)
5259             last = xnew (len + 1, char);
5260           else if (len + 1 > allocated)
5261             xrnew (last, len + 1, char);
5262           allocated = len + 1;
5263           memcpy (last, cp, len);
5264           last[len] = '\0';
5265         }
5266     }
5267   free (last);
5268 }
5269
5270
5271 static void
5272 prolog_skip_comment (linebuffer *plb, FILE *inf)
5273 {
5274   char *cp;
5275
5276   do
5277     {
5278       for (cp = plb->buffer; *cp != '\0'; cp++)
5279         if (cp[0] == '*' && cp[1] == '/')
5280           return;
5281       readline (plb, inf);
5282     }
5283   while (!feof (inf));
5284 }
5285
5286 /*
5287  * A predicate or rule definition is added if it matches:
5288  *     <beginning of line><Prolog Atom><whitespace>(
5289  * or  <beginning of line><Prolog Atom><whitespace>:-
5290  *
5291  * It is added to the tags database if it doesn't match the
5292  * name of the previous clause header.
5293  *
5294  * Return the size of the name of the predicate or rule, or 0 if no
5295  * header was found.
5296  */
5297 static size_t
5298 prolog_pr (char *s, char *last)
5299
5300                                 /* Name of last clause. */
5301 {
5302   size_t pos;
5303   size_t len;
5304
5305   pos = prolog_atom (s, 0);
5306   if (! pos)
5307     return 0;
5308
5309   len = pos;
5310   pos = skip_spaces (s + pos) - s;
5311
5312   if ((s[pos] == '.'
5313        || (s[pos] == '(' && (pos += 1))
5314        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5315       && (last == NULL          /* save only the first clause */
5316           || len != strlen (last)
5317           || !strneq (s, last, len)))
5318         {
5319           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5320           return len;
5321         }
5322   else
5323     return 0;
5324 }
5325
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  A single quote is escaped by doubling
 *   it.  Backslash quotes the character that follows it.
 *   A quoted atom that does not end on this line is an error.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored.  */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;                 /* skip the backslash and what it quotes */
        break;

      case '\'':
        if (p[1] != '\'')
          return (p + 1) - start;       /* closing quote is consumed too */
        p += 2;                 /* a doubled quote stands for one quote */
        break;

      default:
        p++;
        break;
      }
}
5382
5383 \f
5384 /*
5385  * Support for Erlang
5386  *
5387  * Generates tags for functions, defines, and records.
5388  * Assumes that Erlang functions start at column 0.
5389  * Original code by Anders Lindgren (1996)
5390  */
5391 static int erlang_func (char *, char *);
5392 static void erlang_attribute (char *);
5393 static int erlang_atom (char *);
5394
5395 static void
5396 Erlang_functions (FILE *inf)
5397 {
5398   char *cp, *last;
5399   int len;
5400   int allocated;
5401
5402   allocated = 0;
5403   len = 0;
5404   last = NULL;
5405
5406   LOOP_ON_INPUT_LINES (inf, lb, cp)
5407     {
5408       if (cp[0] == '\0')        /* Empty line */
5409         continue;
5410       else if (iswhite (cp[0])) /* Not function nor attribute */
5411         continue;
5412       else if (cp[0] == '%')    /* comment */
5413         continue;
5414       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5415         continue;
5416       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5417         {
5418           erlang_attribute (cp);
5419           if (last != NULL)
5420             {
5421               free (last);
5422               last = NULL;
5423             }
5424         }
5425       else if ((len = erlang_func (cp, last)) > 0)
5426         {
5427           /*
5428            * Function.  Store the function name so that we only
5429            * generates a tag for the first clause.
5430            */
5431           if (last == NULL)
5432             last = xnew (len + 1, char);
5433           else if (len + 1 > allocated)
5434             xrnew (last, len + 1, char);
5435           allocated = len + 1;
5436           memcpy (last, cp, len);
5437           last[len] = '\0';
5438         }
5439     }
5440   free (last);
5441 }
5442
5443
5444 /*
5445  * A function definition is added if it matches:
5446  *     <beginning of line><Erlang Atom><whitespace>(
5447  *
5448  * It is added to the tags database if it doesn't match the
5449  * name of the previous clause header.
5450  *
5451  * Return the size of the name of the function, or 0 if no function
5452  * was found.
5453  */
5454 static int
5455 erlang_func (char *s, char *last)
5456
5457                                 /* Name of last clause. */
5458 {
5459   int pos;
5460   int len;
5461
5462   pos = erlang_atom (s);
5463   if (pos < 1)
5464     return 0;
5465
5466   len = pos;
5467   pos = skip_spaces (s + pos) - s;
5468
5469   /* Save only the first clause. */
5470   if (s[pos++] == '('
5471       && (last == NULL
5472           || len != (int)strlen (last)
5473           || !strneq (s, last, len)))
5474         {
5475           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5476           return len;
5477         }
5478
5479   return 0;
5480 }
5481
5482
5483 /*
5484  * Handle attributes.  Currently, tags are generated for defines
5485  * and records.
5486  *
5487  * They are on the form:
5488  * -define(foo, bar).
5489  * -define(Foo(M, N), M+N).
5490  * -record(graph, {vtab = notable, cyclic = true}).
5491  */
5492 static void
5493 erlang_attribute (char *s)
5494 {
5495   char *cp = s;
5496
5497   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5498       && *cp++ == '(')
5499     {
5500       int len = erlang_atom (skip_spaces (cp));
5501       if (len > 0)
5502         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5503     }
5504   return;
5505 }
5506
5507
/*
 * Consume an Erlang atom (or variable) at the beginning of S.
 *
 * An unquoted atom starts with a letter or an underscore and goes on
 * with alphanumeric characters and underscores.  A quoted atom is
 * delimited by single quotes; inside it a backslash quotes the
 * character that follows.
 *
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom does not end on this line.
 */
static int
erlang_atom (char *s)
{
  int pos;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted.  */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
      return pos;
    }

  if (s[0] != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote.  */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          pos++;                /* look at the quoted character */
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;               /* count the closing quote as well */
}
5535
5536 \f
5537 static char *scan_separators (char *);
5538 static void add_regex (char *, language *);
5539 static char *substitute (char *, char *, struct re_registers *);
5540
/*
 * Take a string like "/blah/" and turn it into "blah", where the
 * first character of NAME is the separator.  Scanning stops at the
 * first unquoted occurrence of the separator.  A backslash quotes the
 * next character: \a, \b, \d, \e, \f, \n, \r, \t and \v are replaced
 * by the control character they stand for, a quoted separator becomes
 * a plain separator, and any other quoted character is kept together
 * with its backslash.  Works in place, writing the result starting at
 * NAME[0], and null terminates it.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* the input char being examined */

  for (src = name + 1; *src != '\0'; ++src)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character.  A backslash at the very
             end of the string is dropped.  */
          ++src;
          if (*src == '\0')
            break;

          switch (*src)
            {
            case 'a': *dst++ = '\007'; break;   /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;     /* BS (back space)      */
            case 'd': *dst++ = 0177; break;     /* DEL (delete)         */
            case 'e': *dst++ = 033; break;      /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;     /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;     /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;     /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;     /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;     /* VT (vertical tab)    */
            default:
              if (*src != sep)
                /* Something else is quoted, so preserve the quote.  */
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator: we are done */
      else
        *dst++ = *src;
    }

  /* Terminate copied string.  The output never catches up with the
     input, so this cannot overwrite the separator at SRC.  */
  *dst = '\0';

  /* No separator under SRC means the regexp was unterminated.  */
  return *src == sep ? src : NULL;
}
5599
5600 /* Look at the argument of --regex or --no-regex and do the right
5601    thing.  Same for each line of a regexp file. */
5602 static void
5603 analyse_regex (char *regex_arg)
5604 {
5605   if (regex_arg == NULL)
5606     {
5607       free_regexps ();          /* --no-regex: remove existing regexps */
5608       return;
5609     }
5610
5611   /* A real --regexp option or a line in a regexp file. */
5612   switch (regex_arg[0])
5613     {
5614       /* Comments in regexp file or null arg to --regex. */
5615     case '\0':
5616     case ' ':
5617     case '\t':
5618       break;
5619
5620       /* Read a regex file.  This is recursive and may result in a
5621          loop, which will stop when the file descriptors are exhausted. */
5622     case '@':
5623       {
5624         FILE *regexfp;
5625         linebuffer regexbuf;
5626         char *regexfile = regex_arg + 1;
5627
5628         /* regexfile is a file containing regexps, one per line. */
5629         regexfp = fopen (regexfile, "r");
5630         if (regexfp == NULL)
5631           {
5632             pfatal (regexfile);
5633             return;
5634           }
5635         linebuffer_init (&regexbuf);
5636         while (readline_internal (&regexbuf, regexfp) > 0)
5637           analyse_regex (regexbuf.buffer);
5638         free (regexbuf.buffer);
5639         fclose (regexfp);
5640       }
5641       break;
5642
5643       /* Regexp to be used for a specific language only. */
5644     case '{':
5645       {
5646         language *lang;
5647         char *lang_name = regex_arg + 1;
5648         char *cp;
5649
5650         for (cp = lang_name; *cp != '}'; cp++)
5651           if (*cp == '\0')
5652             {
5653               error ("unterminated language name in regex: %s", regex_arg);
5654               return;
5655             }
5656         *cp++ = '\0';
5657         lang = get_language_from_langname (lang_name);
5658         if (lang == NULL)
5659           return;
5660         add_regex (cp, lang);
5661       }
5662       break;
5663
5664       /* Regexp to be used for any language. */
5665     default:
5666       add_regex (regex_arg, NULL);
5667       break;
5668     }
5669 }
5670
5671 /* Separate the regexp pattern, compile it,
5672    and care for optional name and modifiers. */
5673 static void
5674 add_regex (char *regexp_pattern, language *lang)
5675 {
5676   static struct re_pattern_buffer zeropattern;
5677   char sep, *pat, *name, *modifiers;
5678   char empty = '\0';
5679   const char *err;
5680   struct re_pattern_buffer *patbuf;
5681   regexp *rp;
5682   bool
5683     force_explicit_name = TRUE, /* do not use implicit tag names */
5684     ignore_case = FALSE,        /* case is significant */
5685     multi_line = FALSE,         /* matches are done one line at a time */
5686     single_line = FALSE;        /* dot does not match newline */
5687
5688
5689   if (strlen (regexp_pattern) < 3)
5690     {
5691       error ("null regexp");
5692       return;
5693     }
5694   sep = regexp_pattern[0];
5695   name = scan_separators (regexp_pattern);
5696   if (name == NULL)
5697     {
5698       error ("%s: unterminated regexp", regexp_pattern);
5699       return;
5700     }
5701   if (name[1] == sep)
5702     {
5703       error ("null name for regexp \"%s\"", regexp_pattern);
5704       return;
5705     }
5706   modifiers = scan_separators (name);
5707   if (modifiers == NULL)        /* no terminating separator --> no name */
5708     {
5709       modifiers = name;
5710       name = &empty;
5711     }
5712   else
5713     modifiers += 1;             /* skip separator */
5714
5715   /* Parse regex modifiers. */
5716   for (; modifiers[0] != '\0'; modifiers++)
5717     switch (modifiers[0])
5718       {
5719       case 'N':
5720         if (modifiers == name)
5721           error ("forcing explicit tag name but no name, ignoring");
5722         force_explicit_name = TRUE;
5723         break;
5724       case 'i':
5725         ignore_case = TRUE;
5726         break;
5727       case 's':
5728         single_line = TRUE;
5729         /* FALLTHRU */
5730       case 'm':
5731         multi_line = TRUE;
5732         need_filebuf = TRUE;
5733         break;
5734       default:
5735         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5736         break;
5737       }
5738
5739   patbuf = xnew (1, struct re_pattern_buffer);
5740   *patbuf = zeropattern;
5741   if (ignore_case)
5742     {
5743       static char lc_trans[CHARS];
5744       int i;
5745       for (i = 0; i < CHARS; i++)
5746         lc_trans[i] = lowcase (i);
5747       patbuf->translate = lc_trans;     /* translation table to fold case  */
5748     }
5749
5750   if (multi_line)
5751     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5752   else
5753     pat = regexp_pattern;
5754
5755   if (single_line)
5756     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5757   else
5758     re_set_syntax (RE_SYNTAX_EMACS);
5759
5760   err = re_compile_pattern (pat, strlen (pat), patbuf);
5761   if (multi_line)
5762     free (pat);
5763   if (err != NULL)
5764     {
5765       error ("%s while compiling pattern", err);
5766       return;
5767     }
5768
5769   rp = p_head;
5770   p_head = xnew (1, regexp);
5771   p_head->pattern = savestr (regexp_pattern);
5772   p_head->p_next = rp;
5773   p_head->lang = lang;
5774   p_head->pat = patbuf;
5775   p_head->name = savestr (name);
5776   p_head->error_signaled = FALSE;
5777   p_head->force_explicit_name = force_explicit_name;
5778   p_head->ignore_case = ignore_case;
5779   p_head->multi_line = multi_line;
5780 }
5781
5782 /*
5783  * Do the substitutions indicated by the regular expression and
5784  * arguments.
5785  */
5786 static char *
5787 substitute (char *in, char *out, struct re_registers *regs)
5788 {
5789   char *result, *t;
5790   int size, dig, diglen;
5791
5792   result = NULL;
5793   size = strlen (out);
5794
5795   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5796   if (out[size - 1] == '\\')
5797     fatal ("pattern error in \"%s\"", out);
5798   for (t = etags_strchr (out, '\\');
5799        t != NULL;
5800        t = etags_strchr (t + 2, '\\'))
5801     if (ISDIGIT (t[1]))
5802       {
5803         dig = t[1] - '0';
5804         diglen = regs->end[dig] - regs->start[dig];
5805         size += diglen - 2;
5806       }
5807     else
5808       size -= 1;
5809
5810   /* Allocate space and do the substitutions. */
5811   assert (size >= 0);
5812   result = xnew (size + 1, char);
5813
5814   for (t = result; *out != '\0'; out++)
5815     if (*out == '\\' && ISDIGIT (*++out))
5816       {
5817         dig = *out - '0';
5818         diglen = regs->end[dig] - regs->start[dig];
5819         memcpy (t, in + regs->start[dig], diglen);
5820         t += diglen;
5821       }
5822     else
5823       *t++ = *out;
5824   *t = '\0';
5825
5826   assert (t <= result + size);
5827   assert (t - result == (int)strlen (result));
5828
5829   return result;
5830 }
5831
5832 /* Deallocate all regexps. */
5833 static void
5834 free_regexps (void)
5835 {
5836   regexp *rp;
5837   while (p_head != NULL)
5838     {
5839       rp = p_head->p_next;
5840       free (p_head->pattern);
5841       free (p_head->name);
5842       free (p_head);
5843       p_head = rp;
5844     }
5845   return;
5846 }
5847
5848 /*
5849  * Reads the whole file as a single string from `filebuf' and looks for
5850  * multi-line regular expressions, creating tags on matches.
5851  * readline already dealt with normal regexps.
5852  *
5853  * Idea by Ben Wing <ben@666.com> (2002).
5854  */
5855 static void
5856 regex_tag_multiline (void)
5857 {
5858   char *buffer = filebuf.buffer;
5859   regexp *rp;
5860   char *name;
5861
5862   for (rp = p_head; rp != NULL; rp = rp->p_next)
5863     {
5864       int match = 0;
5865
5866       if (!rp->multi_line)
5867         continue;               /* skip normal regexps */
5868
5869       /* Generic initializations before parsing file from memory. */
5870       lineno = 1;               /* reset global line number */
5871       charno = 0;               /* reset global char number */
5872       linecharno = 0;           /* reset global char number of line start */
5873
5874       /* Only use generic regexps or those for the current language. */
5875       if (rp->lang != NULL && rp->lang != curfdp->lang)
5876         continue;
5877
5878       while (match >= 0 && match < filebuf.len)
5879         {
5880           match = re_search (rp->pat, buffer, filebuf.len, charno,
5881                              filebuf.len - match, &rp->regs);
5882           switch (match)
5883             {
5884             case -2:
5885               /* Some error. */
5886               if (!rp->error_signaled)
5887                 {
5888                   error ("regexp stack overflow while matching \"%s\"",
5889                          rp->pattern);
5890                   rp->error_signaled = TRUE;
5891                 }
5892               break;
5893             case -1:
5894               /* No match. */
5895               break;
5896             default:
5897               if (match == rp->regs.end[0])
5898                 {
5899                   if (!rp->error_signaled)
5900                     {
5901                       error ("regexp matches the empty string: \"%s\"",
5902                              rp->pattern);
5903                       rp->error_signaled = TRUE;
5904                     }
5905                   match = -3;   /* exit from while loop */
5906                   break;
5907                 }
5908
5909               /* Match occurred.  Construct a tag. */
5910               while (charno < rp->regs.end[0])
5911                 if (buffer[charno++] == '\n')
5912                   lineno++, linecharno = charno;
5913               name = rp->name;
5914               if (name[0] == '\0')
5915                 name = NULL;
5916               else /* make a named tag */
5917                 name = substitute (buffer, rp->name, &rp->regs);
5918               if (rp->force_explicit_name)
5919                 /* Force explicit tag name, if a name is there. */
5920                 pfnote (name, TRUE, buffer + linecharno,
5921                         charno - linecharno + 1, lineno, linecharno);
5922               else
5923                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5924                           charno - linecharno + 1, lineno, linecharno);
5925               break;
5926             }
5927         }
5928     }
5929 }
5930
5931 \f
5932 static bool
5933 nocase_tail (const char *cp)
5934 {
5935   register int len = 0;
5936
5937   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5938     cp++, len++;
5939   if (*cp == '\0' && !intoken (dbp[len]))
5940     {
5941       dbp += len;
5942       return TRUE;
5943     }
5944   return FALSE;
5945 }
5946
5947 static void
5948 get_tag (register char *bp, char **namepp)
5949 {
5950   register char *cp = bp;
5951
5952   if (*bp != '\0')
5953     {
5954       /* Go till you get to white space or a syntactic break */
5955       for (cp = bp + 1; !notinname (*cp); cp++)
5956         continue;
5957       make_tag (bp, cp - bp, TRUE,
5958                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5959     }
5960
5961   if (namepp != NULL)
5962     *namepp = savenstr (bp, cp - bp);
5963 }
5964
5965 /*
5966  * Read a line of text from `stream' into `lbp', excluding the
5967  * newline or CR-NL, if any.  Return the number of characters read from
5968  * `stream', which is the length of the line including the newline.
5969  *
5970  * On DOS or Windows we do not count the CR character, if any before the
5971  * NL, in the returned length; this mirrors the behavior of Emacs on those
5972  * platforms (for text files, it translates CR-NL to NL as it reads in the
5973  * file).
5974  *
5975  * If multi-line regular expressions are requested, each line read is
5976  * appended to `filebuf'.
5977  */
5978 static long
5979 readline_internal (linebuffer *lbp, register FILE *stream)
5980 {
5981   char *buffer = lbp->buffer;
5982   register char *p = lbp->buffer;
5983   register char *pend;
5984   int chars_deleted;
5985
5986   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5987
5988   for (;;)
5989     {
5990       register int c = getc (stream);
5991       if (p == pend)
5992         {
5993           /* We're at the end of linebuffer: expand it. */
5994           lbp->size *= 2;
5995           xrnew (buffer, lbp->size, char);
5996           p += buffer - lbp->buffer;
5997           pend = buffer + lbp->size;
5998           lbp->buffer = buffer;
5999         }
6000       if (c == EOF)
6001         {
6002           *p = '\0';
6003           chars_deleted = 0;
6004           break;
6005         }
6006       if (c == '\n')
6007         {
6008           if (p > buffer && p[-1] == '\r')
6009             {
6010               p -= 1;
6011 #ifdef DOS_NT
6012              /* Assume CRLF->LF translation will be performed by Emacs
6013                 when loading this file, so CRs won't appear in the buffer.
6014                 It would be cleaner to compensate within Emacs;
6015                 however, Emacs does not know how many CRs were deleted
6016                 before any given point in the file.  */
6017               chars_deleted = 1;
6018 #else
6019               chars_deleted = 2;
6020 #endif
6021             }
6022           else
6023             {
6024               chars_deleted = 1;
6025             }
6026           *p = '\0';
6027           break;
6028         }
6029       *p++ = c;
6030     }
6031   lbp->len = p - buffer;
6032
6033   if (need_filebuf              /* we need filebuf for multi-line regexps */
6034       && chars_deleted > 0)     /* not at EOF */
6035     {
6036       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6037         {
6038           /* Expand filebuf. */
6039           filebuf.size *= 2;
6040           xrnew (filebuf.buffer, filebuf.size, char);
6041         }
6042       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6043       filebuf.len += lbp->len;
6044       filebuf.buffer[filebuf.len++] = '\n';
6045       filebuf.buffer[filebuf.len] = '\0';
6046     }
6047
6048   return lbp->len + chars_deleted;
6049 }
6050
6051 /*
6052  * Like readline_internal, above, but in addition try to match the
6053  * input line against relevant regular expressions and manage #line
6054  * directives.
6055  */
6056 static void
6057 readline (linebuffer *lbp, FILE *stream)
6058 {
6059   long result;
6060
6061   linecharno = charno;          /* update global char number of line start */
6062   result = readline_internal (lbp, stream); /* read line */
6063   lineno += 1;                  /* increment global line number */
6064   charno += result;             /* increment global char number */
6065
6066   /* Honor #line directives. */
6067   if (!no_line_directive)
6068     {
6069       static bool discard_until_line_directive;
6070
6071       /* Check whether this is a #line directive. */
6072       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6073         {
6074           unsigned int lno;
6075           int start = 0;
6076
6077           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6078               && start > 0)     /* double quote character found */
6079             {
6080               char *endp = lbp->buffer + start;
6081
6082               while ((endp = etags_strchr (endp, '"')) != NULL
6083                      && endp[-1] == '\\')
6084                 endp++;
6085               if (endp != NULL)
6086                 /* Ok, this is a real #line directive.  Let's deal with it. */
6087                 {
6088                   char *taggedabsname;  /* absolute name of original file */
6089                   char *taggedfname;    /* name of original file as given */
6090                   char *name;           /* temp var */
6091
6092                   discard_until_line_directive = FALSE; /* found it */
6093                   name = lbp->buffer + start;
6094                   *endp = '\0';
6095                   canonicalize_filename (name);
6096                   taggedabsname = absolute_filename (name, tagfiledir);
6097                   if (filename_is_absolute (name)
6098                       || filename_is_absolute (curfdp->infname))
6099                     taggedfname = savestr (taggedabsname);
6100                   else
6101                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6102
6103                   if (streq (curfdp->taggedfname, taggedfname))
6104                     /* The #line directive is only a line number change.  We
6105                        deal with this afterwards. */
6106                     free (taggedfname);
6107                   else
6108                     /* The tags following this #line directive should be
6109                        attributed to taggedfname.  In order to do this, set
6110                        curfdp accordingly. */
6111                     {
6112                       fdesc *fdp; /* file description pointer */
6113
6114                       /* Go look for a file description already set up for the
6115                          file indicated in the #line directive.  If there is
6116                          one, use it from now until the next #line
6117                          directive. */
6118                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6119                         if (streq (fdp->infname, curfdp->infname)
6120                             && streq (fdp->taggedfname, taggedfname))
6121                           /* If we remove the second test above (after the &&)
6122                              then all entries pertaining to the same file are
6123                              coalesced in the tags file.  If we use it, then
6124                              entries pertaining to the same file but generated
6125                              from different files (via #line directives) will
6126                              go into separate sections in the tags file.  These
6127                              alternatives look equivalent.  The first one
6128                              destroys some apparently useless information. */
6129                           {
6130                             curfdp = fdp;
6131                             free (taggedfname);
6132                             break;
6133                           }
6134                       /* Else, if we already tagged the real file, skip all
6135                          input lines until the next #line directive. */
6136                       if (fdp == NULL) /* not found */
6137                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6138                           if (streq (fdp->infabsname, taggedabsname))
6139                             {
6140                               discard_until_line_directive = TRUE;
6141                               free (taggedfname);
6142                               break;
6143                             }
6144                       /* Else create a new file description and use that from
6145                          now on, until the next #line directive. */
6146                       if (fdp == NULL) /* not found */
6147                         {
6148                           fdp = fdhead;
6149                           fdhead = xnew (1, fdesc);
6150                           *fdhead = *curfdp; /* copy curr. file description */
6151                           fdhead->next = fdp;
6152                           fdhead->infname = savestr (curfdp->infname);
6153                           fdhead->infabsname = savestr (curfdp->infabsname);
6154                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6155                           fdhead->taggedfname = taggedfname;
6156                           fdhead->usecharno = FALSE;
6157                           fdhead->prop = NULL;
6158                           fdhead->written = FALSE;
6159                           curfdp = fdhead;
6160                         }
6161                     }
6162                   free (taggedabsname);
6163                   lineno = lno - 1;
6164                   readline (lbp, stream);
6165                   return;
6166                 } /* if a real #line directive */
6167             } /* if #line is followed by a number */
6168         } /* if line begins with "#line " */
6169
6170       /* If we are here, no #line directive was found. */
6171       if (discard_until_line_directive)
6172         {
6173           if (result > 0)
6174             {
6175               /* Do a tail recursion on ourselves, thus discarding the contents
6176                  of the line buffer. */
6177               readline (lbp, stream);
6178               return;
6179             }
6180           /* End of file. */
6181           discard_until_line_directive = FALSE;
6182           return;
6183         }
6184     } /* if #line directives should be considered */
6185
6186   {
6187     int match;
6188     regexp *rp;
6189     char *name;
6190
6191     /* Match against relevant regexps. */
6192     if (lbp->len > 0)
6193       for (rp = p_head; rp != NULL; rp = rp->p_next)
6194         {
6195           /* Only use generic regexps or those for the current language.
6196              Also do not use multiline regexps, which is the job of
6197              regex_tag_multiline. */
6198           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6199               || rp->multi_line)
6200             continue;
6201
6202           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6203           switch (match)
6204             {
6205             case -2:
6206               /* Some error. */
6207               if (!rp->error_signaled)
6208                 {
6209                   error ("regexp stack overflow while matching \"%s\"",
6210                          rp->pattern);
6211                   rp->error_signaled = TRUE;
6212                 }
6213               break;
6214             case -1:
6215               /* No match. */
6216               break;
6217             case 0:
6218               /* Empty string matched. */
6219               if (!rp->error_signaled)
6220                 {
6221                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6222                   rp->error_signaled = TRUE;
6223                 }
6224               break;
6225             default:
6226               /* Match occurred.  Construct a tag. */
6227               name = rp->name;
6228               if (name[0] == '\0')
6229                 name = NULL;
6230               else /* make a named tag */
6231                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6232               if (rp->force_explicit_name)
6233                 /* Force explicit tag name, if a name is there. */
6234                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6235               else
6236                 make_tag (name, strlen (name), TRUE,
6237                           lbp->buffer, match, lineno, linecharno);
6238               break;
6239             }
6240         }
6241   }
6242 }
6243
6244 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr, so it occupies strlen(cp)+1 bytes of freshly allocated
 * storage that the caller is responsible for freeing.
 */
static char *
savestr (const char *cp)
{
  const int length = strlen (cp);

  return savenstr (cp, length);
}
6254
6255 /*
6256  * Return a pointer to a space of size LEN+1 allocated with xnew where
6257  * the string CP has been copied for at most the first LEN characters.
6258  */
6259 static char *
6260 savenstr (const char *cp, int len)
6261 {
6262   register char *dp;
6263
6264   dp = xnew (len + 1, char);
6265   memcpy (dp, cp, len);
6266   dp[len] = '\0';
6267   return dp;
6268 }
6269
/*
 * Return a pointer to the last occurrence of the character C in the
 * NUL-terminated string SP, or NULL if C does not occur.  Searching
 * for '\0' yields a pointer to the terminator.
 *
 * This is a thin wrapper around the standard strrchr, kept so that
 * existing callers continue to work.  Delegating to the library makes
 * the character comparison follow the standard rule (C is converted
 * to char before comparing), which matters for byte values above 127
 * on platforms where plain char is signed.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  return strrchr (sp, c);
}
6289
/*
 * Return a pointer to the first occurrence of the character C in the
 * NUL-terminated string SP, or NULL if C does not occur.  Searching
 * for '\0' yields a pointer to the terminator.
 *
 * This is a thin wrapper around the standard strchr, kept so that
 * existing callers continue to work.  Delegating to the library makes
 * the character comparison follow the standard rule (C is converted
 * to char before comparing), which matters for byte values above 127
 * on platforms where plain char is signed.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  return strchr (sp, c);
}
6306
/* Advance CP over every character that iswhite accepts and return the
   position of the first one it rejects.  The scan relies on iswhite
   rejecting the terminating NUL, so it stops at end of string. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6315
/* Advance CP until it reaches either the end of the string or a
   character that iswhite accepts; return that position. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6324
/* Advance CP over the characters belonging to the "name" class and
   return the position of the first character outside it. */
static char *
skip_name (char *cp)
{
  /* No explicit end-of-string test: '\0' counts as notinname, so the
     scan cannot run past the terminator. */
  for (;;)
    {
      if (notinname (*cp))
        break;
      cp++;
    }
  return cp;
}
6334
/* Report a fatal problem and terminate the program.  S1 is handed to
   error as its printf-style format with S2 as the single argument;
   afterwards the process exits with status EXIT_FAILURE, so this
   function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6342
/* Report the failure described by the current errno, using S1 as the
   prefix given to perror, then exit with status EXIT_FAILURE.  Does
   not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6349
6350 static void
6351 suggest_asking_for_help (void)
6352 {
6353   fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6354            progname);
6355   exit (EXIT_FAILURE);
6356 }
6357
6358 /* Output a diagnostic with printf-style FORMAT and args.  */
6359 static void
6360 error (const char *format, ...)
6361 {
6362   va_list ap;
6363   va_start (ap, format);
6364   fprintf (stderr, "%s: ", progname);
6365   vfprintf (stderr, format, ap);
6366   fprintf (stderr, "\n");
6367   va_end (ap);
6368 }
6369
6370 /* Return a newly-allocated string whose contents
6371    concatenate those of s1, s2, s3.  */
6372 static char *
6373 concat (const char *s1, const char *s2, const char *s3)
6374 {
6375   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6376   char *result = xnew (len1 + len2 + len3 + 1, char);
6377
6378   strcpy (result, s1);
6379   strcpy (result + len1, s2);
6380   strcpy (result + len1 + len2, s3);
6381   result[len1 + len2 + len3] = '\0';
6382
6383   return result;
6384 }
6385
6386 \f
6387 /* Does the same work as the system V getcwd, but does not need to
6388    guess the buffer size in advance. */
6389 static char *
6390 etags_getcwd (void)
6391 {
6392   int bufsize = 200;
6393   char *path = xnew (bufsize, char);
6394
6395   while (getcwd (path, bufsize) == NULL)
6396     {
6397       if (errno != ERANGE)
6398         pfatal ("getcwd");
6399       bufsize *= 2;
6400       free (path);
6401       path = xnew (bufsize, char);
6402     }
6403
6404   canonicalize_filename (path);
6405   return path;
6406 }
6407
6408 /* Return a newly allocated string containing the file name of FILE
6409    relative to the absolute directory DIR (which should end with a slash). */
6410 static char *
6411 relative_filename (char *file, char *dir)
6412 {
6413   char *fp, *dp, *afn, *res;
6414   int i;
6415
6416   /* Find the common root of file and dir (with a trailing slash). */
6417   afn = absolute_filename (file, cwd);
6418   fp = afn;
6419   dp = dir;
6420   while (*fp++ == *dp++)
6421     continue;
6422   fp--, dp--;                   /* back to the first differing char */
6423 #ifdef DOS_NT
6424   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6425     return afn;
6426 #endif
6427   do                            /* look at the equal chars until '/' */
6428     fp--, dp--;
6429   while (*fp != '/');
6430
6431   /* Build a sequence of "../" strings for the resulting relative file name. */
6432   i = 0;
6433   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6434     i += 1;
6435   res = xnew (3*i + strlen (fp + 1) + 1, char);
6436   res[0] = '\0';
6437   while (i-- > 0)
6438     strcat (res, "../");
6439
6440   /* Add the file name relative to the common root of file and dir. */
6441   strcat (res, fp + 1);
6442   free (afn);
6443
6444   return res;
6445 }
6446
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is simply copied; otherwise DIR and
   FILE are concatenated.  The result is then simplified in place by
   removing every "/dirname/.." and "/." component.  The caller owns
   the returned string. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  fatal exits, so
     RES is never used uninitialized after this branch.  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  SLASHP always
     points at a slash inside RES (or is NULL when none is left). */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          /* "/.." followed by a slash or by the end of the string. */
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk CP backwards to the start of the preceding
                 component, i.e. to the closest earlier position at
                 which filename_is_absolute holds. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the user could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Drop everything from CP up to and including the "/..".
                 Since slashp[2] is '.', strlen (slashp + 2) equals the
                 length of the text after "/.." plus one, so the
                 terminating NUL is moved as well. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              /* Re-examine the text that now starts at CP. */
              slashp = cp;
              continue;
            }
          /* "/." followed by a slash or by the end of the string. */
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Remove the two characters "/.".  Since slashp[1] is
                 '.', strlen (slashp + 1) covers the remaining text
                 plus its terminating NUL.  SLASHP is left unchanged so
                 that the same position is checked again. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      /* Nothing to delete here: move on to the next slash. */
      slashp = etags_strchr (slashp + 1, '/');
    }

  /* Safety net for the case in which the simplification removed
     everything (an input such as "/a/.." is reduced to the empty
     string by the loop above): return the root directory instead. */
  if (res[0] == '\0')
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6509
6510 /* Return a newly allocated string containing the absolute
6511    file name of dir where FILE resides given DIR (which should
6512    end with a slash). */
6513 static char *
6514 absolute_dirname (char *file, char *dir)
6515 {
6516   char *slashp, *res;
6517   char save;
6518
6519   slashp = etags_strrchr (file, '/');
6520   if (slashp == NULL)
6521     return savestr (dir);
6522   save = slashp[1];
6523   slashp[1] = '\0';
6524   res = absolute_filename (file, dir);
6525   slashp[1] = save;
6526
6527   return res;
6528 }
6529
/* Tell whether FN is an absolute file name, that is, whether it starts
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   FN must already have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6541
/* Canonicalize the file name FN in place: under DOS_NT an upper-case
   drive letter is converted to lower case, and every run of separator
   characters is replaced by a single slash.  The separator is the
   backslash under DOS_NT and the slash otherwise.  The result is never
   longer than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *src;           /* next character to read */
  register char *dst;           /* next position to write */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          /* Emit one slash for the whole run of separators. */
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6571
6572 \f
6573 /* Initialize a linebuffer for use. */
6574 static void
6575 linebuffer_init (linebuffer *lbp)
6576 {
6577   lbp->size = (DEBUG) ? 3 : 200;
6578   lbp->buffer = xnew (lbp->size, char);
6579   lbp->buffer[0] = '\0';
6580   lbp->len = 0;
6581 }
6582
6583 /* Set the minimum size of a string contained in a linebuffer. */
6584 static void
6585 linebuffer_setlen (linebuffer *lbp, int toksize)
6586 {
6587   while (lbp->size <= toksize)
6588     {
6589       lbp->size *= 2;
6590       xrnew (lbp->buffer, lbp->size, char);
6591     }
6592   lbp->len = toksize;
6593 }
6594
/* Allocate SIZE bytes with malloc and return the block.  If malloc
   returns NULL, report "virtual memory exhausted" through fatal, which
   terminates the program. */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6604
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  If realloc returns NULL, report "virtual memory
   exhausted" through fatal, which terminates the program. */
static void *
xrealloc (char *ptr, size_t size)
{
  void *block = realloc (ptr, size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
6613
6614 /*
6615  * Local Variables:
6616  * indent-tabs-mode: t
6617  * tab-width: 8
6618  * fill-column: 79
6619  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6620  * c-file-style: "gnu"
6621  * End:
6622  */
6623
6624 /* etags.c ends here */