lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
/* Version identification string.  The leading "@(#)" is the marker that
   what(1)-style tools look for when scanning a binary.  */
char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #define TRUE    1
  84 #define FALSE   0
  85
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* disable assert */
  92 #endif
  93
  94 #ifdef HAVE_CONFIG_H
  95 # include <config.h>
  96   /* On some systems, Emacs defines static as nothing for the sake
  97      of unexec.  We don't want that here since we don't use unexec. */
  98 # undef static
  99 # ifndef PTR                    /* for XEmacs */
 100 #   define PTR void *
 101 # endif
 102 #else  /* no config.h */
 103 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 104 #   define PTR void *           /* for generic pointers */
 105 # else /* not standard C */
 106 #   define const                /* remove const for old compilers' sake */
 107 #   define PTR long *           /* don't use void* */
 108 # endif
 109 #endif /* !HAVE_CONFIG_H */
 110
 111 #ifndef _GNU_SOURCE
 112 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 113 #endif
 114
 115 /* WIN32_NATIVE is for XEmacs.
 116    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 117 #ifdef WIN32_NATIVE
 118 # undef MSDOS
 119 # undef  WINDOWSNT
 120 # define WINDOWSNT
 121 #endif /* WIN32_NATIVE */
 122
 123 #ifdef MSDOS
 124 # undef MSDOS
 125 # define MSDOS TRUE
 126 # include <fcntl.h>
 127 # include <sys/param.h>
 128 # include <io.h>
 129 # ifndef HAVE_CONFIG_H
 130 #   define DOS_NT
 131 #   include <sys/config.h>
 132 # endif
 133 #else
 134 # define MSDOS FALSE
 135 #endif /* MSDOS */
 136
 137 #ifdef WINDOWSNT
 138 # include <stdlib.h>
 139 # include <fcntl.h>
 140 # include <string.h>
 141 # include <direct.h>
 142 # include <io.h>
 143 # define MAXPATHLEN _MAX_PATH
 144 # undef HAVE_NTGUI
 145 # undef  DOS_NT
 146 # define DOS_NT
 147 # ifndef HAVE_GETCWD
 148 #   define HAVE_GETCWD
 149 # endif /* undef HAVE_GETCWD */
 150 #else /* not WINDOWSNT */
 151 # ifdef STDC_HEADERS
 152 #  include <stdlib.h>
 153 #  include <string.h>
 154 # else /* no standard C headers */
 155    extern char *getenv (const char *);
 156    extern char *strcpy (char *, const char *);
 157    extern char *strncpy (char *, const char *, unsigned long);
 158    extern char *strcat (char *, const char *);
 159    extern char *strncat (char *, const char *, unsigned long);
 160    extern int strcmp (const char *, const char *);
 161    extern int strncmp (const char *, const char *, unsigned long);
 162    extern int system (const char *);
 163    extern unsigned long strlen (const char *);
 164    extern void *malloc (unsigned long);
 165    extern void *realloc (void *, unsigned long);
 166    extern void exit (int);
 167    extern void free (void *);
 168    extern void *memmove (void *, const void *, unsigned long);
 169 #  define EXIT_SUCCESS  0
 170 #  define EXIT_FAILURE  1
 171 # endif
 172 #endif /* !WINDOWSNT */
 173
 174 #include <unistd.h>
 175 #ifndef HAVE_UNISTD_H
 176 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 177     extern char *getcwd (char *buf, size_t size);
 178 # endif
 179 #endif /* HAVE_UNISTD_H */
 180
 181 #include <stdio.h>
 182 #include <ctype.h>
 183 #include <errno.h>
 184 #include <sys/types.h>
 185 #include <sys/stat.h>
 186
 187 #include <assert.h>
 188 #ifdef NDEBUG
 189 # undef  assert                 /* some systems have a buggy assert.h */
 190 # define assert(x) ((void) 0)
 191 #endif
 192
 193 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 194 # define NO_LONG_OPTIONS TRUE
 195 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 196   extern char *optarg;
 197   extern int optind, opterr;
 198 #else
 199 # define NO_LONG_OPTIONS FALSE
 200 # include <getopt.h>
 201 #endif /* NO_LONG_OPTIONS */
 202
 203 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 204 # ifdef __CYGWIN__              /* compiling on Cygwin */
 205                              !!! NOTICE !!!
 206  the regex.h distributed with Cygwin is not compatible with etags, alas!
 207 If you want regular expression support, you should delete this notice and
 208               arrange to use the GNU regex.h and regex.c.
 209 # endif
 210 #endif
 211 #include <regex.h>
 212
 213 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 214  Leave it undefined to make the program "etags", which makes emacs-style
 215  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 216 #ifdef CTAGS
 217 # undef  CTAGS
 218 # define CTAGS TRUE
 219 #else
 220 # define CTAGS FALSE
 221 #endif
 222
 223 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 224 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 225 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 226 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 227
#define CHARS 256               /* number of distinct values of an 8-bit char */
/* Reduce X to the range 0..CHARS-1, so that it is a valid table index and
   a valid <ctype.h> argument even when plain char is signed.  */
#define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
/* Character-class predicates; each one is a lookup in a CHARS-sized table. */
#define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
#define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
#define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
#define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
#define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */

/* <ctype.h> wrappers that first pass their argument through CHAR.  */
#define ISALNUM(c)      isalnum (CHAR(c))
#define ISALPHA(c)      isalpha (CHAR(c))
#define ISDIGIT(c)      isdigit (CHAR(c))
#define ISLOWER(c)      islower (CHAR(c))

#define lowcase(c)      tolower (CHAR(c))
 242
 243
/*
 *      xnew, xrnew -- allocate, reallocate storage
 *
 * SYNOPSIS:    Type *xnew (int n, Type);
 *              void xrnew (OldPointer, int n, Type);
 *
 * xnew yields a Type * to room for N objects of type Type.
 * xrnew resizes the storage OP points to so that it holds N objects of
 * type Type and assigns the new address back to OP, so OP must be an
 * lvalue.  When DEBUG is true both macros call trace_malloc and
 * trace_realloc (presumably declared in "chkmalloc.h"), passing the
 * caller's file name and line number; otherwise they call xmalloc and
 * xrealloc.
 * NOTE(review): N * sizeof (Type) is not checked for overflow here.
 */
#if DEBUG
# include "chkmalloc.h"
# define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
                                                  (n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
                                        (char *) (op), (n) * sizeof (Type)))
#else
# define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
# define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
                                        (char *) (op), (n) * sizeof (Type)))
#endif
 261
/* Booleans are plain ints.  NOTE(review): the addresses of several bool
   flags are stored in the `struct option' table of this file, whose flag
   member is an int *, so this presumably has to stay `int'.  */
#define bool int

/* Type of a language parser: a function taking the stdio stream to parse. */
typedef void Lang_function (FILE *);
 265
/* Associates a file name suffix with the command that decompresses files
   carrying that suffix.  */
typedef struct
{
  const char *suffix;           /* file name suffix for this compressor */
  const char *command;          /* takes one arg and decompresses to stdout */
} compressor;
 271
/* Description of one supported language: how to recognize its files and
   which function parses them.  */
typedef struct
{
  const char *name;             /* language name */
  const char *help;             /* detailed help for the language */
  Lang_function *function;      /* parse function */
  const char **suffixes;        /* name suffixes of this language's files */
  const char **filenames;       /* names of this language's files */
  const char **interpreters;    /* interpreters for this language */
  bool metasource;              /* source used to generate other sources */
} language;
 282
/* Description of one input file.  Descriptions are chained into a singly
   linked list through the `next' member.  */
typedef struct fdesc
{
  struct fdesc *next;           /* for the linked list */
  char *infname;                /* uncompressed input file name */
  char *infabsname;             /* absolute uncompressed input file name */
  char *infabsdir;              /* absolute dir of input file */
  char *taggedfname;            /* file name to write in tagfile */
  language *lang;               /* language of file */
  char *prop;                   /* file properties to write in tagfile */
  bool usecharno;               /* etags tags shall contain char number */
  bool written;                 /* entry written in the tags file */
} fdesc;
 295
/* One tag.  Nodes are linked through `left' and `right' into a binary
   tree.  */
typedef struct node_st
{                               /* sorting structure */
  struct node_st *left, *right; /* left and right sons */
  fdesc *fdp;                   /* description of the file the tag belongs to */
  char *name;                   /* tag name */
  char *regex;                  /* search regexp */
  bool valid;                   /* write this tag on the tag file */
  bool is_func;                 /* function tag: use regexp in CTAGS mode */
  bool been_warned;             /* warning already given for duplicated tag */
  int lno;                      /* line number tag is on */
  long cno;                     /* character number line starts on */
} node;
 308
/*
 * A `linebuffer' is a structure which holds a line of text.
 * `readline_internal' reads a line from a stream into a linebuffer
 * and works regardless of the length of the line.
 * SIZE is the size of BUFFER, LEN is the length of the string in
 * BUFFER after readline reads it.
 */
typedef struct
{
  long size;                    /* size of BUFFER */
  int len;                      /* length of the string held in BUFFER */
  char *buffer;                 /* the text itself */
} linebuffer;
 322
/* Used to support mixing of --lang and file names.  Each element records
   what kind of command line argument it is, together with its text.  */
typedef struct
{
  enum {
    at_language,                /* a language specification */
    at_regexp,                  /* a regular expression */
    at_filename,                /* a file name */
    at_stdin,                   /* read from stdin here */
    at_end                      /* stop parsing the list */
  } arg_type;                   /* argument type */
  language *lang;               /* language associated with the argument */
  char *what;                   /* the argument itself */
} argument;
 336
/* Structure defining a regular expression.  Regexps are chained into a
   singly linked list through `p_next'.  */
typedef struct regexp
{
  struct regexp *p_next;        /* pointer to next in list */
  language *lang;               /* if set, use only for this language */
  char *pattern;                /* the regexp pattern */
  char *name;                   /* tag name */
  struct re_pattern_buffer *pat; /* the compiled pattern */
  struct re_registers regs;     /* re registers */
  bool error_signaled;          /* already signaled for this regexp */
  bool force_explicit_name;     /* do not allow implicit tag name */
  bool ignore_case;             /* ignore case when matching */
  bool multi_line;              /* do a multi-line match on the whole file */
} regexp;
 351
 352
 353 /* Many compilers barf on this:
 354         Lang_function Ada_funcs;
 355    so let's write it this way */
 356 static void Ada_funcs (FILE *);
 357 static void Asm_labels (FILE *);
 358 static void C_entries (int c_ext, FILE *);
 359 static void default_C_entries (FILE *);
 360 static void plain_C_entries (FILE *);
 361 static void Cjava_entries (FILE *);
 362 static void Cobol_paragraphs (FILE *);
 363 static void Cplusplus_entries (FILE *);
 364 static void Cstar_entries (FILE *);
 365 static void Erlang_functions (FILE *);
 366 static void Forth_words (FILE *);
 367 static void Fortran_functions (FILE *);
 368 static void HTML_labels (FILE *);
 369 static void Lisp_functions (FILE *);
 370 static void Lua_functions (FILE *);
 371 static void Makefile_targets (FILE *);
 372 static void Pascal_functions (FILE *);
 373 static void Perl_functions (FILE *);
 374 static void PHP_functions (FILE *);
 375 static void PS_functions (FILE *);
 376 static void Prolog_functions (FILE *);
 377 static void Python_functions (FILE *);
 378 static void Scheme_functions (FILE *);
 379 static void TeX_commands (FILE *);
 380 static void Texinfo_nodes (FILE *);
 381 static void Yacc_entries (FILE *);
 382 static void just_read_file (FILE *);
 383
 384 static void print_language_names (void);
 385 static void print_version (void);
 386 static void print_help (argument *);
 387 int main (int, char **);
 388
 389 static compressor *get_compressor_from_suffix (char *, char **);
 390 static language *get_language_from_langname (const char *);
 391 static language *get_language_from_interpreter (char *);
 392 static language *get_language_from_filename (char *, bool);
 393 static void readline (linebuffer *, FILE *);
 394 static long readline_internal (linebuffer *, FILE *);
 395 static bool nocase_tail (const char *);
 396 static void get_tag (char *, char **);
 397
 398 static void analyse_regex (char *);
 399 static void free_regexps (void);
 400 static void regex_tag_multiline (void);
 401 static void error (const char *, const char *);
 402 static void suggest_asking_for_help (void) NO_RETURN;
 403 void fatal (const char *, const char *) NO_RETURN;
 404 static void pfatal (const char *) NO_RETURN;
 405 static void add_node (node *, node **);
 406
 407 static void init (void);
 408 static void process_file_name (char *, language *);
 409 static void process_file (FILE *, char *, language *);
 410 static void find_entries (FILE *);
 411 static void free_tree (node *);
 412 static void free_fdesc (fdesc *);
 413 static void pfnote (char *, bool, char *, int, int, long);
 414 static void make_tag (const char *, int, bool, char *, int, int, long);
 415 static void invalidate_nodes (fdesc *, node **);
 416 static void put_entries (node *);
 417
 418 static char *concat (const char *, const char *, const char *);
 419 static char *skip_spaces (char *);
 420 static char *skip_non_spaces (char *);
 421 static char *savenstr (const char *, int);
 422 static char *savestr (const char *);
 423 static char *etags_strchr (const char *, int);
 424 static char *etags_strrchr (const char *, int);
 425 static int etags_strcasecmp (const char *, const char *);
 426 static int etags_strncasecmp (const char *, const char *, int);
 427 static char *etags_getcwd (void);
 428 static char *relative_filename (char *, char *);
 429 static char *absolute_filename (char *, char *);
 430 static char *absolute_dirname (char *, char *);
 431 static bool filename_is_absolute (char *f);
 432 static void canonicalize_filename (char *);
 433 static void linebuffer_init (linebuffer *);
 434 static void linebuffer_setlen (linebuffer *, int);
 435 static PTR xmalloc (unsigned int);
 436 static PTR xrealloc (char *, unsigned int);
 437
 438 \f
/* File-scope state of the tagger.  */
static char searchar = '/';     /* use /.../ searches */

static char *tagfile;           /* output file */
static char *progname;          /* name this program was invoked with */
static char *cwd;               /* current working directory */
static char *tagfiledir;        /* directory of tagfile */
static FILE *tagf;              /* ioptr for tags file */

static fdesc *fdhead;           /* head of file description list */
static fdesc *curfdp;           /* current file description */
static int lineno;              /* line number of current line */
static long charno;             /* current character number */
static long linecharno;         /* charno of start of current line */
static char *dbp;               /* pointer to start of current tag */

/* NOTE(review): presumably the value stored as a character number when no
   valid one is available -- confirm against the users of this constant.  */
static const int invalidcharno = -1;

static node *nodehead;          /* the head of the binary tree of tags */
static node *last_node;         /* the last node created */

static linebuffer lb;           /* the current line */
static linebuffer filebuf;      /* a buffer containing the whole file */
static linebuffer token_name;   /* a buffer containing a tag name */
 462
/* boolean "functions" (see init)       */
/* Lookup tables with one entry per character value, read through the
   iswhite, notinname, intoken, begtoken and endtoken macros.
   NOTE(review): presumably filled in by init from the strings below.  */
static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
static const char
  /* white chars */
  *white = " \f\t\n\r\v",
  /* not in a name */
  *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
  /* token ending chars */
  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
  /* token starting chars */
  *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
  /* valid in-token chars */
  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 476
/* Flags controlled by command line options.  */
static bool append_to_tagfile;  /* -a: append to tags */
/* The next five default to TRUE in C and derived languages.  */
static bool typedefs;           /* -t: create tags for C and Ada typedefs */
static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
                                /* 0 struct/enum/union decls, and C++ */
                                /* member functions. */
static bool constantypedefs;    /* -d: create tags for C #define, enum */
                                /* constants and variables. */
                                /* -D: opposite of -d.  Default under ctags. */
static bool globals;            /* --globals, --no-globals: create tags for */
                                /* global variables */
static bool members;            /* --members, --no-members: create tags for */
                                /* C member variables */
static bool declarations;       /* --declarations: tag them and extern in C&Co*/
static bool no_line_directive;  /* --no-line-directive: ignore #line */
                                /* directives (undocumented) */
static bool no_duplicates;      /* --no-duplicates: no duplicate tags for */
                                /* ctags (undocumented) */
static bool update;             /* -u: update tags */
static bool vgrind_style;       /* -v: create vgrind style index output */
static bool no_warnings;        /* -w: suppress warnings (undocumented) */
static bool cxref_style;        /* -x: create cxref style output */
static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
static bool ignoreindent;       /* -I: ignore indentation in C */
static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 498
/* STDIN is defined in LynxOS system headers */
#ifdef STDIN
# undef STDIN
#endif

/* The value lies above the range of a char, so it cannot coincide with
   any short option letter.  */
#define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
static bool parsing_stdin;      /* --parse-stdin used */

static regexp *p_head;          /* list of all regexps */
static bool need_filebuf;       /* some regexes are multi-line */
 509
 510 static struct option longopts[] =
 511 {
 512   { "append",             no_argument,       NULL,               'a'   },
 513   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 514   { "c++",                no_argument,       NULL,               'C'   },
 515   { "declarations",       no_argument,       &declarations,      TRUE  },
 516   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 517   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 518   { "help",               no_argument,       NULL,               'h'   },
 519   { "help",               no_argument,       NULL,               'H'   },
 520   { "ignore-indentation", no_argument,       NULL,               'I'   },
 521   { "language",           required_argument, NULL,               'l'   },
 522   { "members",            no_argument,       &members,           TRUE  },
 523   { "no-members",         no_argument,       &members,           FALSE },
 524   { "output",             required_argument, NULL,               'o'   },
 525   { "regex",              required_argument, NULL,               'r'   },
 526   { "no-regex",           no_argument,       NULL,               'R'   },
 527   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 528   { "parse-stdin",        required_argument, NULL,               STDIN },
 529   { "version",            no_argument,       NULL,               'V'   },
 530
 531 #if CTAGS /* Ctags options */
 532   { "backward-search",    no_argument,       NULL,               'B'   },
 533   { "cxref",              no_argument,       NULL,               'x'   },
 534   { "defines",            no_argument,       NULL,               'd'   },
 535   { "globals",            no_argument,       &globals,           TRUE  },
 536   { "typedefs",           no_argument,       NULL,               't'   },
 537   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 538   { "update",             no_argument,       NULL,               'u'   },
 539   { "vgrind",             no_argument,       NULL,               'v'   },
 540   { "no-warn",            no_argument,       NULL,               'w'   },
 541
 542 #else /* Etags options */
 543   { "no-defines",         no_argument,       NULL,               'D'   },
 544   { "no-globals",         no_argument,       &globals,           FALSE },
 545   { "include",            required_argument, NULL,               'i'   },
 546 #endif
 547   { NULL }
 548 };
 549
/* Recognized compressed-file suffixes, each with the command that
   decompresses such a file.  An entry with a null suffix ends the
   table.  */
static compressor compressors[] =
{
  { "z", "gzip -d -c"},
  { "Z", "gzip -d -c"},
  { "gz", "gzip -d -c"},
  { "GZ", "gzip -d -c"},
  { "bz2", "bzip2 -d -c" },
  { "xz", "xz -d -c" },
  { NULL }
};
 560
 561 /*
 562  * Language stuff.
 563  */
 564
 565 /* Ada code */
 566 static const char *Ada_suffixes [] =
 567   { "ads", "adb", "ada", NULL };
 568 static const char Ada_help [] =
 569 "In Ada code, functions, procedures, packages, tasks and types are\n\
 570 tags.  Use the `--packages-only' option to create tags for\n\
 571 packages only.\n\
 572 Ada tag names have suffixes indicating the type of entity:\n\
 573         Entity type:    Qualifier:\n\
 574         ------------    ----------\n\
 575         function        /f\n\
 576         procedure       /p\n\
 577         package spec    /s\n\
 578         package body    /b\n\
 579         type            /t\n\
 580         task            /k\n\
 581 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 582 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 583 will just search for any tag `bidule'.";
 584
 585 /* Assembly code */
 586 static const char *Asm_suffixes [] =
 587   { "a",        /* Unix assembler */
 588     "asm", /* Microcontroller assembly */
 589     "def", /* BSO/Tasking definition includes  */
 590     "inc", /* Microcontroller include files */
 591     "ins", /* Microcontroller include files */
 592     "s", "sa", /* Unix assembler */
 593     "S",   /* cpp-processed Unix assembler */
 594     "src", /* BSO/Tasking C compiler output */
 595     NULL
 596   };
 597 static const char Asm_help [] =
 598 "In assembler code, labels appearing at the beginning of a line,\n\
 599 followed by a colon, are tags.";
 600
 601
 602 /* Note that .c and .h can be considered C++, if the --c++ flag was
 603    given, or if the `class' or `template' keywords are met inside the file.
 604    That is why default_C_entries is called for these. */
 605 static const char *default_C_suffixes [] =
 606   { "c", "h", NULL };
 607 #if CTAGS                               /* C help for Ctags */
 608 static const char default_C_help [] =
 609 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 610 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 611 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 612 Use --globals to tag global variables.\n\
 613 You can tag function declarations and external variables by\n\
 614 using `--declarations', and struct members by using `--members'.";
 615 #else                                   /* C help for Etags */
 616 static const char default_C_help [] =
 617 "In C code, any C function or typedef is a tag, and so are\n\
 618 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 619 definitions and `enum' constants are tags unless you specify\n\
 620 `--no-defines'.  Global variables are tags unless you specify\n\
 621 `--no-globals' and so are struct members unless you specify\n\
 622 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 623 `--no-members' can make the tags table file much smaller.\n\
 624 You can tag function declarations and external variables by\n\
 625 using `--declarations'.";
 626 #endif  /* C help for Ctags and Etags */
 627
 628 static const char *Cplusplus_suffixes [] =
 629   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 630     "M",                        /* Objective C++ */
 631     "pdb",                      /* Postscript with C syntax */
 632     NULL };
 633 static const char Cplusplus_help [] =
 634 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 635 --help --lang=c --lang=c++ for full help.)\n\
 636 In addition to C tags, member functions are also recognized.  Member\n\
 637 variables are recognized unless you use the `--no-members' option.\n\
 638 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 639 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 640 `operator+'.";
 641
 642 static const char *Cjava_suffixes [] =
 643   { "java", NULL };
 644 static char Cjava_help [] =
 645 "In Java code, all the tags constructs of C and C++ code are\n\
 646 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 647
 648
 649 static const char *Cobol_suffixes [] =
 650   { "COB", "cob", NULL };
 651 static char Cobol_help [] =
 652 "In Cobol code, tags are paragraph names; that is, any word\n\
 653 starting in column 8 and followed by a period.";
 654
 655 static const char *Cstar_suffixes [] =
 656   { "cs", "hs", NULL };
 657
 658 static const char *Erlang_suffixes [] =
 659   { "erl", "hrl", NULL };
 660 static const char Erlang_help [] =
 661 "In Erlang code, the tags are the functions, records and macros\n\
 662 defined in the file.";
 663
 664 const char *Forth_suffixes [] =
 665   { "fth", "tok", NULL };
 666 static const char Forth_help [] =
 667 "In Forth code, tags are words defined by `:',\n\
 668 constant, code, create, defer, value, variable, buffer:, field.";
 669
 670 static const char *Fortran_suffixes [] =
 671   { "F", "f", "f90", "for", NULL };
 672 static const char Fortran_help [] =
 673 "In Fortran code, functions, subroutines and block data are tags.";
 674
 675 static const char *HTML_suffixes [] =
 676   { "htm", "html", "shtml", NULL };
 677 static const char HTML_help [] =
 678 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 679 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 680 occurrences of `id='.";
 681
 682 static const char *Lisp_suffixes [] =
 683   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 684 static const char Lisp_help [] =
 685 "In Lisp code, any function defined with `defun', any variable\n\
 686 defined with `defvar' or `defconst', and in general the first\n\
 687 argument of any expression that starts with `(def' in column zero\n\
 688 is a tag.";
 689
 690 static const char *Lua_suffixes [] =
 691   { "lua", "LUA", NULL };
 692 static const char Lua_help [] =
 693 "In Lua scripts, all functions are tags.";
 694
 695 static const char *Makefile_filenames [] =
 696   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 697 static const char Makefile_help [] =
 698 "In makefiles, targets are tags; additionally, variables are tags\n\
 699 unless you specify `--no-globals'.";
 700
 701 static const char *Objc_suffixes [] =
 702   { "lm",                       /* Objective lex file */
 703     "m",                        /* Objective C file */
 704      NULL };
 705 static const char Objc_help [] =
 706 "In Objective C code, tags include Objective C definitions for classes,\n\
 707 class categories, methods and protocols.  Tags for variables and\n\
 708 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 709 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 710
 711 static const char *Pascal_suffixes [] =
 712   { "p", "pas", NULL };
 713 static const char Pascal_help [] =
 714 "In Pascal code, the tags are the functions and procedures defined\n\
 715 in the file.";
 716 /* " // this is for working around an Emacs highlighting bug... */
 717
 718 static const char *Perl_suffixes [] =
 719   { "pl", "pm", NULL };
 720 static const char *Perl_interpreters [] =
 721   { "perl", "@PERL@", NULL };
 722 static const char Perl_help [] =
 723 "In Perl code, the tags are the packages, subroutines and variables\n\
 724 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 725 `--globals' if you want to tag global variables.  Tags for\n\
 726 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 727 defined in the default package is `main::SUB'.";
 728
 729 static const char *PHP_suffixes [] =
 730   { "php", "php3", "php4", NULL };
 731 static const char PHP_help [] =
 732 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 733 the `--no-members' option, vars are tags too.";
 734
 735 static const char *plain_C_suffixes [] =
 736   { "pc",                       /* Pro*C file */
 737      NULL };
 738
 739 static const char *PS_suffixes [] =
 740   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 741 static const char PS_help [] =
 742 "In PostScript code, the tags are the functions.";
 743
 744 static const char *Prolog_suffixes [] =
 745   { "prolog", NULL };
 746 static const char Prolog_help [] =
 747 "In Prolog code, tags are predicates and rules at the beginning of\n\
 748 line.";
 749
 750 static const char *Python_suffixes [] =
 751   { "py", NULL };
 752 static const char Python_help [] =
 753 "In Python code, `def' or `class' at the beginning of a line\n\
 754 generate a tag.";
 755
 756 /* Can't do the `SCM' or `scm' prefix with a version number. */
 757 static const char *Scheme_suffixes [] =
 758   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 759 static const char Scheme_help [] =
 760 "In Scheme code, tags include anything defined with `def' or with a\n\
 761 construct whose name starts with `def'.  They also include\n\
 762 variables set with `set!' at top level in the file.";
 763
 764 static const char *TeX_suffixes [] =
 765   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 766 static const char TeX_help [] =
 767 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 768 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 769 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 770 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 771 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 772 \n\
 773 Other commands can be specified by setting the environment variable\n\
 774 `TEXTAGS' to a colon-separated list like, for example,\n\
 775      TEXTAGS=\"mycommand:myothercommand\".";
 776
 777
 778 static const char *Texinfo_suffixes [] =
 779   { "texi", "texinfo", "txi", NULL };
 780 static const char Texinfo_help [] =
 781 "for texinfo files, lines starting with @node are tagged.";
 782
 783 static const char *Yacc_suffixes [] =
 784   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 785 static const char Yacc_help [] =
 786 "In Bison or Yacc input files, each rule defines as a tag the\n\
 787 nonterminal it constructs.  The portions of the file that contain\n\
 788 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 789 for full help).";
 790
 791 static const char auto_help [] =
 792 "`auto' is not a real language, it indicates to use\n\
 793 a default language for files base on file name suffix and file contents.";
 794
 795 static const char none_help [] =
 796 "`none' is not a real language, it indicates to only do\n\
 797 regexp processing on files.";
 798
 799 static const char no_lang_help [] =
 800 "No detailed help available for this language.";
 801
 802
 803 /*
 804  * Table of languages.
 805  *
 806  * It is ok for a given function to be listed under more than one
 807  * name.  I just didn't.
 808  */
 809
 810 static language lang_names [] =
 811 {
 812   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 813   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 814   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 815   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 816   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 817   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 818   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 819   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 820   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 821   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 822   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 823   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 824   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 825   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 826   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 827   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 828   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 829   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 830   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 831   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 832   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 833   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 834   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 835   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 836   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 837   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 838   { "auto",      auto_help },                      /* default guessing scheme */
 839   { "none",      none_help,      just_read_file }, /* regexp matching only */
 840   { NULL }                /* end of list */
 841 };
 842
 843 \f
 844 static void
 845 print_language_names (void)
 846 {
 847   language *lang;
 848   const char **name, **ext;
 849
 850   puts ("\nThese are the currently supported languages, along with the\n\
 851 default file names and dot suffixes:");
 852   for (lang = lang_names; lang->name != NULL; lang++)
 853     {
 854       printf ("  %-*s", 10, lang->name);
 855       if (lang->filenames != NULL)
 856         for (name = lang->filenames; *name != NULL; name++)
 857           printf (" %s", *name);
 858       if (lang->suffixes != NULL)
 859         for (ext = lang->suffixes; *ext != NULL; ext++)
 860           printf (" .%s", *ext);
 861       puts ("");
 862     }
 863   puts ("where `auto' means use default language for files based on file\n\
 864 name suffix, and `none' means only do regexp processing on files.\n\
 865 If no language is specified and no matching suffix is found,\n\
 866 the first line of the file is read for a sharp-bang (#!) sequence\n\
 867 followed by the name of an interpreter.  If no such sequence is found,\n\
 868 Fortran is tried first; if no tags are found, C is tried next.\n\
 869 When parsing any C file, a \"class\" or \"template\" keyword\n\
 870 switches to C++.");
 871   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 872 \n\
 873 For detailed help on a given language use, for example,\n\
 874 etags --help --lang=ada.");
 875 }
 876
 877 #ifndef EMACS_NAME
 878 # define EMACS_NAME "standalone"
 879 #endif
 880 #ifndef VERSION
 881 # define VERSION "17.38.1.4"
 882 #endif
 883 static void
 884 print_version (void)
 885 {
 886   /* Makes it easier to update automatically. */
 887   char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
 888
 889   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 890   puts (emacs_copyright);
 891   puts ("This program is distributed under the terms in ETAGS.README");
 892
 893   exit (EXIT_SUCCESS);
 894 }
 895
 896 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 897 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 898 #endif
 899
 900 static void
 901 print_help (argument *argbuffer)
 902 {
 903   bool help_for_lang = FALSE;
 904
 905   for (; argbuffer->arg_type != at_end; argbuffer++)
 906     if (argbuffer->arg_type == at_language)
 907       {
 908         if (help_for_lang)
 909           puts ("");
 910         puts (argbuffer->lang->help);
 911         help_for_lang = TRUE;
 912       }
 913
 914   if (help_for_lang)
 915     exit (EXIT_SUCCESS);
 916
 917   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 918 \n\
 919 These are the options accepted by %s.\n", progname, progname);
 920   if (NO_LONG_OPTIONS)
 921     puts ("WARNING: long option names do not work with this executable,\n\
 922 as it is not linked with GNU getopt.");
 923   else
 924     puts ("You may use unambiguous abbreviations for the long option names.");
 925   puts ("  A - as file name means read names from stdin (one per line).\n\
 926 Absolute names are stored in the output file as they are.\n\
 927 Relative ones are stored relative to the output file's directory.\n");
 928
 929   puts ("-a, --append\n\
 930         Append tag entries to existing tags file.");
 931
 932   puts ("--packages-only\n\
 933         For Ada files, only generate tags for packages.");
 934
 935   if (CTAGS)
 936     puts ("-B, --backward-search\n\
 937         Write the search commands for the tag entries using '?', the\n\
 938         backward-search command instead of '/', the forward-search command.");
 939
 940   /* This option is mostly obsolete, because etags can now automatically
 941      detect C++.  Retained for backward compatibility and for debugging and
 942      experimentation.  In principle, we could want to tag as C++ even
 943      before any "class" or "template" keyword.
 944   puts ("-C, --c++\n\
 945         Treat files whose name suffix defaults to C language as C++ files.");
 946   */
 947
 948   puts ("--declarations\n\
 949         In C and derived languages, create tags for function declarations,");
 950   if (CTAGS)
 951     puts ("\tand create tags for extern variables if --globals is used.");
 952   else
 953     puts
 954       ("\tand create tags for extern variables unless --no-globals is used.");
 955
 956   if (CTAGS)
 957     puts ("-d, --defines\n\
 958         Create tag entries for C #define constants and enum constants, too.");
 959   else
 960     puts ("-D, --no-defines\n\
 961         Don't create tag entries for C #define constants and enum constants.\n\
 962         This makes the tags file smaller.");
 963
 964   if (!CTAGS)
 965     puts ("-i FILE, --include=FILE\n\
 966         Include a note in tag file indicating that, when searching for\n\
 967         a tag, one should also consult the tags file FILE after\n\
 968         checking the current file.");
 969
 970   puts ("-l LANG, --language=LANG\n\
 971         Force the following files to be considered as written in the\n\
 972         named language up to the next --language=LANG option.");
 973
 974   if (CTAGS)
 975     puts ("--globals\n\
 976         Create tag entries for global variables in some languages.");
 977   else
 978     puts ("--no-globals\n\
 979         Do not create tag entries for global variables in some\n\
 980         languages.  This makes the tags file smaller.");
 981
 982   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 983     puts ("--no-line-directive\n\
 984         Ignore #line preprocessor directives in C and derived languages.");
 985
 986   if (CTAGS)
 987     puts ("--members\n\
 988         Create tag entries for members of structures in some languages.");
 989   else
 990     puts ("--no-members\n\
 991         Do not create tag entries for members of structures\n\
 992         in some languages.");
 993
 994   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 995         Make a tag for each line matching a regular expression pattern\n\
 996         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 997         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 998         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 999         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1000   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1001         For example Tcl named tags can be created with:\n\
1002           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1003         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1004         `m' means to allow multi-line matches, `s' implies `m' and\n\
1005         causes dot to match any character, including newline.");
1006
1007   puts ("-R, --no-regex\n\
1008         Don't create tags from regexps for the following files.");
1009
1010   puts ("-I, --ignore-indentation\n\
1011         In C and C++ do not assume that a closing brace in the first\n\
1012         column is the final brace of a function or structure definition.");
1013
1014   puts ("-o FILE, --output=FILE\n\
1015         Write the tags to FILE.");
1016
1017   puts ("--parse-stdin=NAME\n\
1018         Read from standard input and record tags as belonging to file NAME.");
1019
1020   if (CTAGS)
1021     {
1022       puts ("-t, --typedefs\n\
1023         Generate tag entries for C and Ada typedefs.");
1024       puts ("-T, --typedefs-and-c++\n\
1025         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1026         and C++ member functions.");
1027     }
1028
1029   if (CTAGS)
1030     puts ("-u, --update\n\
1031         Update the tag entries for the given files, leaving tag\n\
1032         entries for other files in place.  Currently, this is\n\
1033         implemented by deleting the existing entries for the given\n\
1034         files and then rewriting the new entries at the end of the\n\
1035         tags file.  It is often faster to simply rebuild the entire\n\
1036         tag file than to use this.");
1037
1038   if (CTAGS)
1039     {
1040       puts ("-v, --vgrind\n\
1041         Print on the standard output an index of items intended for\n\
1042         human consumption, similar to the output of vgrind.  The index\n\
1043         is sorted, and gives the page number of each item.");
1044
1045       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1046         puts ("-w, --no-duplicates\n\
1047         Do not create duplicate tag entries, for compatibility with\n\
1048         traditional ctags.");
1049
1050       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1051         puts ("-w, --no-warn\n\
1052         Suppress warning messages about duplicate tag entries.");
1053
1054       puts ("-x, --cxref\n\
1055         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1056         The output uses line numbers instead of page numbers, but\n\
1057         beyond that the differences are cosmetic; try both to see\n\
1058         which you like.");
1059     }
1060
1061   puts ("-V, --version\n\
1062         Print the version of the program.\n\
1063 -h, --help\n\
1064         Print this help message.\n\
1065         Followed by one or more `--language' options prints detailed\n\
1066         help about tag generation for the specified languages.");
1067
1068   print_language_names ();
1069
1070   puts ("");
1071   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1072
1073   exit (EXIT_SUCCESS);
1074 }
1075
1076 \f
1077 int
1078 main (int argc, char **argv)
1079 {
1080   int i;
1081   unsigned int nincluded_files;
1082   char **included_files;
1083   argument *argbuffer;
1084   int current_arg, file_count;
1085   linebuffer filename_lb;
1086   bool help_asked = FALSE;
1087  char *optstring;
1088  int opt;
1089
1090
1091 #ifdef DOS_NT
1092   _fmode = O_BINARY;   /* all of files are treated as binary files */
1093 #endif /* DOS_NT */
1094
1095   progname = argv[0];
1096   nincluded_files = 0;
1097   included_files = xnew (argc, char *);
1098   current_arg = 0;
1099   file_count = 0;
1100
1101   /* Allocate enough no matter what happens.  Overkill, but each one
1102      is small. */
1103   argbuffer = xnew (argc, argument);
1104
1105   /*
1106    * Always find typedefs and structure tags.
1107    * Also default to find macro constants, enum constants, struct
1108    * members and global variables.  Do it for both etags and ctags.
1109    */
1110   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1111   globals = members = TRUE;
1112
1113   /* When the optstring begins with a '-' getopt_long does not rearrange the
1114      non-options arguments to be at the end, but leaves them alone. */
1115   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1116                       "ac:Cf:Il:o:r:RSVhH",
1117                       (CTAGS) ? "BxdtTuvw" : "Di:");
1118
1119   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1120     switch (opt)
1121       {
1122       case 0:
1123         /* If getopt returns 0, then it has already processed a
1124            long-named option.  We should do nothing.  */
1125         break;
1126
1127       case 1:
1128         /* This means that a file name has been seen.  Record it. */
1129         argbuffer[current_arg].arg_type = at_filename;
1130         argbuffer[current_arg].what     = optarg;
1131         ++current_arg;
1132         ++file_count;
1133         break;
1134
1135       case STDIN:
1136         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1137         argbuffer[current_arg].arg_type = at_stdin;
1138         argbuffer[current_arg].what     = optarg;
1139         ++current_arg;
1140         ++file_count;
1141         if (parsing_stdin)
1142           fatal ("cannot parse standard input more than once", (char *)NULL);
1143         parsing_stdin = TRUE;
1144         break;
1145
1146         /* Common options. */
1147       case 'a': append_to_tagfile = TRUE;       break;
1148       case 'C': cplusplus = TRUE;               break;
1149       case 'f':         /* for compatibility with old makefiles */
1150       case 'o':
1151         if (tagfile)
1152           {
1153             error ("-o option may only be given once.", (char *)NULL);
1154             suggest_asking_for_help ();
1155             /* NOTREACHED */
1156           }
1157         tagfile = optarg;
1158         break;
1159       case 'I':
1160       case 'S':         /* for backward compatibility */
1161         ignoreindent = TRUE;
1162         break;
1163       case 'l':
1164         {
1165           language *lang = get_language_from_langname (optarg);
1166           if (lang != NULL)
1167             {
1168               argbuffer[current_arg].lang = lang;
1169               argbuffer[current_arg].arg_type = at_language;
1170               ++current_arg;
1171             }
1172         }
1173         break;
1174       case 'c':
1175         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1176         optarg = concat (optarg, "i", ""); /* memory leak here */
1177         /* FALLTHRU */
1178       case 'r':
1179         argbuffer[current_arg].arg_type = at_regexp;
1180         argbuffer[current_arg].what = optarg;
1181         ++current_arg;
1182         break;
1183       case 'R':
1184         argbuffer[current_arg].arg_type = at_regexp;
1185         argbuffer[current_arg].what = NULL;
1186         ++current_arg;
1187         break;
1188       case 'V':
1189         print_version ();
1190         break;
1191       case 'h':
1192       case 'H':
1193         help_asked = TRUE;
1194         break;
1195
1196         /* Etags options */
1197       case 'D': constantypedefs = FALSE;                        break;
1198       case 'i': included_files[nincluded_files++] = optarg;     break;
1199
1200         /* Ctags options. */
1201       case 'B': searchar = '?';                                 break;
1202       case 'd': constantypedefs = TRUE;                         break;
1203       case 't': typedefs = TRUE;                                break;
1204       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1205       case 'u': update = TRUE;                                  break;
1206       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1207       case 'x': cxref_style = TRUE;                             break;
1208       case 'w': no_warnings = TRUE;                             break;
1209       default:
1210         suggest_asking_for_help ();
1211         /* NOTREACHED */
1212       }
1213
1214   /* No more options.  Store the rest of arguments. */
1215   for (; optind < argc; optind++)
1216     {
1217       argbuffer[current_arg].arg_type = at_filename;
1218       argbuffer[current_arg].what = argv[optind];
1219       ++current_arg;
1220       ++file_count;
1221     }
1222
1223   argbuffer[current_arg].arg_type = at_end;
1224
1225   if (help_asked)
1226     print_help (argbuffer);
1227     /* NOTREACHED */
1228
1229   if (nincluded_files == 0 && file_count == 0)
1230     {
1231       error ("no input files specified.", (char *)NULL);
1232       suggest_asking_for_help ();
1233       /* NOTREACHED */
1234     }
1235
1236   if (tagfile == NULL)
1237     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1238   cwd = etags_getcwd ();        /* the current working directory */
1239   if (cwd[strlen (cwd) - 1] != '/')
1240     {
1241       char *oldcwd = cwd;
1242       cwd = concat (oldcwd, "/", "");
1243       free (oldcwd);
1244     }
1245
1246   /* Compute base directory for relative file names. */
1247   if (streq (tagfile, "-")
1248       || strneq (tagfile, "/dev/", 5))
1249     tagfiledir = cwd;            /* relative file names are relative to cwd */
1250   else
1251     {
1252       canonicalize_filename (tagfile);
1253       tagfiledir = absolute_dirname (tagfile, cwd);
1254     }
1255
1256   init ();                      /* set up boolean "functions" */
1257
1258   linebuffer_init (&lb);
1259   linebuffer_init (&filename_lb);
1260   linebuffer_init (&filebuf);
1261   linebuffer_init (&token_name);
1262
1263   if (!CTAGS)
1264     {
1265       if (streq (tagfile, "-"))
1266         {
1267           tagf = stdout;
1268 #ifdef DOS_NT
1269           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1270              doesn't take effect until after `stdout' is already open). */
1271           if (!isatty (fileno (stdout)))
1272             setmode (fileno (stdout), O_BINARY);
1273 #endif /* DOS_NT */
1274         }
1275       else
1276         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1277       if (tagf == NULL)
1278         pfatal (tagfile);
1279     }
1280
1281   /*
1282    * Loop through files finding functions.
1283    */
1284   for (i = 0; i < current_arg; i++)
1285     {
1286       static language *lang;    /* non-NULL if language is forced */
1287       char *this_file;
1288
1289       switch (argbuffer[i].arg_type)
1290         {
1291         case at_language:
1292           lang = argbuffer[i].lang;
1293           break;
1294         case at_regexp:
1295           analyse_regex (argbuffer[i].what);
1296           break;
1297         case at_filename:
1298               this_file = argbuffer[i].what;
1299               /* Input file named "-" means read file names from stdin
1300                  (one per line) and use them. */
1301               if (streq (this_file, "-"))
1302                 {
1303                   if (parsing_stdin)
1304                     fatal ("cannot parse standard input AND read file names from it",
1305                            (char *)NULL);
1306                   while (readline_internal (&filename_lb, stdin) > 0)
1307                     process_file_name (filename_lb.buffer, lang);
1308                 }
1309               else
1310                 process_file_name (this_file, lang);
1311           break;
1312         case at_stdin:
1313           this_file = argbuffer[i].what;
1314           process_file (stdin, this_file, lang);
1315           break;
1316         }
1317     }
1318
1319   free_regexps ();
1320   free (lb.buffer);
1321   free (filebuf.buffer);
1322   free (token_name.buffer);
1323
1324   if (!CTAGS || cxref_style)
1325     {
1326       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1327       put_entries (nodehead);
1328       free_tree (nodehead);
1329       nodehead = NULL;
1330       if (!CTAGS)
1331         {
1332           fdesc *fdp;
1333
1334           /* Output file entries that have no tags. */
1335           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1336             if (!fdp->written)
1337               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1338
1339           while (nincluded_files-- > 0)
1340             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1341
1342           if (fclose (tagf) == EOF)
1343             pfatal (tagfile);
1344         }
1345
1346       exit (EXIT_SUCCESS);
1347     }
1348
1349   /* From here on, we are in (CTAGS && !cxref_style) */
1350   if (update)
1351     {
1352       char cmd[BUFSIZ];
1353       for (i = 0; i < current_arg; ++i)
1354         {
1355           switch (argbuffer[i].arg_type)
1356             {
1357             case at_filename:
1358             case at_stdin:
1359               break;
1360             default:
1361               continue;         /* the for loop */
1362             }
1363           sprintf (cmd,
1364                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1365                    tagfile, argbuffer[i].what, tagfile);
1366           if (system (cmd) != EXIT_SUCCESS)
1367             fatal ("failed to execute shell command", (char *)NULL);
1368         }
1369       append_to_tagfile = TRUE;
1370     }
1371
1372   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1373   if (tagf == NULL)
1374     pfatal (tagfile);
1375   put_entries (nodehead);       /* write all the tags (CTAGS) */
1376   free_tree (nodehead);
1377   nodehead = NULL;
1378   if (fclose (tagf) == EOF)
1379     pfatal (tagfile);
1380
1381   if (CTAGS)
1382     if (append_to_tagfile || update)
1383       {
1384         char cmd[2*BUFSIZ+20];
1385         /* Maybe these should be used:
1386            setenv ("LC_COLLATE", "C", 1);
1387            setenv ("LC_ALL", "C", 1); */
1388         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1389         exit (system (cmd));
1390       }
1391   return EXIT_SUCCESS;
1392 }
1393
1394
1395 /*
1396  * Return a compressor given the file name.  If EXTPTR is non-zero,
1397  * return a pointer into FILE where the compressor-specific
1398  * extension begins.  If no compressor is found, NULL is returned
1399  * and EXTPTR is not significant.
1400  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1401  */
1402 static compressor *
1403 get_compressor_from_suffix (char *file, char **extptr)
1404 {
1405   compressor *compr;
1406   char *slash, *suffix;
1407
1408   /* File has been processed by canonicalize_filename,
1409      so we don't need to consider backslashes on DOS_NT.  */
1410   slash = etags_strrchr (file, '/');
1411   suffix = etags_strrchr (file, '.');
1412   if (suffix == NULL || suffix < slash)
1413     return NULL;
1414   if (extptr != NULL)
1415     *extptr = suffix;
1416   suffix += 1;
1417   /* Let those poor souls who live with DOS 8+3 file name limits get
1418      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1419      Only the first do loop is run if not MSDOS */
1420   do
1421     {
1422       for (compr = compressors; compr->suffix != NULL; compr++)
1423         if (streq (compr->suffix, suffix))
1424           return compr;
1425       if (!MSDOS)
1426         break;                  /* do it only once: not really a loop */
1427       if (extptr != NULL)
1428         *extptr = ++suffix;
1429     } while (*suffix != '\0');
1430   return NULL;
1431 }
1432
1433
1434
1435 /*
1436  * Return a language given the name.
1437  */
1438 static language *
1439 get_language_from_langname (const char *name)
1440 {
1441   language *lang;
1442
1443   if (name == NULL)
1444     error ("empty language name", (char *)NULL);
1445   else
1446     {
1447       for (lang = lang_names; lang->name != NULL; lang++)
1448         if (streq (name, lang->name))
1449           return lang;
1450       error ("unknown language \"%s\"", name);
1451     }
1452
1453   return NULL;
1454 }
1455
1456
1457 /*
1458  * Return a language given the interpreter name.
1459  */
1460 static language *
1461 get_language_from_interpreter (char *interpreter)
1462 {
1463   language *lang;
1464   const char **iname;
1465
1466   if (interpreter == NULL)
1467     return NULL;
1468   for (lang = lang_names; lang->name != NULL; lang++)
1469     if (lang->interpreters != NULL)
1470       for (iname = lang->interpreters; *iname != NULL; iname++)
1471         if (streq (*iname, interpreter))
1472             return lang;
1473
1474   return NULL;
1475 }
1476
1477
1478
1479 /*
1480  * Return a language given the file name.
1481  */
1482 static language *
1483 get_language_from_filename (char *file, int case_sensitive)
1484 {
1485   language *lang;
1486   const char **name, **ext, *suffix;
1487
1488   /* Try whole file name first. */
1489   for (lang = lang_names; lang->name != NULL; lang++)
1490     if (lang->filenames != NULL)
1491       for (name = lang->filenames; *name != NULL; name++)
1492         if ((case_sensitive)
1493             ? streq (*name, file)
1494             : strcaseeq (*name, file))
1495           return lang;
1496
1497   /* If not found, try suffix after last dot. */
1498   suffix = etags_strrchr (file, '.');
1499   if (suffix == NULL)
1500     return NULL;
1501   suffix += 1;
1502   for (lang = lang_names; lang->name != NULL; lang++)
1503     if (lang->suffixes != NULL)
1504       for (ext = lang->suffixes; *ext != NULL; ext++)
1505         if ((case_sensitive)
1506             ? streq (*ext, suffix)
1507             : strcaseeq (*ext, suffix))
1508           return lang;
1509   return NULL;
1510 }
1511
1512 \f
1513 /*
1514  * This routine is called on each file argument.
1515  */
1516 static void
1517 process_file_name (char *file, language *lang)
1518 {
1519   struct stat stat_buf;
1520   FILE *inf;
1521   fdesc *fdp;
1522   compressor *compr;
1523   char *compressed_name, *uncompressed_name;
1524   char *ext, *real_name;
1525   int retval;
1526
1527   canonicalize_filename (file);
1528   if (streq (file, tagfile) && !streq (tagfile, "-"))
1529     {
1530       error ("skipping inclusion of %s in self.", file);
1531       return;
1532     }
1533   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1534     {
1535       compressed_name = NULL;
1536       real_name = uncompressed_name = savestr (file);
1537     }
1538   else
1539     {
1540       real_name = compressed_name = savestr (file);
1541       uncompressed_name = savenstr (file, ext - file);
1542     }
1543
1544   /* If the canonicalized uncompressed name
1545      has already been dealt with, skip it silently. */
1546   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1547     {
1548       assert (fdp->infname != NULL);
1549       if (streq (uncompressed_name, fdp->infname))
1550         goto cleanup;
1551     }
1552
1553   if (stat (real_name, &stat_buf) != 0)
1554     {
1555       /* Reset real_name and try with a different name. */
1556       real_name = NULL;
1557       if (compressed_name != NULL) /* try with the given suffix */
1558         {
1559           if (stat (uncompressed_name, &stat_buf) == 0)
1560             real_name = uncompressed_name;
1561         }
1562       else                      /* try all possible suffixes */
1563         {
1564           for (compr = compressors; compr->suffix != NULL; compr++)
1565             {
1566               compressed_name = concat (file, ".", compr->suffix);
1567               if (stat (compressed_name, &stat_buf) != 0)
1568                 {
1569                   if (MSDOS)
1570                     {
1571                       char *suf = compressed_name + strlen (file);
1572                       size_t suflen = strlen (compr->suffix) + 1;
1573                       for ( ; suf[1]; suf++, suflen--)
1574                         {
1575                           memmove (suf, suf + 1, suflen);
1576                           if (stat (compressed_name, &stat_buf) == 0)
1577                             {
1578                               real_name = compressed_name;
1579                               break;
1580                             }
1581                         }
1582                       if (real_name != NULL)
1583                         break;
1584                     } /* MSDOS */
1585                   free (compressed_name);
1586                   compressed_name = NULL;
1587                 }
1588               else
1589                 {
1590                   real_name = compressed_name;
1591                   break;
1592                 }
1593             }
1594         }
1595       if (real_name == NULL)
1596         {
1597           perror (file);
1598           goto cleanup;
1599         }
1600     } /* try with a different name */
1601
1602   if (!S_ISREG (stat_buf.st_mode))
1603     {
1604       error ("skipping %s: it is not a regular file.", real_name);
1605       goto cleanup;
1606     }
1607   if (real_name == compressed_name)
1608     {
1609       char *cmd = concat (compr->command, " ", real_name);
1610       inf = (FILE *) popen (cmd, "r");
1611       free (cmd);
1612     }
1613   else
1614     inf = fopen (real_name, "r");
1615   if (inf == NULL)
1616     {
1617       perror (real_name);
1618       goto cleanup;
1619     }
1620
1621   process_file (inf, uncompressed_name, lang);
1622
1623   if (real_name == compressed_name)
1624     retval = pclose (inf);
1625   else
1626     retval = fclose (inf);
1627   if (retval < 0)
1628     pfatal (file);
1629
1630  cleanup:
1631   free (compressed_name);
1632   free (uncompressed_name);
1633   last_node = NULL;
1634   curfdp = NULL;
1635   return;
1636 }
1637
1638 static void
1639 process_file (FILE *fh, char *fn, language *lang)
1640 {
1641   static const fdesc emptyfdesc;
1642   fdesc *fdp;
1643
1644   /* Create a new input file description entry. */
1645   fdp = xnew (1, fdesc);
1646   *fdp = emptyfdesc;
1647   fdp->next = fdhead;
1648   fdp->infname = savestr (fn);
1649   fdp->lang = lang;
1650   fdp->infabsname = absolute_filename (fn, cwd);
1651   fdp->infabsdir = absolute_dirname (fn, cwd);
1652   if (filename_is_absolute (fn))
1653     {
1654       /* An absolute file name.  Canonicalize it. */
1655       fdp->taggedfname = absolute_filename (fn, NULL);
1656     }
1657   else
1658     {
1659       /* A file name relative to cwd.  Make it relative
1660          to the directory of the tags file. */
1661       fdp->taggedfname = relative_filename (fn, tagfiledir);
1662     }
1663   fdp->usecharno = TRUE;        /* use char position when making tags */
1664   fdp->prop = NULL;
1665   fdp->written = FALSE;         /* not written on tags file yet */
1666
1667   fdhead = fdp;
1668   curfdp = fdhead;              /* the current file description */
1669
1670   find_entries (fh);
1671
1672   /* If not Ctags, and if this is not metasource and if it contained no #line
1673      directives, we can write the tags and free all nodes pointing to
1674      curfdp. */
1675   if (!CTAGS
1676       && curfdp->usecharno      /* no #line directives in this file */
1677       && !curfdp->lang->metasource)
1678     {
1679       node *np, *prev;
1680
1681       /* Look for the head of the sublist relative to this file.  See add_node
1682          for the structure of the node tree. */
1683       prev = NULL;
1684       for (np = nodehead; np != NULL; prev = np, np = np->left)
1685         if (np->fdp == curfdp)
1686           break;
1687
1688       /* If we generated tags for this file, write and delete them. */
1689       if (np != NULL)
1690         {
1691           /* This is the head of the last sublist, if any.  The following
1692              instructions depend on this being true. */
1693           assert (np->left == NULL);
1694
1695           assert (fdhead == curfdp);
1696           assert (last_node->fdp == curfdp);
1697           put_entries (np);     /* write tags for file curfdp->taggedfname */
1698           free_tree (np);       /* remove the written nodes */
1699           if (prev == NULL)
1700             nodehead = NULL;    /* no nodes left */
1701           else
1702             prev->left = NULL;  /* delete the pointer to the sublist */
1703         }
1704     }
1705 }
1706
1707 /*
1708  * This routine sets up the boolean pseudo-functions which work
1709  * by setting boolean flags dependent upon the corresponding character.
1710  * Every char which is NOT in that string is not a white char.  Therefore,
1711  * all of the array "_wht" is set to FALSE, and then the elements
1712  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1713  * of a char is TRUE if it is the string "white", else FALSE.
1714  */
1715 static void
1716 init (void)
1717 {
1718   register const char *sp;
1719   register int i;
1720
1721   for (i = 0; i < CHARS; i++)
1722     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1723   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1724   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1725   notinname('\0') = notinname('\n');
1726   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1727   begtoken('\0') = begtoken('\n');
1728   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1729   intoken('\0') = intoken('\n');
1730   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1731   endtoken('\0') = endtoken('\n');
1732 }
1733
1734 /*
1735  * This routine opens the specified file and calls the function
1736  * which finds the function and type definitions.
1737  */
1738 static void
1739 find_entries (FILE *inf)
1740 {
1741   char *cp;
1742   language *lang = curfdp->lang;
1743   Lang_function *parser = NULL;
1744
1745   /* If user specified a language, use it. */
1746   if (lang != NULL && lang->function != NULL)
1747     {
1748       parser = lang->function;
1749     }
1750
1751   /* Else try to guess the language given the file name. */
1752   if (parser == NULL)
1753     {
1754       lang = get_language_from_filename (curfdp->infname, TRUE);
1755       if (lang != NULL && lang->function != NULL)
1756         {
1757           curfdp->lang = lang;
1758           parser = lang->function;
1759         }
1760     }
1761
1762   /* Else look for sharp-bang as the first two characters. */
1763   if (parser == NULL
1764       && readline_internal (&lb, inf) > 0
1765       && lb.len >= 2
1766       && lb.buffer[0] == '#'
1767       && lb.buffer[1] == '!')
1768     {
1769       char *lp;
1770
1771       /* Set lp to point at the first char after the last slash in the
1772          line or, if no slashes, at the first nonblank.  Then set cp to
1773          the first successive blank and terminate the string. */
1774       lp = etags_strrchr (lb.buffer+2, '/');
1775       if (lp != NULL)
1776         lp += 1;
1777       else
1778         lp = skip_spaces (lb.buffer + 2);
1779       cp = skip_non_spaces (lp);
1780       *cp = '\0';
1781
1782       if (strlen (lp) > 0)
1783         {
1784           lang = get_language_from_interpreter (lp);
1785           if (lang != NULL && lang->function != NULL)
1786             {
1787               curfdp->lang = lang;
1788               parser = lang->function;
1789             }
1790         }
1791     }
1792
1793   /* We rewind here, even if inf may be a pipe.  We fail if the
1794      length of the first line is longer than the pipe block size,
1795      which is unlikely. */
1796   rewind (inf);
1797
1798   /* Else try to guess the language given the case insensitive file name. */
1799   if (parser == NULL)
1800     {
1801       lang = get_language_from_filename (curfdp->infname, FALSE);
1802       if (lang != NULL && lang->function != NULL)
1803         {
1804           curfdp->lang = lang;
1805           parser = lang->function;
1806         }
1807     }
1808
1809   /* Else try Fortran or C. */
1810   if (parser == NULL)
1811     {
1812       node *old_last_node = last_node;
1813
1814       curfdp->lang = get_language_from_langname ("fortran");
1815       find_entries (inf);
1816
1817       if (old_last_node == last_node)
1818         /* No Fortran entries found.  Try C. */
1819         {
1820           /* We do not tag if rewind fails.
1821              Only the file name will be recorded in the tags file. */
1822           rewind (inf);
1823           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1824           find_entries (inf);
1825         }
1826       return;
1827     }
1828
1829   if (!no_line_directive
1830       && curfdp->lang != NULL && curfdp->lang->metasource)
1831     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1832        file, or anyway we parsed a file that is automatically generated from
1833        this one.  If this is the case, the bingo.c file contained #line
1834        directives that generated tags pointing to this file.  Let's delete
1835        them all before parsing this file, which is the real source. */
1836     {
1837       fdesc **fdpp = &fdhead;
1838       while (*fdpp != NULL)
1839         if (*fdpp != curfdp
1840             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1841           /* We found one of those!  We must delete both the file description
1842              and all tags referring to it. */
1843           {
1844             fdesc *badfdp = *fdpp;
1845
1846             /* Delete the tags referring to badfdp->taggedfname
1847                that were obtained from badfdp->infname. */
1848             invalidate_nodes (badfdp, &nodehead);
1849
1850             *fdpp = badfdp->next; /* remove the bad description from the list */
1851             free_fdesc (badfdp);
1852           }
1853         else
1854           fdpp = &(*fdpp)->next; /* advance the list pointer */
1855     }
1856
1857   assert (parser != NULL);
1858
1859   /* Generic initialisations before reading from file. */
1860   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1861
1862   /* Generic initialisations before parsing file with readline. */
1863   lineno = 0;                  /* reset global line number */
1864   charno = 0;                  /* reset global char number */
1865   linecharno = 0;              /* reset global char number of line start */
1866
1867   parser (inf);
1868
1869   regex_tag_multiline ();
1870 }
1871
1872 \f
1873 /*
1874  * Check whether an implicitly named tag should be created,
1875  * then call `pfnote'.
1876  * NAME is a string that is internally copied by this function.
1877  *
1878  * TAGS format specification
1879  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1880  * The following is explained in some more detail in etc/ETAGS.EBNF.
1881  *
1882  * make_tag creates tags with "implicit tag names" (unnamed tags)
1883  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1884  *  1. NAME does not contain any of the characters in NONAM;
1885  *  2. LINESTART contains name as either a rightmost, or rightmost but
1886  *     one character, substring;
1887  *  3. the character, if any, immediately before NAME in LINESTART must
1888  *     be a character in NONAM;
1889  *  4. the character, if any, immediately after NAME in LINESTART must
1890  *     also be a character in NONAM.
1891  *
1892  * The implementation uses the notinname() macro, which recognises the
1893  * characters stored in the string `nonam'.
1894  * etags.el needs to use the same characters that are in NONAM.
1895  */
1896 static void
1897 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1898           int namelen,          /* tag length */
1899           int is_func,          /* tag is a function */
1900           char *linestart,      /* start of the line where tag is */
1901           int linelen,          /* length of the line where tag is */
1902           int lno,              /* line number */
1903           long int cno)         /* character number */
1904 {
1905   bool named = (name != NULL && namelen > 0);
1906   char *nname = NULL;
1907
1908   if (!CTAGS && named)          /* maybe set named to false */
1909     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1910        such that etags.el can guess a name from it. */
1911     {
1912       int i;
1913       register const char *cp = name;
1914
1915       for (i = 0; i < namelen; i++)
1916         if (notinname (*cp++))
1917           break;
1918       if (i == namelen)                         /* rule #1 */
1919         {
1920           cp = linestart + linelen - namelen;
1921           if (notinname (linestart[linelen-1]))
1922             cp -= 1;                            /* rule #4 */
1923           if (cp >= linestart                   /* rule #2 */
1924               && (cp == linestart
1925                   || notinname (cp[-1]))        /* rule #3 */
1926               && strneq (name, cp, namelen))    /* rule #2 */
1927             named = FALSE;      /* use implicit tag name */
1928         }
1929     }
1930
1931   if (named)
1932     nname = savenstr (name, namelen);
1933
1934   pfnote (nname, is_func, linestart, linelen, lno, cno);
1935 }
1936
1937 /* Record a tag. */
1938 static void
1939 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1940                                 /* tag name, or NULL if unnamed */
1941                                 /* tag is a function */
1942                                 /* start of the line where tag is */
1943                                 /* length of the line where tag is */
1944                                 /* line number */
1945                                 /* character number */
1946 {
1947   register node *np;
1948
1949   assert (name == NULL || name[0] != '\0');
1950   if (CTAGS && name == NULL)
1951     return;
1952
1953   np = xnew (1, node);
1954
1955   /* If ctags mode, change name "main" to M<thisfilename>. */
1956   if (CTAGS && !cxref_style && streq (name, "main"))
1957     {
1958       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1959       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1960       fp = etags_strrchr (np->name, '.');
1961       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1962         fp[0] = '\0';
1963     }
1964   else
1965     np->name = name;
1966   np->valid = TRUE;
1967   np->been_warned = FALSE;
1968   np->fdp = curfdp;
1969   np->is_func = is_func;
1970   np->lno = lno;
1971   if (np->fdp->usecharno)
1972     /* Our char numbers are 0-base, because of C language tradition?
1973        ctags compatibility?  old versions compatibility?   I don't know.
1974        Anyway, since emacs's are 1-base we expect etags.el to take care
1975        of the difference.  If we wanted to have 1-based numbers, we would
1976        uncomment the +1 below. */
1977     np->cno = cno /* + 1 */ ;
1978   else
1979     np->cno = invalidcharno;
1980   np->left = np->right = NULL;
1981   if (CTAGS && !cxref_style)
1982     {
1983       if (strlen (linestart) < 50)
1984         np->regex = concat (linestart, "$", "");
1985       else
1986         np->regex = savenstr (linestart, 50);
1987     }
1988   else
1989     np->regex = savenstr (linestart, linelen);
1990
1991   add_node (np, &nodehead);
1992 }
1993
1994 /*
1995  * free_tree ()
1996  *      recurse on left children, iterate on right children.
1997  */
1998 static void
1999 free_tree (register node *np)
2000 {
2001   while (np)
2002     {
2003       register node *node_right = np->right;
2004       free_tree (np->left);
2005       free (np->name);
2006       free (np->regex);
2007       free (np);
2008       np = node_right;
2009     }
2010 }
2011
2012 /*
2013  * free_fdesc ()
2014  *      delete a file description
2015  */
2016 static void
2017 free_fdesc (register fdesc *fdp)
2018 {
2019   free (fdp->infname);
2020   free (fdp->infabsname);
2021   free (fdp->infabsdir);
2022   free (fdp->taggedfname);
2023   free (fdp->prop);
2024   free (fdp);
2025 }
2026
2027 /*
2028  * add_node ()
2029  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2030  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2031  *      balancing.
2032  *
2033  *      add_node is the only function allowed to add nodes, so it can
2034  *      maintain state.
2035  */
2036 static void
2037 add_node (node *np, node **cur_node_p)
2038 {
2039   register int dif;
2040   register node *cur_node = *cur_node_p;
2041
2042   if (cur_node == NULL)
2043     {
2044       *cur_node_p = np;
2045       last_node = np;
2046       return;
2047     }
2048
2049   if (!CTAGS)
2050     /* Etags Mode */
2051     {
2052       /* For each file name, tags are in a linked sublist on the right
2053          pointer.  The first tags of different files are a linked list
2054          on the left pointer.  last_node points to the end of the last
2055          used sublist. */
2056       if (last_node != NULL && last_node->fdp == np->fdp)
2057         {
2058           /* Let's use the same sublist as the last added node. */
2059           assert (last_node->right == NULL);
2060           last_node->right = np;
2061           last_node = np;
2062         }
2063       else if (cur_node->fdp == np->fdp)
2064         {
2065           /* Scanning the list we found the head of a sublist which is
2066              good for us.  Let's scan this sublist. */
2067           add_node (np, &cur_node->right);
2068         }
2069       else
2070         /* The head of this sublist is not good for us.  Let's try the
2071            next one. */
2072         add_node (np, &cur_node->left);
2073     } /* if ETAGS mode */
2074
2075   else
2076     {
2077       /* Ctags Mode */
2078       dif = strcmp (np->name, cur_node->name);
2079
2080       /*
2081        * If this tag name matches an existing one, then
2082        * do not add the node, but maybe print a warning.
2083        */
2084       if (no_duplicates && !dif)
2085         {
2086           if (np->fdp == cur_node->fdp)
2087             {
2088               if (!no_warnings)
2089                 {
2090                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2091                            np->fdp->infname, lineno, np->name);
2092                   fprintf (stderr, "Second entry ignored\n");
2093                 }
2094             }
2095           else if (!cur_node->been_warned && !no_warnings)
2096             {
2097               fprintf
2098                 (stderr,
2099                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2100                  np->fdp->infname, cur_node->fdp->infname, np->name);
2101               cur_node->been_warned = TRUE;
2102             }
2103           return;
2104         }
2105
2106       /* Actually add the node */
2107       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2108     } /* if CTAGS mode */
2109 }
2110
2111 /*
2112  * invalidate_nodes ()
2113  *      Scan the node tree and invalidate all nodes pointing to the
2114  *      given file description (CTAGS case) or free them (ETAGS case).
2115  */
2116 static void
2117 invalidate_nodes (fdesc *badfdp, node **npp)
2118 {
2119   node *np = *npp;
2120
2121   if (np == NULL)
2122     return;
2123
2124   if (CTAGS)
2125     {
2126       if (np->left != NULL)
2127         invalidate_nodes (badfdp, &np->left);
2128       if (np->fdp == badfdp)
2129         np->valid = FALSE;
2130       if (np->right != NULL)
2131         invalidate_nodes (badfdp, &np->right);
2132     }
2133   else
2134     {
2135       assert (np->fdp != NULL);
2136       if (np->fdp == badfdp)
2137         {
2138           *npp = np->left;      /* detach the sublist from the list */
2139           np->left = NULL;      /* isolate it */
2140           free_tree (np);       /* free it */
2141           invalidate_nodes (badfdp, npp);
2142         }
2143       else
2144         invalidate_nodes (badfdp, &np->left);
2145     }
2146 }
2147
2148 \f
2149 static int total_size_of_entries (node *);
2150 static int number_len (long);
2151
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (long int num)
{
  int digits;

  /* One digit to start with, plus one for each further division by
     ten that leaves something positive.  */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2161
2162 /*
2163  * Return total number of characters that put_entries will output for
2164  * the nodes in the linked list at the right of the specified node.
2165  * This count is irrelevant with etags.el since emacs 19.34 at least,
2166  * but is still supplied for backward compatibility.
2167  */
2168 static int
2169 total_size_of_entries (register node *np)
2170 {
2171   register int total = 0;
2172
2173   for (; np != NULL; np = np->right)
2174     if (np->valid)
2175       {
2176         total += strlen (np->regex) + 1;                /* pat\177 */
2177         if (np->name != NULL)
2178           total += strlen (np->name) + 1;               /* name\001 */
2179         total += number_len ((long) np->lno) + 1;       /* lno, */
2180         if (np->cno != invalidcharno)                   /* cno */
2181           total += number_len (np->cno);
2182         total += 1;                                     /* newline */
2183       }
2184
2185   return total;
2186 }
2187
2188 static void
2189 put_entries (register node *np)
2190 {
2191   register char *sp;
2192   static fdesc *fdp = NULL;
2193
2194   if (np == NULL)
2195     return;
2196
2197   /* Output subentries that precede this one */
2198   if (CTAGS)
2199     put_entries (np->left);
2200
2201   /* Output this entry */
2202   if (np->valid)
2203     {
2204       if (!CTAGS)
2205         {
2206           /* Etags mode */
2207           if (fdp != np->fdp)
2208             {
2209               fdp = np->fdp;
2210               fprintf (tagf, "\f\n%s,%d\n",
2211                        fdp->taggedfname, total_size_of_entries (np));
2212               fdp->written = TRUE;
2213             }
2214           fputs (np->regex, tagf);
2215           fputc ('\177', tagf);
2216           if (np->name != NULL)
2217             {
2218               fputs (np->name, tagf);
2219               fputc ('\001', tagf);
2220             }
2221           fprintf (tagf, "%d,", np->lno);
2222           if (np->cno != invalidcharno)
2223             fprintf (tagf, "%ld", np->cno);
2224           fputs ("\n", tagf);
2225         }
2226       else
2227         {
2228           /* Ctags mode */
2229           if (np->name == NULL)
2230             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2231
2232           if (cxref_style)
2233             {
2234               if (vgrind_style)
2235                 fprintf (stdout, "%s %s %d\n",
2236                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2237               else
2238                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2239                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2240             }
2241           else
2242             {
2243               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2244
2245               if (np->is_func)
2246                 {               /* function or #define macro with args */
2247                   putc (searchar, tagf);
2248                   putc ('^', tagf);
2249
2250                   for (sp = np->regex; *sp; sp++)
2251                     {
2252                       if (*sp == '\\' || *sp == searchar)
2253                         putc ('\\', tagf);
2254                       putc (*sp, tagf);
2255                     }
2256                   putc (searchar, tagf);
2257                 }
2258               else
2259                 {               /* anything else; text pattern inadequate */
2260                   fprintf (tagf, "%d", np->lno);
2261                 }
2262               putc ('\n', tagf);
2263             }
2264         }
2265     } /* if this node contains a valid tag */
2266
2267   /* Output subentries that follow this one */
2268   put_entries (np->right);
2269   if (!CTAGS)
2270     put_entries (np->left);
2271 }
2272
2273 \f
/* C dialect flags.  The bits covered by C_EXT select the dialect;
   note that C_STAR and C_JAVA both contain the C_PLPL bit.  C_AUTO
   and YACC are separate flags outside the C_EXT mask. */
#define C_EXT	0x00fff		/* mask of the dialect bits */
#define C_PLAIN 0x00000		/* plain C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* Java */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
2282
/*
 * The C symbol tables.
 *
 * Classification of the words known to the keyword table below.  The
 * keywords quoted next to each value are the ones that table maps
 * to it.
 */
enum sym_type
{
  st_none,			/* not a known keyword */
  st_C_objprot,			/* @interface, @protocol */
  st_C_objimpl,			/* @implementation */
  st_C_objend,			/* @end */
  st_C_gnumacro,		/* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,			/* if, for, while, switch, return,
				   import, package, friend */
  st_C_attribute,		/* __attribute__, GTY */
  st_C_javastruct,		/* extends, implements */
  st_C_operator,		/* operator */
  st_C_class,			/* class */
  st_C_template,		/* template */
  st_C_struct,			/* struct, union, interface, namespace,
				   domain */
  st_C_extern,			/* extern */
  st_C_enum,			/* enum */
  st_C_define,			/* define, undef */
  st_C_typedef			/* typedef */
};
2297
2298 static unsigned int hash (const char *, unsigned int);
2299 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2300 static enum sym_type C_symtype (char *, int, int);
2301
2302 /* Feed stuff between (but not including) %[ and %] lines to:
2303      gperf -m 5
2304 %[
2305 %compare-strncmp
2306 %enum
2307 %struct-type
2308 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2309 %%
2310 if,             0,                      st_C_ignore
2311 for,            0,                      st_C_ignore
2312 while,          0,                      st_C_ignore
2313 switch,         0,                      st_C_ignore
2314 return,         0,                      st_C_ignore
2315 __attribute__,  0,                      st_C_attribute
2316 GTY,            0,                      st_C_attribute
2317 @interface,     0,                      st_C_objprot
2318 @protocol,      0,                      st_C_objprot
2319 @implementation,0,                      st_C_objimpl
2320 @end,           0,                      st_C_objend
2321 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2322 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2323 friend,         C_PLPL,                 st_C_ignore
2324 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2325 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2326 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2327 class,          0,                      st_C_class
2328 namespace,      C_PLPL,                 st_C_struct
2329 domain,         C_STAR,                 st_C_struct
2330 union,          0,                      st_C_struct
2331 struct,         0,                      st_C_struct
2332 extern,         0,                      st_C_extern
2333 enum,           0,                      st_C_enum
2334 typedef,        0,                      st_C_typedef
2335 define,         0,                      st_C_define
2336 undef,          0,                      st_C_define
2337 operator,       C_PLPL,                 st_C_operator
2338 template,       0,                      st_C_template
2339 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2340 DEFUN,          0,                      st_C_gnumacro
2341 SYSCALL,        0,                      st_C_gnumacro
2342 ENTRY,          0,                      st_C_gnumacro
2343 PSEUDO,         0,                      st_C_gnumacro
2344 # These are defined inside C functions, so currently they are not met.
2345 # EXFUN used in glibc, DEFVAR_* in emacs.
2346 #EXFUN,         0,                      st_C_gnumacro
2347 #DEFVAR_,       0,                      st_C_gnumacro
2348 %]
2349 and replace lines between %< and %> with its output, then:
2350  - remove the #if characterset check
2351  - make in_word_set static and not inline. */
2352 /*%<*/
2353 /* C code produced by gperf version 3.0.1 */
2354 /* Command-line: gperf -m 5  */
2355 /* Computed positions: -k'2-3' */
2356
2357 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2358 /* maximum key range = 33, duplicates = 0 */
2359
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function of the keyword table (gperf output): LEN plus the
   association values of the second character of STR and, unless LEN
   is 2, of the third one as well.  STR must hold at least two
   characters, or three when LEN is not 2. */
static unsigned int
hash (const char *str, unsigned int len)
{
  /* Association value of each character code; 35 marks characters
     that occur at neither position in any keyword.  */
  static const unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* Only a two-character word skips the third character.  */
  if (hval != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2412
2413 static struct C_stab_entry *
2414 in_word_set (register const char *str, register unsigned int len)
2415 {
2416   enum
2417     {
2418       TOTAL_KEYWORDS = 33,
2419       MIN_WORD_LENGTH = 2,
2420       MAX_WORD_LENGTH = 15,
2421       MIN_HASH_VALUE = 2,
2422       MAX_HASH_VALUE = 34
2423     };
2424
2425   static struct C_stab_entry wordlist[] =
2426     {
2427       {""}, {""},
2428       {"if",            0,                      st_C_ignore},
2429       {"GTY",           0,                      st_C_attribute},
2430       {"@end",          0,                      st_C_objend},
2431       {"union",         0,                      st_C_struct},
2432       {"define",                0,                      st_C_define},
2433       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2434       {"template",      0,                      st_C_template},
2435       {"operator",      C_PLPL,                 st_C_operator},
2436       {"@interface",    0,                      st_C_objprot},
2437       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2438       {"friend",                C_PLPL,                 st_C_ignore},
2439       {"typedef",       0,                      st_C_typedef},
2440       {"return",                0,                      st_C_ignore},
2441       {"@implementation",0,                     st_C_objimpl},
2442       {"@protocol",     0,                      st_C_objprot},
2443       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2444       {"extern",                0,                      st_C_extern},
2445       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2446       {"struct",                0,                      st_C_struct},
2447       {"domain",                C_STAR,                 st_C_struct},
2448       {"switch",                0,                      st_C_ignore},
2449       {"enum",          0,                      st_C_enum},
2450       {"for",           0,                      st_C_ignore},
2451       {"namespace",     C_PLPL,                 st_C_struct},
2452       {"class",         0,                      st_C_class},
2453       {"while",         0,                      st_C_ignore},
2454       {"undef",         0,                      st_C_define},
2455       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2456       {"__attribute__", 0,                      st_C_attribute},
2457       {"SYSCALL",       0,                      st_C_gnumacro},
2458       {"ENTRY",         0,                      st_C_gnumacro},
2459       {"PSEUDO",                0,                      st_C_gnumacro},
2460       {"DEFUN",         0,                      st_C_gnumacro}
2461     };
2462
2463   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2464     {
2465       register int key = hash (str, len);
2466
2467       if (key <= MAX_HASH_VALUE && key >= 0)
2468         {
2469           register const char *s = wordlist[key].name;
2470
2471           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2472             return &wordlist[key];
2473         }
2474     }
2475   return 0;
2476 }
2477 /*%>*/
2478
2479 static enum sym_type
2480 C_symtype (char *str, int len, int c_ext)
2481 {
2482   register struct C_stab_entry *se = in_word_set (str, len);
2483
2484   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2485     return st_none;
2486   return se->type;
2487 }
2488
2489 \f
/*
 * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it meets a token classified as
 * st_C_attribute (in the keyword table: `__attribute__' and `GTY').
 * While the flag is set, C_entries does not hand tokens to
 * consider_token.
 */
static bool inattribute;        /* looking at an __attribute__ construct */
2494
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 *
 * Note that consider_token enters fdefunkey for every keyword of type
 * st_C_gnumacro (DEFUN, ENTRY, SYSCALL and PSEUDO in the keyword
 * table), not for DEFUN alone.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN (or other GNU macro) keyword seen */
  fdefunname,                   /* Emacs DEFUN (or other GNU macro) name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */
2514
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 *
 * consider_token moves from tnone to tkeyseen on a `typedef' keyword
 * only when the `typedefs' variable is non-zero; otherwise the
 * automaton stays in tnone.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2528
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 *
 * When consider_token enters skeyseen it also records the keyword
 * type and the current brace level in its own static variables
 * (structtype and structbracelev).
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2542
/*
 * When objdef is different from onone, objtag is the name of the class.
 *
 * consider_token replaces it with a fresh copy made by savenstr each
 * time a class name is seen after @interface, @protocol or
 * @implementation; the previous copy is deliberately never freed
 * (see the comment in the oignore case of consider_token).
 */
static const char *objtag = "<uninited>";
2547
/*
 * Yet another little state machine to deal with preprocessor lines.
 *
 * The CNL macro resets definedef to dnone when a new line is read,
 * whereas CNL_SAVE_DEFINEDEF reads a new line and leaves it untouched
 * (used for newlines inside comments, strings and brackets).
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2558
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;

2578
2579
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* TRUE when a tag may be created from this
                                   token; when FALSE, do not create a tag.
                                   The token should be invalidated (set to
                                   FALSE) whenever a state machine is
                                   reset prematurely.  make_C_tag clears
                                   it after each use. */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2602
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above (int, char *, int);
static void popclass_above (int);
static void write_classname (linebuffer *, const char *qualifier);

/* cname and bracelev are parallel arrays of SIZE elements, of which
   the first NL are in use.  An entry of cname may be a null pointer:
   pushclass_above stores one when it is given no name.  */
static struct {
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   This expands to an expression that uses an identifier named
   `bracelev', which must be in scope wherever the macro is used.  */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2622
2623 static void
2624 pushclass_above (int bracelev, char *str, int len)
2625 {
2626   int nl;
2627
2628   popclass_above (bracelev);
2629   nl = cstack.nl;
2630   if (nl >= cstack.size)
2631     {
2632       int size = cstack.size *= 2;
2633       xrnew (cstack.cname, size, char *);
2634       xrnew (cstack.bracelev, size, int);
2635     }
2636   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2637   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2638   cstack.bracelev[nl] = bracelev;
2639   cstack.nl = nl + 1;
2640 }
2641
2642 static void
2643 popclass_above (int bracelev)
2644 {
2645   int nl;
2646
2647   for (nl = cstack.nl - 1;
2648        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2649        nl--)
2650     {
2651       free (cstack.cname[nl]);
2652       cstack.nl = nl;
2653     }
2654 }
2655
2656 static void
2657 write_classname (linebuffer *cn, const char *qualifier)
2658 {
2659   int i, len;
2660   int qlen = strlen (qualifier);
2661
2662   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2663     {
2664       len = 0;
2665       cn->len = 0;
2666       cn->buffer[0] = '\0';
2667     }
2668   else
2669     {
2670       len = strlen (cstack.cname[0]);
2671       linebuffer_setlen (cn, len);
2672       strcpy (cn->buffer, cstack.cname[0]);
2673     }
2674   for (i = 1; i < cstack.nl; i++)
2675     {
2676       char *s;
2677       int slen;
2678
2679       s = cstack.cname[i];
2680       if (s == NULL)
2681         continue;
2682       slen = strlen (s);
2683       len += slen + qlen;
2684       linebuffer_setlen (cn, len);
2685       strncat (cn->buffer, qualifier, qlen);
2686       strncat (cn->buffer, s, slen);
2687     }
2688 }
2689
2690 \f
2691 static bool consider_token (char *, int, int, int *, int, int, bool *);
2692 static void make_C_tag (bool);
2693
2694 /*
2695  * consider_token ()
2696  *      checks to see if the current token is at the start of a
2697  *      function or variable, or corresponds to a typedef, or
2698  *      is a struct/union/enum tag, or #define, or an enum constant.
2699  *
2700  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2701  *      with args.  C_EXTP points to which language we are looking at.
2702  *
2703  * Globals
2704  *      fvdef                   IN OUT
2705  *      structdef               IN OUT
2706  *      definedef               IN OUT
2707  *      typdef                  IN OUT
2708  *      objdef                  IN OUT
2709  */
2710
2711 static bool
2712 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2713                                 /* IN: token pointer */
2714                                 /* IN: token length */
2715                                 /* IN: first char after the token */
2716                                 /* IN, OUT: C extensions mask */
2717                                 /* IN: brace level */
2718                                 /* IN: parenthesis level */
2719                                 /* OUT: function or variable found */
2720 {
2721   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2722      structtype is the type of the preceding struct-like keyword, and
2723      structbracelev is the brace level where it has been seen. */
2724   static enum sym_type structtype;
2725   static int structbracelev;
2726   static enum sym_type toktype;
2727
2728
2729   toktype = C_symtype (str, len, *c_extp);
2730
2731   /*
2732    * Skip __attribute__
2733    */
2734   if (toktype == st_C_attribute)
2735     {
2736       inattribute = TRUE;
2737       return FALSE;
2738      }
2739
2740    /*
2741     * Advance the definedef state machine.
2742     */
2743    switch (definedef)
2744      {
2745      case dnone:
2746        /* We're not on a preprocessor line. */
2747        if (toktype == st_C_gnumacro)
2748          {
2749            fvdef = fdefunkey;
2750            return FALSE;
2751          }
2752        break;
2753      case dsharpseen:
2754        if (toktype == st_C_define)
2755          {
2756            definedef = ddefineseen;
2757          }
2758        else
2759          {
2760            definedef = dignorerest;
2761          }
2762        return FALSE;
2763      case ddefineseen:
2764        /*
2765         * Make a tag for any macro, unless it is a constant
2766         * and constantypedefs is FALSE.
2767         */
2768        definedef = dignorerest;
2769        *is_func_or_var = (c == '(');
2770        if (!*is_func_or_var && !constantypedefs)
2771          return FALSE;
2772        else
2773          return TRUE;
2774      case dignorerest:
2775        return FALSE;
2776      default:
2777        error ("internal error: definedef value.", (char *)NULL);
2778      }
2779
2780    /*
2781     * Now typedefs
2782     */
2783    switch (typdef)
2784      {
2785      case tnone:
2786        if (toktype == st_C_typedef)
2787          {
2788            if (typedefs)
2789              typdef = tkeyseen;
2790            fvextern = FALSE;
2791            fvdef = fvnone;
2792            return FALSE;
2793          }
2794        break;
2795      case tkeyseen:
2796        switch (toktype)
2797          {
2798          case st_none:
2799          case st_C_class:
2800          case st_C_struct:
2801          case st_C_enum:
2802            typdef = ttypeseen;
2803          }
2804        break;
2805      case ttypeseen:
2806        if (structdef == snone && fvdef == fvnone)
2807          {
2808            fvdef = fvnameseen;
2809            return TRUE;
2810          }
2811        break;
2812      case tend:
2813        switch (toktype)
2814          {
2815          case st_C_class:
2816          case st_C_struct:
2817          case st_C_enum:
2818            return FALSE;
2819          }
2820        return TRUE;
2821      }
2822
2823    switch (toktype)
2824      {
2825      case st_C_javastruct:
2826        if (structdef == stagseen)
2827          structdef = scolonseen;
2828        return FALSE;
2829      case st_C_template:
2830      case st_C_class:
2831        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2832            && bracelev == 0
2833            && definedef == dnone && structdef == snone
2834            && typdef == tnone && fvdef == fvnone)
2835          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2836        if (toktype == st_C_template)
2837          break;
2838        /* FALLTHRU */
2839      case st_C_struct:
2840      case st_C_enum:
2841        if (parlev == 0
2842            && fvdef != vignore
2843            && (typdef == tkeyseen
2844                || (typedefs_or_cplusplus && structdef == snone)))
2845          {
2846            structdef = skeyseen;
2847            structtype = toktype;
2848            structbracelev = bracelev;
2849            if (fvdef == fvnameseen)
2850              fvdef = fvnone;
2851          }
2852        return FALSE;
2853      }
2854
2855    if (structdef == skeyseen)
2856      {
2857        structdef = stagseen;
2858        return TRUE;
2859      }
2860
2861    if (typdef != tnone)
2862      definedef = dnone;
2863
2864    /* Detect Objective C constructs. */
2865    switch (objdef)
2866      {
2867      case onone:
2868        switch (toktype)
2869          {
2870          case st_C_objprot:
2871            objdef = oprotocol;
2872            return FALSE;
2873          case st_C_objimpl:
2874            objdef = oimplementation;
2875            return FALSE;
2876          }
2877        break;
2878      case oimplementation:
2879        /* Save the class tag for functions or variables defined inside. */
2880        objtag = savenstr (str, len);
2881        objdef = oinbody;
2882        return FALSE;
2883      case oprotocol:
2884        /* Save the class tag for categories. */
2885        objtag = savenstr (str, len);
2886        objdef = otagseen;
2887        *is_func_or_var = TRUE;
2888        return TRUE;
2889      case oparenseen:
2890        objdef = ocatseen;
2891        *is_func_or_var = TRUE;
2892        return TRUE;
2893      case oinbody:
2894        break;
2895      case omethodsign:
2896        if (parlev == 0)
2897          {
2898            fvdef = fvnone;
2899            objdef = omethodtag;
2900            linebuffer_setlen (&token_name, len);
2901            strncpy (token_name.buffer, str, len);
2902            token_name.buffer[len] = '\0';
2903            return TRUE;
2904          }
2905        return FALSE;
2906      case omethodcolon:
2907        if (parlev == 0)
2908          objdef = omethodparm;
2909        return FALSE;
2910      case omethodparm:
2911        if (parlev == 0)
2912          {
2913            fvdef = fvnone;
2914            objdef = omethodtag;
2915            linebuffer_setlen (&token_name, token_name.len + len);
2916            strncat (token_name.buffer, str, len);
2917            return TRUE;
2918          }
2919        return FALSE;
2920      case oignore:
2921        if (toktype == st_C_objend)
2922          {
2923            /* Memory leakage here: the string pointed by objtag is
2924               never released, because many tests would be needed to
2925               avoid breaking on incorrect input code.  The amount of
2926               memory leaked here is the sum of the lengths of the
2927               class tags.
2928            free (objtag); */
2929            objdef = onone;
2930          }
2931        return FALSE;
2932      }
2933
2934    /* A function, variable or enum constant? */
2935    switch (toktype)
2936      {
2937      case st_C_extern:
2938        fvextern = TRUE;
2939        switch  (fvdef)
2940          {
2941          case finlist:
2942          case flistseen:
2943          case fignore:
2944          case vignore:
2945            break;
2946          default:
2947            fvdef = fvnone;
2948          }
2949        return FALSE;
2950      case st_C_ignore:
2951        fvextern = FALSE;
2952        fvdef = vignore;
2953        return FALSE;
2954      case st_C_operator:
2955        fvdef = foperator;
2956        *is_func_or_var = TRUE;
2957        return TRUE;
2958      case st_none:
2959        if (constantypedefs
2960            && structdef == snone
2961            && structtype == st_C_enum && bracelev > structbracelev)
2962          return TRUE;           /* enum constant */
2963        switch (fvdef)
2964          {
2965          case fdefunkey:
2966            if (bracelev > 0)
2967              break;
2968            fvdef = fdefunname;  /* GNU macro */
2969            *is_func_or_var = TRUE;
2970            return TRUE;
2971          case fvnone:
2972            switch (typdef)
2973              {
2974              case ttypeseen:
2975                return FALSE;
2976              case tnone:
2977                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2978                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2979                  {
2980                    fvdef = vignore;
2981                    return FALSE;
2982                  }
2983                break;
2984              }
2985           /* FALLTHRU */
2986           case fvnameseen:
2987           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2988             {
2989               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2990                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2991               fvdef = foperator;
2992               *is_func_or_var = TRUE;
2993               return TRUE;
2994             }
2995           if (bracelev > 0 && !instruct)
2996             break;
2997           fvdef = fvnameseen;   /* function or variable */
2998           *is_func_or_var = TRUE;
2999           return TRUE;
3000         }
3001       break;
3002     }
3003
3004   return FALSE;
3005 }
3006
3007 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are not self-contained: they expand to code that
 * uses identifiers from the scope where they are invoked (curndx,
 * newndx, c_ext, lp, quotednl, inf, savetoken, charno).  C_entries
 * declares most of these as locals or parameters.
 */
static struct
{
  long linepos;
  linebuffer lb;
} lbs[2];

/* Index bookkeeping for the two buffers. */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* The buffer (and its starting char position) selected by each index. */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Language tests on the c_ext mask. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next input line into the current buffer and point lp at its
   start, leaving definedef as it is.  */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also restore a valid saved token into
   `token' and reset definedef to dnone.  */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3050
3051
3052 static void
3053 make_C_tag (int isfun)
3054 {
3055   /* This function is never called when token.valid is FALSE, but
3056      we must protect against invalid input or internal errors. */
3057   if (token.valid)
3058     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3059               token.offset+token.length+1, token.lineno, token.linepos);
3060   else if (DEBUG)
3061     {                             /* this branch is optimised away if !DEBUG */
3062       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3063                 token_name.len + 17, isfun, token.line,
3064                 token.offset+token.length+1, token.lineno, token.linepos);
3065       error ("INVALID TOKEN", NULL);
3066     }
3067
3068   token.valid = FALSE;
3069 }
3070
3071
3072 /*
3073  * C_entries ()
3074  *      This routine finds functions, variables, typedefs,
3075  *      #define's, enum constants and struct/union/enum definitions in
3076  *      C syntax and adds them to the list.
3077  */
3078 static void
3079 C_entries (int c_ext, FILE *inf)
3080                                 /* extension of C */
3081                                 /* input file */
3082 {
3083   register char c;              /* latest char read; '\0' for end of line */
3084   register char *lp;            /* pointer one beyond the character `c' */
3085   int curndx, newndx;           /* indices for current and new lb */
3086   register int tokoff;          /* offset in line of start of current token */
3087   register int toklen;          /* length of current token */
3088   const char *qualifier;        /* string used to qualify names */
3089   int qlen;                     /* length of qualifier */
3090   int bracelev;                 /* current brace level */
3091   int bracketlev;               /* current bracket level */
3092   int parlev;                   /* current parenthesis level */
3093   int attrparlev;               /* __attribute__ parenthesis level */
3094   int templatelev;              /* current template level */
3095   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3096   bool incomm, inquote, inchar, quotednl, midtoken;
3097   bool yacc_rules;              /* in the rules part of a yacc file */
3098   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3099
3100
3101   linebuffer_init (&lbs[0].lb);
3102   linebuffer_init (&lbs[1].lb);
3103   if (cstack.size == 0)
3104     {
3105       cstack.size = (DEBUG) ? 1 : 4;
3106       cstack.nl = 0;
3107       cstack.cname = xnew (cstack.size, char *);
3108       cstack.bracelev = xnew (cstack.size, int);
3109     }
3110
3111   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3112   curndx = newndx = 0;
3113   lp = curlb.buffer;
3114   *lp = 0;
3115
3116   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3117   structdef = snone; definedef = dnone; objdef = onone;
3118   yacc_rules = FALSE;
3119   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3120   token.valid = savetoken.valid = FALSE;
3121   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3122   if (cjava)
3123     { qualifier = "."; qlen = 1; }
3124   else
3125     { qualifier = "::"; qlen = 2; }
3126
3127
3128   while (!feof (inf))
3129     {
3130       c = *lp++;
3131       if (c == '\\')
3132         {
3133           /* If we are at the end of the line, the next character is a
3134              '\0'; do not skip it, because it is what tells us
3135              to read the next line.  */
3136           if (*lp == '\0')
3137             {
3138               quotednl = TRUE;
3139               continue;
3140             }
3141           lp++;
3142           c = ' ';
3143         }
3144       else if (incomm)
3145         {
3146           switch (c)
3147             {
3148             case '*':
3149               if (*lp == '/')
3150                 {
3151                   c = *lp++;
3152                   incomm = FALSE;
3153                 }
3154               break;
3155             case '\0':
3156               /* Newlines inside comments do not end macro definitions in
3157                  traditional cpp. */
3158               CNL_SAVE_DEFINEDEF ();
3159               break;
3160             }
3161           continue;
3162         }
3163       else if (inquote)
3164         {
3165           switch (c)
3166             {
3167             case '"':
3168               inquote = FALSE;
3169               break;
3170             case '\0':
3171               /* Newlines inside strings do not end macro definitions
3172                  in traditional cpp, even though compilers don't
3173                  usually accept them. */
3174               CNL_SAVE_DEFINEDEF ();
3175               break;
3176             }
3177           continue;
3178         }
3179       else if (inchar)
3180         {
3181           switch (c)
3182             {
3183             case '\0':
3184               /* Hmmm, something went wrong. */
3185               CNL ();
3186               /* FALLTHRU */
3187             case '\'':
3188               inchar = FALSE;
3189               break;
3190             }
3191           continue;
3192         }
3193       else if (bracketlev > 0)
3194         {
3195           switch (c)
3196             {
3197             case ']':
3198               if (--bracketlev > 0)
3199                 continue;
3200               break;
3201             case '\0':
3202               CNL_SAVE_DEFINEDEF ();
3203               break;
3204             }
3205           continue;
3206         }
3207       else switch (c)
3208         {
3209         case '"':
3210           inquote = TRUE;
3211           if (inattribute)
3212             break;
3213           switch (fvdef)
3214             {
3215             case fdefunkey:
3216             case fstartlist:
3217             case finlist:
3218             case fignore:
3219             case vignore:
3220               break;
3221             default:
3222               fvextern = FALSE;
3223               fvdef = fvnone;
3224             }
3225           continue;
3226         case '\'':
3227           inchar = TRUE;
3228           if (inattribute)
3229             break;
3230           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3231             {
3232               fvextern = FALSE;
3233               fvdef = fvnone;
3234             }
3235           continue;
3236         case '/':
3237           if (*lp == '*')
3238             {
3239               incomm = TRUE;
3240               lp++;
3241               c = ' ';
3242             }
3243           else if (/* cplpl && */ *lp == '/')
3244             {
3245               c = '\0';
3246             }
3247           break;
3248         case '%':
3249           if ((c_ext & YACC) && *lp == '%')
3250             {
3251               /* Entering or exiting rules section in yacc file. */
3252               lp++;
3253               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3254               typdef = tnone; structdef = snone;
3255               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3256               bracelev = 0;
3257               yacc_rules = !yacc_rules;
3258               continue;
3259             }
3260           else
3261             break;
3262         case '#':
3263           if (definedef == dnone)
3264             {
3265               char *cp;
3266               bool cpptoken = TRUE;
3267
3268               /* Look back on this line.  If all blanks, or nonblanks
3269                  followed by an end of comment, this is a preprocessor
3270                  token. */
3271               for (cp = newlb.buffer; cp < lp-1; cp++)
3272                 if (!iswhite (*cp))
3273                   {
3274                     if (*cp == '*' && *(cp+1) == '/')
3275                       {
3276                         cp++;
3277                         cpptoken = TRUE;
3278                       }
3279                     else
3280                       cpptoken = FALSE;
3281                   }
3282               if (cpptoken)
3283                 definedef = dsharpseen;
3284             } /* if (definedef == dnone) */
3285           continue;
3286         case '[':
3287           bracketlev++;
3288             continue;
3289         } /* switch (c) */
3290
3291
3292       /* Consider token only if some involved conditions are satisfied. */
3293       if (typdef != tignore
3294           && definedef != dignorerest
3295           && fvdef != finlist
3296           && templatelev == 0
3297           && (definedef != dnone
3298               || structdef != scolonseen)
3299           && !inattribute)
3300         {
3301           if (midtoken)
3302             {
3303               if (endtoken (c))
3304                 {
3305                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3306                     /* This handles :: in the middle,
3307                        but not at the beginning of an identifier.
3308                        Also, space-separated :: is not recognised. */
3309                     {
3310                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3311                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3312                       lp += 2;
3313                       toklen += 2;
3314                       c = lp[-1];
3315                       goto still_in_token;
3316                     }
3317                   else
3318                     {
3319                       bool funorvar = FALSE;
3320
3321                       if (yacc_rules
3322                           || consider_token (newlb.buffer + tokoff, toklen, c,
3323                                              &c_ext, bracelev, parlev,
3324                                              &funorvar))
3325                         {
3326                           if (fvdef == foperator)
3327                             {
3328                               char *oldlp = lp;
3329                               lp = skip_spaces (lp-1);
3330                               if (*lp != '\0')
3331                                 lp += 1;
3332                               while (*lp != '\0'
3333                                      && !iswhite (*lp) && *lp != '(')
3334                                 lp += 1;
3335                               c = *lp++;
3336                               toklen += lp - oldlp;
3337                             }
3338                           token.named = FALSE;
3339                           if (!plainc
3340                               && nestlev > 0 && definedef == dnone)
3341                             /* in struct body */
3342                             {
3343                               write_classname (&token_name, qualifier);
3344                               linebuffer_setlen (&token_name,
3345                                                  token_name.len+qlen+toklen);
3346                               strcat (token_name.buffer, qualifier);
3347                               strncat (token_name.buffer,
3348                                        newlb.buffer + tokoff, toklen);
3349                               token.named = TRUE;
3350                             }
3351                           else if (objdef == ocatseen)
3352                             /* Objective C category */
3353                             {
3354                               int len = strlen (objtag) + 2 + toklen;
3355                               linebuffer_setlen (&token_name, len);
3356                               strcpy (token_name.buffer, objtag);
3357                               strcat (token_name.buffer, "(");
3358                               strncat (token_name.buffer,
3359                                        newlb.buffer + tokoff, toklen);
3360                               strcat (token_name.buffer, ")");
3361                               token.named = TRUE;
3362                             }
3363                           else if (objdef == omethodtag
3364                                    || objdef == omethodparm)
3365                             /* Objective C method */
3366                             {
3367                               token.named = TRUE;
3368                             }
3369                           else if (fvdef == fdefunname)
3370                             /* GNU DEFUN and similar macros */
3371                             {
3372                               bool defun = (newlb.buffer[tokoff] == 'F');
3373                               int off = tokoff;
3374                               int len = toklen;
3375
3376                               /* Rewrite the tag so that emacs lisp DEFUNs
3377                                  can be found by their elisp name */
3378                               if (defun)
3379                                 {
3380                                   off += 1;
3381                                   len -= 1;
3382                                 }
3383                               linebuffer_setlen (&token_name, len);
3384                               strncpy (token_name.buffer,
3385                                        newlb.buffer + off, len);
3386                               token_name.buffer[len] = '\0';
3387                               if (defun)
3388                                 while (--len >= 0)
3389                                   if (token_name.buffer[len] == '_')
3390                                     token_name.buffer[len] = '-';
3391                               token.named = defun;
3392                             }
3393                           else
3394                             {
3395                               linebuffer_setlen (&token_name, toklen);
3396                               strncpy (token_name.buffer,
3397                                        newlb.buffer + tokoff, toklen);
3398                               token_name.buffer[toklen] = '\0';
3399                               /* Name macros and members. */
3400                               token.named = (structdef == stagseen
3401                                              || typdef == ttypeseen
3402                                              || typdef == tend
3403                                              || (funorvar
3404                                                  && definedef == dignorerest)
3405                                              || (funorvar
3406                                                  && definedef == dnone
3407                                                  && structdef == snone
3408                                                  && bracelev > 0));
3409                             }
3410                           token.lineno = lineno;
3411                           token.offset = tokoff;
3412                           token.length = toklen;
3413                           token.line = newlb.buffer;
3414                           token.linepos = newlinepos;
3415                           token.valid = TRUE;
3416
3417                           if (definedef == dnone
3418                               && (fvdef == fvnameseen
3419                                   || fvdef == foperator
3420                                   || structdef == stagseen
3421                                   || typdef == tend
3422                                   || typdef == ttypeseen
3423                                   || objdef != onone))
3424                             {
3425                               if (current_lb_is_new)
3426                                 switch_line_buffers ();
3427                             }
3428                           else if (definedef != dnone
3429                                    || fvdef == fdefunname
3430                                    || instruct)
3431                             make_C_tag (funorvar);
3432                         }
3433                       else /* not yacc and consider_token failed */
3434                         {
3435                           if (inattribute && fvdef == fignore)
3436                             {
3437                               /* We have just met __attribute__ after a
3438                                  function parameter list: do not tag the
3439                                  function again. */
3440                               fvdef = fvnone;
3441                             }
3442                         }
3443                       midtoken = FALSE;
3444                     }
3445                 } /* if (endtoken (c)) */
3446               else if (intoken (c))
3447                 still_in_token:
3448                 {
3449                   toklen++;
3450                   continue;
3451                 }
3452             } /* if (midtoken) */
3453           else if (begtoken (c))
3454             {
3455               switch (definedef)
3456                 {
3457                 case dnone:
3458                   switch (fvdef)
3459                     {
3460                     case fstartlist:
3461                       /* This prevents tagging fb in
3462                          void (__attribute__((noreturn)) *fb) (void);
3463                          Fixing this is not easy and not very important. */
3464                       fvdef = finlist;
3465                       continue;
3466                     case flistseen:
3467                       if (plainc || declarations)
3468                         {
3469                           make_C_tag (TRUE); /* a function */
3470                           fvdef = fignore;
3471                         }
3472                       break;
3473                     }
3474                   if (structdef == stagseen && !cjava)
3475                     {
3476                       popclass_above (bracelev);
3477                       structdef = snone;
3478                     }
3479                   break;
3480                 case dsharpseen:
3481                   savetoken = token;
3482                   break;
3483                 }
3484               if (!yacc_rules || lp == newlb.buffer + 1)
3485                 {
3486                   tokoff = lp - 1 - newlb.buffer;
3487                   toklen = 1;
3488                   midtoken = TRUE;
3489                 }
3490               continue;
3491             } /* if (begtoken) */
3492         } /* if must look at token */
3493
3494
3495       /* Detect end of line, colon, comma, semicolon and various braces
3496          after having handled a token.*/
3497       switch (c)
3498         {
3499         case ':':
3500           if (inattribute)
3501             break;
3502           if (yacc_rules && token.offset == 0 && token.valid)
3503             {
3504               make_C_tag (FALSE); /* a yacc function */
3505               break;
3506             }
3507           if (definedef != dnone)
3508             break;
3509           switch (objdef)
3510             {
3511             case  otagseen:
3512               objdef = oignore;
3513               make_C_tag (TRUE); /* an Objective C class */
3514               break;
3515             case omethodtag:
3516             case omethodparm:
3517               objdef = omethodcolon;
3518               linebuffer_setlen (&token_name, token_name.len + 1);
3519               strcat (token_name.buffer, ":");
3520               break;
3521             }
3522           if (structdef == stagseen)
3523             {
3524               structdef = scolonseen;
3525               break;
3526             }
3527           /* Should be useless, but may be work as a safety net. */
3528           if (cplpl && fvdef == flistseen)
3529             {
3530               make_C_tag (TRUE); /* a function */
3531               fvdef = fignore;
3532               break;
3533             }
3534           break;
3535         case ';':
3536           if (definedef != dnone || inattribute)
3537             break;
3538           switch (typdef)
3539             {
3540             case tend:
3541             case ttypeseen:
3542               make_C_tag (FALSE); /* a typedef */
3543               typdef = tnone;
3544               fvdef = fvnone;
3545               break;
3546             case tnone:
3547             case tinbody:
3548             case tignore:
3549               switch (fvdef)
3550                 {
3551                 case fignore:
3552                   if (typdef == tignore || cplpl)
3553                     fvdef = fvnone;
3554                   break;
3555                 case fvnameseen:
3556                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3557                       || (members && instruct))
3558                     make_C_tag (FALSE); /* a variable */
3559                   fvextern = FALSE;
3560                   fvdef = fvnone;
3561                   token.valid = FALSE;
3562                   break;
3563                 case flistseen:
3564                   if ((declarations
3565                        && (cplpl || !instruct)
3566                        && (typdef == tnone || (typdef != tignore && instruct)))
3567                       || (members
3568                           && plainc && instruct))
3569                     make_C_tag (TRUE);  /* a function */
3570                   /* FALLTHRU */
3571                 default:
3572                   fvextern = FALSE;
3573                   fvdef = fvnone;
3574                   if (declarations
3575                        && cplpl && structdef == stagseen)
3576                     make_C_tag (FALSE); /* forward declaration */
3577                   else
3578                     token.valid = FALSE;
3579                 } /* switch (fvdef) */
3580               /* FALLTHRU */
3581             default:
3582               if (!instruct)
3583                 typdef = tnone;
3584             }
3585           if (structdef == stagseen)
3586             structdef = snone;
3587           break;
3588         case ',':
3589           if (definedef != dnone || inattribute)
3590             break;
3591           switch (objdef)
3592             {
3593             case omethodtag:
3594             case omethodparm:
3595               make_C_tag (TRUE); /* an Objective C method */
3596               objdef = oinbody;
3597               break;
3598             }
3599           switch (fvdef)
3600             {
3601             case fdefunkey:
3602             case foperator:
3603             case fstartlist:
3604             case finlist:
3605             case fignore:
3606             case vignore:
3607               break;
3608             case fdefunname:
3609               fvdef = fignore;
3610               break;
3611             case fvnameseen:
3612               if (parlev == 0
3613                   && ((globals
3614                        && bracelev == 0
3615                        && templatelev == 0
3616                        && (!fvextern || declarations))
3617                       || (members && instruct)))
3618                   make_C_tag (FALSE); /* a variable */
3619               break;
3620             case flistseen:
3621               if ((declarations && typdef == tnone && !instruct)
3622                   || (members && typdef != tignore && instruct))
3623                 {
3624                   make_C_tag (TRUE); /* a function */
3625                   fvdef = fvnameseen;
3626                 }
3627               else if (!declarations)
3628                 fvdef = fvnone;
3629               token.valid = FALSE;
3630               break;
3631             default:
3632               fvdef = fvnone;
3633             }
3634           if (structdef == stagseen)
3635             structdef = snone;
3636           break;
3637         case ']':
3638           if (definedef != dnone || inattribute)
3639             break;
3640           if (structdef == stagseen)
3641             structdef = snone;
3642           switch (typdef)
3643             {
3644             case ttypeseen:
3645             case tend:
3646               typdef = tignore;
3647               make_C_tag (FALSE);       /* a typedef */
3648               break;
3649             case tnone:
3650             case tinbody:
3651               switch (fvdef)
3652                 {
3653                 case foperator:
3654                 case finlist:
3655                 case fignore:
3656                 case vignore:
3657                   break;
3658                 case fvnameseen:
3659                   if ((members && bracelev == 1)
3660                       || (globals && bracelev == 0
3661                           && (!fvextern || declarations)))
3662                     make_C_tag (FALSE); /* a variable */
3663                   /* FALLTHRU */
3664                 default:
3665                   fvdef = fvnone;
3666                 }
3667               break;
3668             }
3669           break;
3670         case '(':
3671           if (inattribute)
3672             {
3673               attrparlev++;
3674               break;
3675             }
3676           if (definedef != dnone)
3677             break;
3678           if (objdef == otagseen && parlev == 0)
3679             objdef = oparenseen;
3680           switch (fvdef)
3681             {
3682             case fvnameseen:
3683               if (typdef == ttypeseen
3684                   && *lp != '*'
3685                   && !instruct)
3686                 {
3687                   /* This handles constructs like:
3688                      typedef void OperatorFun (int fun); */
3689                   make_C_tag (FALSE);
3690                   typdef = tignore;
3691                   fvdef = fignore;
3692                   break;
3693                 }
3694               /* FALLTHRU */
3695             case foperator:
3696               fvdef = fstartlist;
3697               break;
3698             case flistseen:
3699               fvdef = finlist;
3700               break;
3701             }
3702           parlev++;
3703           break;
3704         case ')':
3705           if (inattribute)
3706             {
3707               if (--attrparlev == 0)
3708                 inattribute = FALSE;
3709               break;
3710             }
3711           if (definedef != dnone)
3712             break;
3713           if (objdef == ocatseen && parlev == 1)
3714             {
3715               make_C_tag (TRUE); /* an Objective C category */
3716               objdef = oignore;
3717             }
3718           if (--parlev == 0)
3719             {
3720               switch (fvdef)
3721                 {
3722                 case fstartlist:
3723                 case finlist:
3724                   fvdef = flistseen;
3725                   break;
3726                 }
3727               if (!instruct
3728                   && (typdef == tend
3729                       || typdef == ttypeseen))
3730                 {
3731                   typdef = tignore;
3732                   make_C_tag (FALSE); /* a typedef */
3733                 }
3734             }
3735           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3736             parlev = 0;
3737           break;
3738         case '{':
3739           if (definedef != dnone)
3740             break;
3741           if (typdef == ttypeseen)
3742             {
3743               /* Whenever typdef is set to tinbody (currently only
3744                  here), typdefbracelev should be set to bracelev. */
3745               typdef = tinbody;
3746               typdefbracelev = bracelev;
3747             }
3748           switch (fvdef)
3749             {
3750             case flistseen:
3751               make_C_tag (TRUE);    /* a function */
3752               /* FALLTHRU */
3753             case fignore:
3754               fvdef = fvnone;
3755               break;
3756             case fvnone:
3757               switch (objdef)
3758                 {
3759                 case otagseen:
3760                   make_C_tag (TRUE); /* an Objective C class */
3761                   objdef = oignore;
3762                   break;
3763                 case omethodtag:
3764                 case omethodparm:
3765                   make_C_tag (TRUE); /* an Objective C method */
3766                   objdef = oinbody;
3767                   break;
3768                 default:
3769                   /* Neutralize `extern "C" {' grot. */
3770                   if (bracelev == 0 && structdef == snone && nestlev == 0
3771                       && typdef == tnone)
3772                     bracelev = -1;
3773                 }
3774               break;
3775             }
3776           switch (structdef)
3777             {
3778             case skeyseen:         /* unnamed struct */
3779               pushclass_above (bracelev, NULL, 0);
3780               structdef = snone;
3781               break;
3782             case stagseen:         /* named struct or enum */
3783             case scolonseen:       /* a class */
3784               pushclass_above (bracelev,token.line+token.offset, token.length);
3785               structdef = snone;
3786               make_C_tag (FALSE);  /* a struct or enum */
3787               break;
3788             }
3789           bracelev += 1;
3790           break;
3791         case '*':
3792           if (definedef != dnone)
3793             break;
3794           if (fvdef == fstartlist)
3795             {
3796               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3797               token.valid = FALSE;
3798             }
3799           break;
3800         case '}':
3801           if (definedef != dnone)
3802             break;
3803           bracelev -= 1;
3804           if (!ignoreindent && lp == newlb.buffer + 1)
3805             {
3806               if (bracelev != 0)
3807                 token.valid = FALSE; /* unexpected value, token unreliable */
3808               bracelev = 0;     /* reset brace level if first column */
3809               parlev = 0;       /* also reset paren level, just in case... */
3810             }
3811           else if (bracelev < 0)
3812             {
3813               token.valid = FALSE; /* something gone amiss, token unreliable */
3814               bracelev = 0;
3815             }
3816           if (bracelev == 0 && fvdef == vignore)
3817             fvdef = fvnone;             /* end of function */
3818           popclass_above (bracelev);
3819           structdef = snone;
3820           /* Only if typdef == tinbody is typdefbracelev significant. */
3821           if (typdef == tinbody && bracelev <= typdefbracelev)
3822             {
3823               assert (bracelev == typdefbracelev);
3824               typdef = tend;
3825             }
3826           break;
3827         case '=':
3828           if (definedef != dnone)
3829             break;
3830           switch (fvdef)
3831             {
3832             case foperator:
3833             case finlist:
3834             case fignore:
3835             case vignore:
3836               break;
3837             case fvnameseen:
3838               if ((members && bracelev == 1)
3839                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3840                 make_C_tag (FALSE); /* a variable */
3841               /* FALLTHRU */
3842             default:
3843               fvdef = vignore;
3844             }
3845           break;
3846         case '<':
3847           if (cplpl
3848               && (structdef == stagseen || fvdef == fvnameseen))
3849             {
3850               templatelev++;
3851               break;
3852             }
3853           goto resetfvdef;
3854         case '>':
3855           if (templatelev > 0)
3856             {
3857               templatelev--;
3858               break;
3859             }
3860           goto resetfvdef;
3861         case '+':
3862         case '-':
3863           if (objdef == oinbody && bracelev == 0)
3864             {
3865               objdef = omethodsign;
3866               break;
3867             }
3868           /* FALLTHRU */
3869         resetfvdef:
3870         case '#': case '~': case '&': case '%': case '/':
3871         case '|': case '^': case '!': case '.': case '?':
3872           if (definedef != dnone)
3873             break;
3874           /* These surely cannot follow a function tag in C. */
3875           switch (fvdef)
3876             {
3877             case foperator:
3878             case finlist:
3879             case fignore:
3880             case vignore:
3881               break;
3882             default:
3883               fvdef = fvnone;
3884             }
3885           break;
3886         case '\0':
3887           if (objdef == otagseen)
3888             {
3889               make_C_tag (TRUE); /* an Objective C class */
3890               objdef = oignore;
3891             }
3892           /* If a macro spans multiple lines don't reset its state. */
3893           if (quotednl)
3894             CNL_SAVE_DEFINEDEF ();
3895           else
3896             CNL ();
3897           break;
3898         } /* switch (c) */
3899
3900     } /* while not eof */
3901
3902   free (lbs[0].lb.buffer);
3903   free (lbs[1].lb.buffer);
3904 }
3905
3906 /*
3907  * Process either a C++ file or a C file depending on the setting
3908  * of a global flag.
3909  */
3910 static void
3911 default_C_entries (FILE *inf)
3912 {
3913   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3914 }
3915
/* Tag INF as plain C: C_entries is called with no extension flags set. */
static void
plain_C_entries (FILE *inf)
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3922
3923 /* Always do C++. */
3924 static void
3925 Cplusplus_entries (FILE *inf)
3926 {
3927   C_entries (C_PLPL, inf);
3928 }
3929
3930 /* Always do Java. */
3931 static void
3932 Cjava_entries (FILE *inf)
3933 {
3934   C_entries (C_JAVA, inf);
3935 }
3936
3937 /* Always do C*. */
3938 static void
3939 Cstar_entries (FILE *inf)
3940 {
3941   C_entries (C_STAR, inf);
3942 }
3943
3944 /* Always do Yacc. */
3945 static void
3946 Yacc_entries (FILE *inf)
3947 {
3948   C_entries (YACC, inf);
3949 }
3950
3951 \f
/* Useful macros. */

/* Loop header that walks FILE_POINTER one line at a time.  Before every
   pass, and only while feof reports no end of file, readline fills
   LINE_BUFFER and CHAR_POINTER is set to the start of its buffer.
   The statement that follows the macro is the loop body. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)                                           \
         && (readline (&line_buffer, file_pointer),                     \
             char_pointer = line_buffer.buffer,                         \
             TRUE))
3961
/* Nonzero when CP points at the keyword KW and the character right after
   it satisfies notinname.  On success CP is advanced past the keyword and
   set to what skip_spaces returns from there.  KW must be a literal
   string: the `"" kw' concatenation inside the assert is a syntax error
   otherwise. */
#define LOOKING_AT(cp, kw)                                              \
  (assert ("" kw),                                                      \
   (strneq ((cp), kw, sizeof (kw) - 1)                                  \
    && notinname ((cp)[sizeof (kw) - 1])                                \
    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1))))
3967
/* Like LOOKING_AT, but the comparison is done with strncaseeq, the
   character following the keyword is not checked with notinname, and on
   success CP is only stepped over the keyword itself: no spaces are
   skipped.  KW must be a literal string (enforced by `"" kw'). */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  (assert ("" kw),                                                      \
   (strncaseeq ((cp), kw, sizeof (kw) - 1)                              \
    && ((cp) += sizeof (kw) - 1)))
3973
3974 /*
3975  * Read a file, but do no processing.  This is used to do regexp
3976  * matching on files that have no language defined.
3977  */
3978 static void
3979 just_read_file (FILE *inf)
3980 {
3981   register char *dummy;
3982
3983   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3984     continue;
3985 }
3986
3987 \f
3988 /* Fortran parsing */
3989
3990 static void F_takeprec (void);
3991 static void F_getit (FILE *);
3992
3993 static void
3994 F_takeprec (void)
3995 {
3996   dbp = skip_spaces (dbp);
3997   if (*dbp != '*')
3998     return;
3999   dbp++;
4000   dbp = skip_spaces (dbp);
4001   if (strneq (dbp, "(*)", 3))
4002     {
4003       dbp += 3;
4004       return;
4005     }
4006   if (!ISDIGIT (*dbp))
4007     {
4008       --dbp;                    /* force failure */
4009       return;
4010     }
4011   do
4012     dbp++;
4013   while (ISDIGIT (*dbp));
4014 }
4015
4016 static void
4017 F_getit (FILE *inf)
4018 {
4019   register char *cp;
4020
4021   dbp = skip_spaces (dbp);
4022   if (*dbp == '\0')
4023     {
4024       readline (&lb, inf);
4025       dbp = lb.buffer;
4026       if (dbp[5] != '&')
4027         return;
4028       dbp += 6;
4029       dbp = skip_spaces (dbp);
4030     }
4031   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4032     return;
4033   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4034     continue;
4035   make_tag (dbp, cp-dbp, TRUE,
4036             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4037 }
4038
4039
4040 static void
4041 Fortran_functions (FILE *inf)
4042 {
4043   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4044     {
4045       if (*dbp == '%')
4046         dbp++;                  /* Ratfor escape to fortran */
4047       dbp = skip_spaces (dbp);
4048       if (*dbp == '\0')
4049         continue;
4050
4051       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4052         dbp = skip_spaces (dbp);
4053
4054       switch (lowcase (*dbp))
4055         {
4056         case 'i':
4057           if (nocase_tail ("integer"))
4058             F_takeprec ();
4059           break;
4060         case 'r':
4061           if (nocase_tail ("real"))
4062             F_takeprec ();
4063           break;
4064         case 'l':
4065           if (nocase_tail ("logical"))
4066             F_takeprec ();
4067           break;
4068         case 'c':
4069           if (nocase_tail ("complex") || nocase_tail ("character"))
4070             F_takeprec ();
4071           break;
4072         case 'd':
4073           if (nocase_tail ("double"))
4074             {
4075               dbp = skip_spaces (dbp);
4076               if (*dbp == '\0')
4077                 continue;
4078               if (nocase_tail ("precision"))
4079                 break;
4080               continue;
4081             }
4082           break;
4083         }
4084       dbp = skip_spaces (dbp);
4085       if (*dbp == '\0')
4086         continue;
4087       switch (lowcase (*dbp))
4088         {
4089         case 'f':
4090           if (nocase_tail ("function"))
4091             F_getit (inf);
4092           continue;
4093         case 's':
4094           if (nocase_tail ("subroutine"))
4095             F_getit (inf);
4096           continue;
4097         case 'e':
4098           if (nocase_tail ("entry"))
4099             F_getit (inf);
4100           continue;
4101         case 'b':
4102           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4103             {
4104               dbp = skip_spaces (dbp);
4105               if (*dbp == '\0') /* assume un-named */
4106                 make_tag ("blockdata", 9, TRUE,
4107                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4108               else
4109                 F_getit (inf);  /* look for name */
4110             }
4111           continue;
4112         }
4113     }
4114 }
4115
4116 \f
4117 /*
4118  * Ada parsing
4119  * Original code by
4120  * Philippe Waroquiers (1998)
4121  */
4122
4123 /* Once we are positioned after an "interesting" keyword, let's get
4124    the real tag value necessary. */
4125 static void
4126 Ada_getit (FILE *inf, const char *name_qualifier)
4127 {
4128   register char *cp;
4129   char *name;
4130   char c;
4131
4132   while (!feof (inf))
4133     {
4134       dbp = skip_spaces (dbp);
4135       if (*dbp == '\0'
4136           || (dbp[0] == '-' && dbp[1] == '-'))
4137         {
4138           readline (&lb, inf);
4139           dbp = lb.buffer;
4140         }
4141       switch (lowcase(*dbp))
4142         {
4143         case 'b':
4144           if (nocase_tail ("body"))
4145             {
4146               /* Skipping body of   procedure body   or   package body or ....
4147                  resetting qualifier to body instead of spec. */
4148               name_qualifier = "/b";
4149               continue;
4150             }
4151           break;
4152         case 't':
4153           /* Skipping type of   task type   or   protected type ... */
4154           if (nocase_tail ("type"))
4155             continue;
4156           break;
4157         }
4158       if (*dbp == '"')
4159         {
4160           dbp += 1;
4161           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4162             continue;
4163         }
4164       else
4165         {
4166           dbp = skip_spaces (dbp);
4167           for (cp = dbp;
4168                (*cp != '\0'
4169                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4170                cp++)
4171             continue;
4172           if (cp == dbp)
4173             return;
4174         }
4175       c = *cp;
4176       *cp = '\0';
4177       name = concat (dbp, name_qualifier, "");
4178       *cp = c;
4179       make_tag (name, strlen (name), TRUE,
4180                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4181       free (name);
4182       if (c == '"')
4183         dbp = cp + 1;
4184       return;
4185     }
4186 }
4187
4188 static void
4189 Ada_funcs (FILE *inf)
4190 {
4191   bool inquote = FALSE;
4192   bool skip_till_semicolumn = FALSE;
4193
4194   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4195     {
4196       while (*dbp != '\0')
4197         {
4198           /* Skip a string i.e. "abcd". */
4199           if (inquote || (*dbp == '"'))
4200             {
4201               dbp = etags_strchr (dbp + !inquote, '"');
4202               if (dbp != NULL)
4203                 {
4204                   inquote = FALSE;
4205                   dbp += 1;
4206                   continue;     /* advance char */
4207                 }
4208               else
4209                 {
4210                   inquote = TRUE;
4211                   break;        /* advance line */
4212                 }
4213             }
4214
4215           /* Skip comments. */
4216           if (dbp[0] == '-' && dbp[1] == '-')
4217             break;              /* advance line */
4218
4219           /* Skip character enclosed in single quote i.e. 'a'
4220              and skip single quote starting an attribute i.e. 'Image. */
4221           if (*dbp == '\'')
4222             {
4223               dbp++ ;
4224               if (*dbp != '\0')
4225                 dbp++;
4226               continue;
4227             }
4228
4229           if (skip_till_semicolumn)
4230             {
4231               if (*dbp == ';')
4232                 skip_till_semicolumn = FALSE;
4233               dbp++;
4234               continue;         /* advance char */
4235             }
4236
4237           /* Search for beginning of a token.  */
4238           if (!begtoken (*dbp))
4239             {
4240               dbp++;
4241               continue;         /* advance char */
4242             }
4243
4244           /* We are at the beginning of a token. */
4245           switch (lowcase(*dbp))
4246             {
4247             case 'f':
4248               if (!packages_only && nocase_tail ("function"))
4249                 Ada_getit (inf, "/f");
4250               else
4251                 break;          /* from switch */
4252               continue;         /* advance char */
4253             case 'p':
4254               if (!packages_only && nocase_tail ("procedure"))
4255                 Ada_getit (inf, "/p");
4256               else if (nocase_tail ("package"))
4257                 Ada_getit (inf, "/s");
4258               else if (nocase_tail ("protected")) /* protected type */
4259                 Ada_getit (inf, "/t");
4260               else
4261                 break;          /* from switch */
4262               continue;         /* advance char */
4263
4264             case 'u':
4265               if (typedefs && !packages_only && nocase_tail ("use"))
4266                 {
4267                   /* when tagging types, avoid tagging  use type Pack.Typename;
4268                      for this, we will skip everything till a ; */
4269                   skip_till_semicolumn = TRUE;
4270                   continue;     /* advance char */
4271                 }
4272
4273             case 't':
4274               if (!packages_only && nocase_tail ("task"))
4275                 Ada_getit (inf, "/k");
4276               else if (typedefs && !packages_only && nocase_tail ("type"))
4277                 {
4278                   Ada_getit (inf, "/t");
4279                   while (*dbp != '\0')
4280                     dbp += 1;
4281                 }
4282               else
4283                 break;          /* from switch */
4284               continue;         /* advance char */
4285             }
4286
4287           /* Look for the end of the token. */
4288           while (!endtoken (*dbp))
4289             dbp++;
4290
4291         } /* advance char */
4292     } /* advance line */
4293 }
4294
4295 \f
4296 /*
4297  * Unix and microcontroller assembly tag handling
4298  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4299  * Idea by Bob Weiner, Motorola Inc. (1994)
4300  */
4301 static void
4302 Asm_labels (FILE *inf)
4303 {
4304   register char *cp;
4305
4306   LOOP_ON_INPUT_LINES (inf, lb, cp)
4307     {
4308       /* If first char is alphabetic or one of [_.$], test for colon
4309          following identifier. */
4310       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4311         {
4312           /* Read past label. */
4313           cp++;
4314           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4315             cp++;
4316           if (*cp == ':' || iswhite (*cp))
4317             /* Found end of label, so copy it and add it to the table. */
4318             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4319                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4320         }
4321     }
4322 }
4323
4324 \f
4325 /*
4326  * Perl support
4327  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4328  * Perl variable names: /^(my|local).../
4329  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4330  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4331  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4332  */
4333 static void
4334 Perl_functions (FILE *inf)
4335 {
4336   char *package = savestr ("main"); /* current package name */
4337   register char *cp;
4338
4339   LOOP_ON_INPUT_LINES (inf, lb, cp)
4340     {
4341       cp = skip_spaces (cp);
4342
4343       if (LOOKING_AT (cp, "package"))
4344         {
4345           free (package);
4346           get_tag (cp, &package);
4347         }
4348       else if (LOOKING_AT (cp, "sub"))
4349         {
4350           char *pos;
4351           char *sp = cp;
4352
4353           while (!notinname (*cp))
4354             cp++;
4355           if (cp == sp)
4356             continue;           /* nothing found */
4357           if ((pos = etags_strchr (sp, ':')) != NULL
4358               && pos < cp && pos[1] == ':')
4359             /* The name is already qualified. */
4360             make_tag (sp, cp - sp, TRUE,
4361                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4362           else
4363             /* Qualify it. */
4364             {
4365               char savechar, *name;
4366
4367               savechar = *cp;
4368               *cp = '\0';
4369               name = concat (package, "::", sp);
4370               *cp = savechar;
4371               make_tag (name, strlen(name), TRUE,
4372                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4373               free (name);
4374             }
4375         }
4376        else if (globals)        /* only if we are tagging global vars */
4377         {
4378           /* Skip a qualifier, if any. */
4379           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4380           /* After "my" or "local", but before any following paren or space. */
4381           char *varstart = cp;
4382
4383           if (qual              /* should this be removed?  If yes, how? */
4384               && (*cp == '$' || *cp == '@' || *cp == '%'))
4385             {
4386               varstart += 1;
4387               do
4388                 cp++;
4389               while (ISALNUM (*cp) || *cp == '_');
4390             }
4391           else if (qual)
4392             {
4393               /* Should be examining a variable list at this point;
4394                  could insist on seeing an open parenthesis. */
4395               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4396                 cp++;
4397             }
4398           else
4399             continue;
4400
4401           make_tag (varstart, cp - varstart, FALSE,
4402                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4403         }
4404     }
4405   free (package);
4406 }
4407
4408
4409 /*
4410  * Python support
4411  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4412  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4413  * More ideas by seb bacon <seb@jamkit.com> (2002)
4414  */
4415 static void
4416 Python_functions (FILE *inf)
4417 {
4418   register char *cp;
4419
4420   LOOP_ON_INPUT_LINES (inf, lb, cp)
4421     {
4422       cp = skip_spaces (cp);
4423       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4424         {
4425           char *name = cp;
4426           while (!notinname (*cp) && *cp != ':')
4427             cp++;
4428           make_tag (name, cp - name, TRUE,
4429                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4430         }
4431     }
4432 }
4433
4434 \f
4435 /*
4436  * PHP support
4437  * Look for:
4438  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4439  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4440  *  - /^[ \t]*define\(\"[^\"]+/
4441  * Only with --members:
4442  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4443  * Idea by Diez B. Roggisch (2001)
4444  */
4445 static void
4446 PHP_functions (FILE *inf)
4447 {
4448   register char *cp, *name;
4449   bool search_identifier = FALSE;
4450
4451   LOOP_ON_INPUT_LINES (inf, lb, cp)
4452     {
4453       cp = skip_spaces (cp);
4454       name = cp;
4455       if (search_identifier
4456           && *cp != '\0')
4457         {
4458           while (!notinname (*cp))
4459             cp++;
4460           make_tag (name, cp - name, TRUE,
4461                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4462           search_identifier = FALSE;
4463         }
4464       else if (LOOKING_AT (cp, "function"))
4465         {
4466           if(*cp == '&')
4467             cp = skip_spaces (cp+1);
4468           if(*cp != '\0')
4469             {
4470               name = cp;
4471               while (!notinname (*cp))
4472                 cp++;
4473               make_tag (name, cp - name, TRUE,
4474                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4475             }
4476           else
4477             search_identifier = TRUE;
4478         }
4479       else if (LOOKING_AT (cp, "class"))
4480         {
4481           if (*cp != '\0')
4482             {
4483               name = cp;
4484               while (*cp != '\0' && !iswhite (*cp))
4485                 cp++;
4486               make_tag (name, cp - name, FALSE,
4487                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4488             }
4489           else
4490             search_identifier = TRUE;
4491         }
4492       else if (strneq (cp, "define", 6)
4493                && (cp = skip_spaces (cp+6))
4494                && *cp++ == '('
4495                && (*cp == '"' || *cp == '\''))
4496         {
4497           char quote = *cp++;
4498           name = cp;
4499           while (*cp != quote && *cp != '\0')
4500             cp++;
4501           make_tag (name, cp - name, FALSE,
4502                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4503         }
4504       else if (members
4505                && LOOKING_AT (cp, "var")
4506                && *cp == '$')
4507         {
4508           name = cp;
4509           while (!notinname(*cp))
4510             cp++;
4511           make_tag (name, cp - name, FALSE,
4512                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4513         }
4514     }
4515 }
4516
4517 \f
4518 /*
4519  * Cobol tag functions
4520  * We could look for anything that could be a paragraph name.
4521  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4522  * Idea by Corny de Souza (1993)
4523  */
4524 static void
4525 Cobol_paragraphs (FILE *inf)
4526 {
4527   register char *bp, *ep;
4528
4529   LOOP_ON_INPUT_LINES (inf, lb, bp)
4530     {
4531       if (lb.len < 9)
4532         continue;
4533       bp += 8;
4534
4535       /* If eoln, compiler option or comment ignore whole line. */
4536       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4537         continue;
4538
4539       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4540         continue;
4541       if (*ep++ == '.')
4542         make_tag (bp, ep - bp, TRUE,
4543                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4544     }
4545 }
4546
4547 \f
4548 /*
4549  * Makefile support
4550  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4551  */
4552 static void
4553 Makefile_targets (FILE *inf)
4554 {
4555   register char *bp;
4556
4557   LOOP_ON_INPUT_LINES (inf, lb, bp)
4558     {
4559       if (*bp == '\t' || *bp == '#')
4560         continue;
4561       while (*bp != '\0' && *bp != '=' && *bp != ':')
4562         bp++;
4563       if (*bp == ':' || (globals && *bp == '='))
4564         {
4565           /* We should detect if there is more than one tag, but we do not.
4566              We just skip initial and final spaces. */
4567           char * namestart = skip_spaces (lb.buffer);
4568           while (--bp > namestart)
4569             if (!notinname (*bp))
4570               break;
4571           make_tag (namestart, bp - namestart + 1, TRUE,
4572                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4573         }
4574     }
4575 }
4576
4577 \f
4578 /*
4579  * Pascal parsing
4580  * Original code by Mosur K. Mohan (1989)
4581  *
4582  *  Locates tags for procedures & functions.  Doesn't do any type- or
4583  *  var-definitions.  It does look for the keyword "extern" or
4584  *  "forward" immediately following the procedure statement; if found,
4585  *  the tag is skipped.
4586  */
4587 static void
4588 Pascal_functions (FILE *inf)
4589 {
4590   linebuffer tline;             /* mostly copied from C_entries */
4591   long save_lcno;
4592   int save_lineno, namelen, taglen;
4593   char c, *name;
4594
4595   bool                          /* each of these flags is TRUE if: */
4596     incomment,                  /* point is inside a comment */
4597     inquote,                    /* point is inside '..' string */
4598     get_tagname,                /* point is after PROCEDURE/FUNCTION
4599                                    keyword, so next item = potential tag */
4600     found_tag,                  /* point is after a potential tag */
4601     inparms,                    /* point is within parameter-list */
4602     verify_tag;                 /* point has passed the parm-list, so the
4603                                    next token will determine whether this
4604                                    is a FORWARD/EXTERN to be ignored, or
4605                                    whether it is a real tag */
4606
4607   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4608   name = NULL;                  /* keep compiler quiet */
4609   dbp = lb.buffer;
4610   *dbp = '\0';
4611   linebuffer_init (&tline);
4612
4613   incomment = inquote = FALSE;
4614   found_tag = FALSE;            /* have a proc name; check if extern */
4615   get_tagname = FALSE;          /* found "procedure" keyword         */
4616   inparms = FALSE;              /* found '(' after "proc"            */
4617   verify_tag = FALSE;           /* check if "extern" is ahead        */
4618
4619
4620   while (!feof (inf))           /* long main loop to get next char */
4621     {
4622       c = *dbp++;
4623       if (c == '\0')            /* if end of line */
4624         {
4625           readline (&lb, inf);
4626           dbp = lb.buffer;
4627           if (*dbp == '\0')
4628             continue;
4629           if (!((found_tag && verify_tag)
4630                 || get_tagname))
4631             c = *dbp++;         /* only if don't need *dbp pointing
4632                                    to the beginning of the name of
4633                                    the procedure or function */
4634         }
4635       if (incomment)
4636         {
4637           if (c == '}')         /* within { } comments */
4638             incomment = FALSE;
4639           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4640             {
4641               dbp++;
4642               incomment = FALSE;
4643             }
4644           continue;
4645         }
4646       else if (inquote)
4647         {
4648           if (c == '\'')
4649             inquote = FALSE;
4650           continue;
4651         }
4652       else
4653         switch (c)
4654           {
4655           case '\'':
4656             inquote = TRUE;     /* found first quote */
4657             continue;
4658           case '{':             /* found open { comment */
4659             incomment = TRUE;
4660             continue;
4661           case '(':
4662             if (*dbp == '*')    /* found open (* comment */
4663               {
4664                 incomment = TRUE;
4665                 dbp++;
4666               }
4667             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4668               inparms = TRUE;
4669             continue;
4670           case ')':             /* end of parms list */
4671             if (inparms)
4672               inparms = FALSE;
4673             continue;
4674           case ';':
4675             if (found_tag && !inparms) /* end of proc or fn stmt */
4676               {
4677                 verify_tag = TRUE;
4678                 break;
4679               }
4680             continue;
4681           }
4682       if (found_tag && verify_tag && (*dbp != ' '))
4683         {
4684           /* Check if this is an "extern" declaration. */
4685           if (*dbp == '\0')
4686             continue;
4687           if (lowcase (*dbp == 'e'))
4688             {
4689               if (nocase_tail ("extern")) /* superfluous, really! */
4690                 {
4691                   found_tag = FALSE;
4692                   verify_tag = FALSE;
4693                 }
4694             }
4695           else if (lowcase (*dbp) == 'f')
4696             {
4697               if (nocase_tail ("forward")) /* check for forward reference */
4698                 {
4699                   found_tag = FALSE;
4700                   verify_tag = FALSE;
4701                 }
4702             }
4703           if (found_tag && verify_tag) /* not external proc, so make tag */
4704             {
4705               found_tag = FALSE;
4706               verify_tag = FALSE;
4707               make_tag (name, namelen, TRUE,
4708                         tline.buffer, taglen, save_lineno, save_lcno);
4709               continue;
4710             }
4711         }
4712       if (get_tagname)          /* grab name of proc or fn */
4713         {
4714           char *cp;
4715
4716           if (*dbp == '\0')
4717             continue;
4718
4719           /* Find block name. */
4720           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4721             continue;
4722
4723           /* Save all values for later tagging. */
4724           linebuffer_setlen (&tline, lb.len);
4725           strcpy (tline.buffer, lb.buffer);
4726           save_lineno = lineno;
4727           save_lcno = linecharno;
4728           name = tline.buffer + (dbp - lb.buffer);
4729           namelen = cp - dbp;
4730           taglen = cp - lb.buffer + 1;
4731
4732           dbp = cp;             /* set dbp to e-o-token */
4733           get_tagname = FALSE;
4734           found_tag = TRUE;
4735           continue;
4736
4737           /* And proceed to check for "extern". */
4738         }
4739       else if (!incomment && !inquote && !found_tag)
4740         {
4741           /* Check for proc/fn keywords. */
4742           switch (lowcase (c))
4743             {
4744             case 'p':
4745               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4746                 get_tagname = TRUE;
4747               continue;
4748             case 'f':
4749               if (nocase_tail ("unction"))
4750                 get_tagname = TRUE;
4751               continue;
4752             }
4753         }
4754     } /* while not eof */
4755
4756   free (tline.buffer);
4757 }
4758
4759 \f
4760 /*
4761  * Lisp tag functions
4762  *  look for (def or (DEF, quote or QUOTE
4763  */
4764
4765 static void L_getit (void);
4766
4767 static void
4768 L_getit (void)
4769 {
4770   if (*dbp == '\'')             /* Skip prefix quote */
4771     dbp++;
4772   else if (*dbp == '(')
4773   {
4774     dbp++;
4775     /* Try to skip "(quote " */
4776     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4777       /* Ok, then skip "(" before name in (defstruct (foo)) */
4778       dbp = skip_spaces (dbp);
4779   }
4780   get_tag (dbp, NULL);
4781 }
4782
4783 static void
4784 Lisp_functions (FILE *inf)
4785 {
4786   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4787     {
4788       if (dbp[0] != '(')
4789         continue;
4790
4791       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4792         {
4793           dbp = skip_non_spaces (dbp);
4794           dbp = skip_spaces (dbp);
4795           L_getit ();
4796         }
4797       else
4798         {
4799           /* Check for (foo::defmumble name-defined ... */
4800           do
4801             dbp++;
4802           while (!notinname (*dbp) && *dbp != ':');
4803           if (*dbp == ':')
4804             {
4805               do
4806                 dbp++;
4807               while (*dbp == ':');
4808
4809               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4810                 {
4811                   dbp = skip_non_spaces (dbp);
4812                   dbp = skip_spaces (dbp);
4813                   L_getit ();
4814                 }
4815             }
4816         }
4817     }
4818 }
4819
4820 \f
4821 /*
4822  * Lua script language parsing
4823  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4824  *
4825  *  "function" and "local function" are tags if they start at column 1.
4826  */
4827 static void
4828 Lua_functions (FILE *inf)
4829 {
4830   register char *bp;
4831
4832   LOOP_ON_INPUT_LINES (inf, lb, bp)
4833     {
4834       if (bp[0] != 'f' && bp[0] != 'l')
4835         continue;
4836
4837       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4838
4839       if (LOOKING_AT (bp, "function"))
4840         get_tag (bp, NULL);
4841     }
4842 }
4843
4844 \f
4845 /*
4846  * Postscript tags
4847  * Just look for lines where the first character is '/'
4848  * Also look at "defineps" for PSWrap
4849  * Ideas by:
4850  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4851  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4852  */
4853 static void
4854 PS_functions (FILE *inf)
4855 {
4856   register char *bp, *ep;
4857
4858   LOOP_ON_INPUT_LINES (inf, lb, bp)
4859     {
4860       if (bp[0] == '/')
4861         {
4862           for (ep = bp+1;
4863                *ep != '\0' && *ep != ' ' && *ep != '{';
4864                ep++)
4865             continue;
4866           make_tag (bp, ep - bp, TRUE,
4867                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4868         }
4869       else if (LOOKING_AT (bp, "defineps"))
4870         get_tag (bp, NULL);
4871     }
4872 }
4873
4874 \f
4875 /*
4876  * Forth tags
4877  * Ignore anything after \ followed by space or in ( )
4878  * Look for words defined by :
4879  * Look for constant, code, create, defer, value, and variable
4880  * OBP extensions:  Look for buffer:, field,
4881  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4882  */
4883 static void
4884 Forth_words (FILE *inf)
4885 {
4886   register char *bp;
4887
4888   LOOP_ON_INPUT_LINES (inf, lb, bp)
4889     while ((bp = skip_spaces (bp))[0] != '\0')
4890       if (bp[0] == '\\' && iswhite(bp[1]))
4891         break;                  /* read next line */
4892       else if (bp[0] == '(' && iswhite(bp[1]))
4893         do                      /* skip to ) or eol */
4894           bp++;
4895         while (*bp != ')' && *bp != '\0');
4896       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4897                || LOOKING_AT_NOCASE (bp, "constant")
4898                || LOOKING_AT_NOCASE (bp, "code")
4899                || LOOKING_AT_NOCASE (bp, "create")
4900                || LOOKING_AT_NOCASE (bp, "defer")
4901                || LOOKING_AT_NOCASE (bp, "value")
4902                || LOOKING_AT_NOCASE (bp, "variable")
4903                || LOOKING_AT_NOCASE (bp, "buffer:")
4904                || LOOKING_AT_NOCASE (bp, "field"))
4905         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4906       else
4907         bp = skip_non_spaces (bp);
4908 }
4909
4910 \f
4911 /*
4912  * Scheme tag functions
4913  * look for (def... xyzzy
4914  *          (def... (xyzzy
4915  *          (def ... ((...(xyzzy ....
4916  *          (set! xyzzy
4917  * Original code by Ken Haase (1985?)
4918  */
4919 static void
4920 Scheme_functions (FILE *inf)
4921 {
4922   register char *bp;
4923
4924   LOOP_ON_INPUT_LINES (inf, lb, bp)
4925     {
4926       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4927         {
4928           bp = skip_non_spaces (bp+4);
4929           /* Skip over open parens and white space.  Don't continue past
4930              '\0'. */
4931           while (*bp && notinname (*bp))
4932             bp++;
4933           get_tag (bp, NULL);
4934         }
4935       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4936         get_tag (bp, NULL);
4937     }
4938 }
4939
4940 \f
4941 /* Find tags in TeX and LaTeX input files.  */
4942
4943 /* TEX_toktab is a table of TeX control sequences that define tags.
4944  * Each entry records one such control sequence.
4945  *
4946  * Original code from who knows whom.
4947  * Ideas by:
4948  *   Stefan Monnier (2002)
4949  */
4950
4951 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4952
4953 /* Default set of control sequences to put into TEX_toktab.
4954    The value of environment var TEXTAGS is prepended to this.  */
4955 static const char *TEX_defenv = "\
4956 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4957 :part:appendix:entry:index:def\
4958 :newcommand:renewcommand:newenvironment:renewenvironment";
4959
4960 static void TEX_mode (FILE *);
4961 static void TEX_decode_env (const char *, const char *);
4962
4963 static char TEX_esc = '\\';
4964 static char TEX_opgrp = '{';
4965 static char TEX_clgrp = '}';
4966
4967 /*
4968  * TeX/LaTeX scanning loop.
4969  */
4970 static void
4971 TeX_commands (FILE *inf)
4972 {
4973   char *cp;
4974   linebuffer *key;
4975
4976   /* Select either \ or ! as escape character.  */
4977   TEX_mode (inf);
4978
4979   /* Initialize token table once from environment. */
4980   if (TEX_toktab == NULL)
4981     TEX_decode_env ("TEXTAGS", TEX_defenv);
4982
4983   LOOP_ON_INPUT_LINES (inf, lb, cp)
4984     {
4985       /* Look at each TEX keyword in line. */
4986       for (;;)
4987         {
4988           /* Look for a TEX escape. */
4989           while (*cp++ != TEX_esc)
4990             if (cp[-1] == '\0' || cp[-1] == '%')
4991               goto tex_next_line;
4992
4993           for (key = TEX_toktab; key->buffer != NULL; key++)
4994             if (strneq (cp, key->buffer, key->len))
4995               {
4996                 register char *p;
4997                 int namelen, linelen;
4998                 bool opgrp = FALSE;
4999
5000                 cp = skip_spaces (cp + key->len);
5001                 if (*cp == TEX_opgrp)
5002                   {
5003                     opgrp = TRUE;
5004                     cp++;
5005                   }
5006                 for (p = cp;
5007                      (!iswhite (*p) && *p != '#' &&
5008                       *p != TEX_opgrp && *p != TEX_clgrp);
5009                      p++)
5010                   continue;
5011                 namelen = p - cp;
5012                 linelen = lb.len;
5013                 if (!opgrp || *p == TEX_clgrp)
5014                   {
5015                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5016                       p++;
5017                     linelen = p - lb.buffer + 1;
5018                   }
5019                 make_tag (cp, namelen, TRUE,
5020                           lb.buffer, linelen, lineno, linecharno);
5021                 goto tex_next_line; /* We only tag a line once */
5022               }
5023         }
5024     tex_next_line:
5025       ;
5026     }
5027 }
5028
5029 #define TEX_LESC '\\'
5030 #define TEX_SESC '!'
5031
5032 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5033    chars accordingly. */
5034 static void
5035 TEX_mode (FILE *inf)
5036 {
5037   int c;
5038
5039   while ((c = getc (inf)) != EOF)
5040     {
5041       /* Skip to next line if we hit the TeX comment char. */
5042       if (c == '%')
5043         while (c != '\n' && c != EOF)
5044           c = getc (inf);
5045       else if (c == TEX_LESC || c == TEX_SESC )
5046         break;
5047     }
5048
5049   if (c == TEX_LESC)
5050     {
5051       TEX_esc = TEX_LESC;
5052       TEX_opgrp = '{';
5053       TEX_clgrp = '}';
5054     }
5055   else
5056     {
5057       TEX_esc = TEX_SESC;
5058       TEX_opgrp = '<';
5059       TEX_clgrp = '>';
5060     }
5061   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5062      No attempt is made to correct the situation. */
5063   rewind (inf);
5064 }
5065
5066 /* Read environment and prepend it to the default string.
5067    Build token table. */
5068 static void
5069 TEX_decode_env (const char *evarname, const char *defenv)
5070 {
5071   register const char *env, *p;
5072   int i, len;
5073
5074   /* Append default string to environment. */
5075   env = getenv (evarname);
5076   if (!env)
5077     env = defenv;
5078   else
5079     env = concat (env, defenv, "");
5080
5081   /* Allocate a token table */
5082   for (len = 1, p = env; p;)
5083     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5084       len++;
5085   TEX_toktab = xnew (len, linebuffer);
5086
5087   /* Unpack environment string into token table. Be careful about */
5088   /* zero-length strings (leading ':', "::" and trailing ':') */
5089   for (i = 0; *env != '\0';)
5090     {
5091       p = etags_strchr (env, ':');
5092       if (!p)                   /* End of environment string. */
5093         p = env + strlen (env);
5094       if (p - env > 0)
5095         {                       /* Only non-zero strings. */
5096           TEX_toktab[i].buffer = savenstr (env, p - env);
5097           TEX_toktab[i].len = p - env;
5098           i++;
5099         }
5100       if (*p)
5101         env = p + 1;
5102       else
5103         {
5104           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5105           TEX_toktab[i].len = 0;
5106           break;
5107         }
5108     }
5109 }
5110
5111 \f
5112 /* Texinfo support.  Dave Love, Mar. 2000.  */
5113 static void
5114 Texinfo_nodes (FILE *inf)
5115 {
5116   char *cp, *start;
5117   LOOP_ON_INPUT_LINES (inf, lb, cp)
5118     if (LOOKING_AT (cp, "@node"))
5119       {
5120         start = cp;
5121         while (*cp != '\0' && *cp != ',')
5122           cp++;
5123         make_tag (start, cp - start, TRUE,
5124                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5125       }
5126 }
5127
5128 \f
5129 /*
5130  * HTML support.
5131  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5132  * Contents of <a name=xxx> are tags with name xxx.
5133  *
5134  * Francesco Potortì, 2002.
5135  */
5136 static void
5137 HTML_labels (FILE *inf)
5138 {
5139   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5140   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5141   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5142   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5143   char *end;
5144
5145
5146   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5147
5148   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5149     for (;;)                    /* loop on the same line */
5150       {
5151         if (skiptag)            /* skip HTML tag */
5152           {
5153             while (*dbp != '\0' && *dbp != '>')
5154               dbp++;
5155             if (*dbp == '>')
5156               {
5157                 dbp += 1;
5158                 skiptag = FALSE;
5159                 continue;       /* look on the same line */
5160               }
5161             break;              /* go to next line */
5162           }
5163
5164         else if (intag) /* look for "name=" or "id=" */
5165           {
5166             while (*dbp != '\0' && *dbp != '>'
5167                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5168               dbp++;
5169             if (*dbp == '\0')
5170               break;            /* go to next line */
5171             if (*dbp == '>')
5172               {
5173                 dbp += 1;
5174                 intag = FALSE;
5175                 continue;       /* look on the same line */
5176               }
5177             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5178                 || LOOKING_AT_NOCASE (dbp, "id="))
5179               {
5180                 bool quoted = (dbp[0] == '"');
5181
5182                 if (quoted)
5183                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5184                     continue;
5185                 else
5186                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5187                     continue;
5188                 linebuffer_setlen (&token_name, end - dbp);
5189                 strncpy (token_name.buffer, dbp, end - dbp);
5190                 token_name.buffer[end - dbp] = '\0';
5191
5192                 dbp = end;
5193                 intag = FALSE;  /* we found what we looked for */
5194                 skiptag = TRUE; /* skip to the end of the tag */
5195                 getnext = TRUE; /* then grab the text */
5196                 continue;       /* look on the same line */
5197               }
5198             dbp += 1;
5199           }
5200
5201         else if (getnext)       /* grab next tokens and tag them */
5202           {
5203             dbp = skip_spaces (dbp);
5204             if (*dbp == '\0')
5205               break;            /* go to next line */
5206             if (*dbp == '<')
5207               {
5208                 intag = TRUE;
5209                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5210                 continue;       /* look on the same line */
5211               }
5212
5213             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5214               continue;
5215             make_tag (token_name.buffer, token_name.len, TRUE,
5216                       dbp, end - dbp, lineno, linecharno);
5217             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5218             getnext = FALSE;
5219             break;              /* go to next line */
5220           }
5221
5222         else                    /* look for an interesting HTML tag */
5223           {
5224             while (*dbp != '\0' && *dbp != '<')
5225               dbp++;
5226             if (*dbp == '\0')
5227               break;            /* go to next line */
5228             intag = TRUE;
5229             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5230               {
5231                 inanchor = TRUE;
5232                 continue;       /* look on the same line */
5233               }
5234             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5235                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5236                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5237                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5238               {
5239                 intag = FALSE;
5240                 getnext = TRUE;
5241                 continue;       /* look on the same line */
5242               }
5243             dbp += 1;
5244           }
5245       }
5246 }
5247
5248 \f
5249 /*
5250  * Prolog support
5251  *
5252  * Assumes that the predicate or rule starts at column 0.
5253  * Only the first clause of a predicate or rule is added.
5254  * Original code by Sunichirou Sugou (1989)
5255  * Rewritten by Anders Lindgren (1996)
5256  */
5257 static size_t prolog_pr (char *, char *);
5258 static void prolog_skip_comment (linebuffer *, FILE *);
5259 static size_t prolog_atom (char *, size_t);
5260
5261 static void
5262 Prolog_functions (FILE *inf)
5263 {
5264   char *cp, *last;
5265   size_t len;
5266   size_t allocated;
5267
5268   allocated = 0;
5269   len = 0;
5270   last = NULL;
5271
5272   LOOP_ON_INPUT_LINES (inf, lb, cp)
5273     {
5274       if (cp[0] == '\0')        /* Empty line */
5275         continue;
5276       else if (iswhite (cp[0])) /* Not a predicate */
5277         continue;
5278       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5279         prolog_skip_comment (&lb, inf);
5280       else if ((len = prolog_pr (cp, last)) > 0)
5281         {
5282           /* Predicate or rule.  Store the function name so that we
5283              only generate a tag for the first clause.  */
5284           if (last == NULL)
5285             last = xnew(len + 1, char);
5286           else if (len + 1 > allocated)
5287             xrnew (last, len + 1, char);
5288           allocated = len + 1;
5289           strncpy (last, cp, len);
5290           last[len] = '\0';
5291         }
5292     }
5293   free (last);
5294 }
5295
5296
5297 static void
5298 prolog_skip_comment (linebuffer *plb, FILE *inf)
5299 {
5300   char *cp;
5301
5302   do
5303     {
5304       for (cp = plb->buffer; *cp != '\0'; cp++)
5305         if (cp[0] == '*' && cp[1] == '/')
5306           return;
5307       readline (plb, inf);
5308     }
5309   while (!feof(inf));
5310 }
5311
5312 /*
5313  * A predicate or rule definition is added if it matches:
5314  *     <beginning of line><Prolog Atom><whitespace>(
5315  * or  <beginning of line><Prolog Atom><whitespace>:-
5316  *
5317  * It is added to the tags database if it doesn't match the
5318  * name of the previous clause header.
5319  *
5320  * Return the size of the name of the predicate or rule, or 0 if no
5321  * header was found.
5322  */
5323 static size_t
5324 prolog_pr (char *s, char *last)
5325
5326                                 /* Name of last clause. */
5327 {
5328   size_t pos;
5329   size_t len;
5330
5331   pos = prolog_atom (s, 0);
5332   if (! pos)
5333     return 0;
5334
5335   len = pos;
5336   pos = skip_spaces (s + pos) - s;
5337
5338   if ((s[pos] == '.'
5339        || (s[pos] == '(' && (pos += 1))
5340        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5341       && (last == NULL          /* save only the first clause */
5342           || len != strlen (last)
5343           || !strneq (s, last, len)))
5344         {
5345           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5346           return len;
5347         }
5348   else
5349     return 0;
5350 }
5351
/*
 * Consume the Prolog atom that starts at S + POS.
 * Return the number of bytes consumed, or 0 if there is no atom there.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores that starts
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  A quoted atom that is not closed
 *   before the end of the string is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  char *start = s + pos;
  char *p = start;

  if (*p == '\'')
    {
      /* Quoted atom: find the closing quote. */
      p++;
      for (;;)
        switch (*p)
          {
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return 0;
          case '\\':
            if (p[1] == '\0')
              return 0;
            p += 2;             /* skip the backslash and what it quotes */
            break;
          case '\'':
            p++;
            if (*p != '\'')
              return p - start; /* that was the closing quote */
            p++;                /* a doubled quote stands for one quote */
            break;
          default:
            p++;
            break;
          }
    }

  if (!ISLOWER (*p) && *p != '_')
    return 0;

  /* The atom is unquoted. */
  do
    p++;
  while (ISALNUM (*p) || *p == '_');

  return p - start;
}
5408
5409 \f
5410 /*
5411  * Support for Erlang
5412  *
5413  * Generates tags for functions, defines, and records.
5414  * Assumes that Erlang functions start at column 0.
5415  * Original code by Anders Lindgren (1996)
5416  */
5417 static int erlang_func (char *, char *);
5418 static void erlang_attribute (char *);
5419 static int erlang_atom (char *);
5420
5421 static void
5422 Erlang_functions (FILE *inf)
5423 {
5424   char *cp, *last;
5425   int len;
5426   int allocated;
5427
5428   allocated = 0;
5429   len = 0;
5430   last = NULL;
5431
5432   LOOP_ON_INPUT_LINES (inf, lb, cp)
5433     {
5434       if (cp[0] == '\0')        /* Empty line */
5435         continue;
5436       else if (iswhite (cp[0])) /* Not function nor attribute */
5437         continue;
5438       else if (cp[0] == '%')    /* comment */
5439         continue;
5440       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5441         continue;
5442       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5443         {
5444           erlang_attribute (cp);
5445           if (last != NULL)
5446             {
5447               free (last);
5448               last = NULL;
5449             }
5450         }
5451       else if ((len = erlang_func (cp, last)) > 0)
5452         {
5453           /*
5454            * Function.  Store the function name so that we only
5455            * generates a tag for the first clause.
5456            */
5457           if (last == NULL)
5458             last = xnew (len + 1, char);
5459           else if (len + 1 > allocated)
5460             xrnew (last, len + 1, char);
5461           allocated = len + 1;
5462           strncpy (last, cp, len);
5463           last[len] = '\0';
5464         }
5465     }
5466   free (last);
5467 }
5468
5469
5470 /*
5471  * A function definition is added if it matches:
5472  *     <beginning of line><Erlang Atom><whitespace>(
5473  *
5474  * It is added to the tags database if it doesn't match the
5475  * name of the previous clause header.
5476  *
5477  * Return the size of the name of the function, or 0 if no function
5478  * was found.
5479  */
5480 static int
5481 erlang_func (char *s, char *last)
5482
5483                                 /* Name of last clause. */
5484 {
5485   int pos;
5486   int len;
5487
5488   pos = erlang_atom (s);
5489   if (pos < 1)
5490     return 0;
5491
5492   len = pos;
5493   pos = skip_spaces (s + pos) - s;
5494
5495   /* Save only the first clause. */
5496   if (s[pos++] == '('
5497       && (last == NULL
5498           || len != (int)strlen (last)
5499           || !strneq (s, last, len)))
5500         {
5501           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5502           return len;
5503         }
5504
5505   return 0;
5506 }
5507
5508
5509 /*
5510  * Handle attributes.  Currently, tags are generated for defines
5511  * and records.
5512  *
5513  * They are on the form:
5514  * -define(foo, bar).
5515  * -define(Foo(M, N), M+N).
5516  * -record(graph, {vtab = notable, cyclic = true}).
5517  */
5518 static void
5519 erlang_attribute (char *s)
5520 {
5521   char *cp = s;
5522
5523   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5524       && *cp++ == '(')
5525     {
5526       int len = erlang_atom (skip_spaces (cp));
5527       if (len > 0)
5528         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5529     }
5530   return;
5531 }
5532
5533
/*
 * Consume the Erlang atom (or variable) that starts at S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one.
 *
 * Accepted are a letter or underscore followed by alphanumeric
 * characters and underscores, and a single-quoted string in which a
 * backslash quotes the character after it.  A quoted atom that is not
 * closed before the end of the string is rejected.
 */
static int
erlang_atom (char *s)
{
  int n = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: find the closing quote. */
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          if (s[n] == '\\')
            {
              n++;              /* look at the quoted character */
              if (s[n] == '\0')
                return 0;
            }
          n++;
        }
      return n + 1;             /* count the closing quote too */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    /* The atom is unquoted. */
    for (n = 1; ISALNUM (s[n]) || s[n] == '_'; n++)
      continue;

  return n;
}
5561
5562 \f
5563 static char *scan_separators (char *);
5564 static void add_regex (char *, language *);
5565 static char *substitute (char *, char *, struct re_registers *);
5566
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place.  Null terminates name string.
 *
 * A backslash followed by the separator yields the separator alone; a
 * backslash followed by any other unrecognized character is kept
 * together with that character.  A backslash that is the last
 * character of the string is dropped.
 *
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string has no separator at all and
 * yields NULL too.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan.  Stepping over the first character, as done
     below, would move past the terminating null byte.  */
  if (sep == '\0')
    return NULL;

  /* COPYTO always stays behind NAME, so copying in place is safe. */
  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          if (name[1] == '\0')
            continue;           /* lone trailing backslash: drop it */
          ++name;               /* look at the quoted character */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;                  /* unquoted separator: we are done */
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5625
5626 /* Look at the argument of --regex or --no-regex and do the right
5627    thing.  Same for each line of a regexp file. */
5628 static void
5629 analyse_regex (char *regex_arg)
5630 {
5631   if (regex_arg == NULL)
5632     {
5633       free_regexps ();          /* --no-regex: remove existing regexps */
5634       return;
5635     }
5636
5637   /* A real --regexp option or a line in a regexp file. */
5638   switch (regex_arg[0])
5639     {
5640       /* Comments in regexp file or null arg to --regex. */
5641     case '\0':
5642     case ' ':
5643     case '\t':
5644       break;
5645
5646       /* Read a regex file.  This is recursive and may result in a
5647          loop, which will stop when the file descriptors are exhausted. */
5648     case '@':
5649       {
5650         FILE *regexfp;
5651         linebuffer regexbuf;
5652         char *regexfile = regex_arg + 1;
5653
5654         /* regexfile is a file containing regexps, one per line. */
5655         regexfp = fopen (regexfile, "r");
5656         if (regexfp == NULL)
5657           {
5658             pfatal (regexfile);
5659             return;
5660           }
5661         linebuffer_init (&regexbuf);
5662         while (readline_internal (&regexbuf, regexfp) > 0)
5663           analyse_regex (regexbuf.buffer);
5664         free (regexbuf.buffer);
5665         fclose (regexfp);
5666       }
5667       break;
5668
5669       /* Regexp to be used for a specific language only. */
5670     case '{':
5671       {
5672         language *lang;
5673         char *lang_name = regex_arg + 1;
5674         char *cp;
5675
5676         for (cp = lang_name; *cp != '}'; cp++)
5677           if (*cp == '\0')
5678             {
5679               error ("unterminated language name in regex: %s", regex_arg);
5680               return;
5681             }
5682         *cp++ = '\0';
5683         lang = get_language_from_langname (lang_name);
5684         if (lang == NULL)
5685           return;
5686         add_regex (cp, lang);
5687       }
5688       break;
5689
5690       /* Regexp to be used for any language. */
5691     default:
5692       add_regex (regex_arg, NULL);
5693       break;
5694     }
5695 }
5696
5697 /* Separate the regexp pattern, compile it,
5698    and care for optional name and modifiers. */
5699 static void
5700 add_regex (char *regexp_pattern, language *lang)
5701 {
5702   static struct re_pattern_buffer zeropattern;
5703   char sep, *pat, *name, *modifiers;
5704   char empty[] = "";
5705   const char *err;
5706   struct re_pattern_buffer *patbuf;
5707   regexp *rp;
5708   bool
5709     force_explicit_name = TRUE, /* do not use implicit tag names */
5710     ignore_case = FALSE,        /* case is significant */
5711     multi_line = FALSE,         /* matches are done one line at a time */
5712     single_line = FALSE;        /* dot does not match newline */
5713
5714
5715   if (strlen(regexp_pattern) < 3)
5716     {
5717       error ("null regexp", (char *)NULL);
5718       return;
5719     }
5720   sep = regexp_pattern[0];
5721   name = scan_separators (regexp_pattern);
5722   if (name == NULL)
5723     {
5724       error ("%s: unterminated regexp", regexp_pattern);
5725       return;
5726     }
5727   if (name[1] == sep)
5728     {
5729       error ("null name for regexp \"%s\"", regexp_pattern);
5730       return;
5731     }
5732   modifiers = scan_separators (name);
5733   if (modifiers == NULL)        /* no terminating separator --> no name */
5734     {
5735       modifiers = name;
5736       name = empty;
5737     }
5738   else
5739     modifiers += 1;             /* skip separator */
5740
5741   /* Parse regex modifiers. */
5742   for (; modifiers[0] != '\0'; modifiers++)
5743     switch (modifiers[0])
5744       {
5745       case 'N':
5746         if (modifiers == name)
5747           error ("forcing explicit tag name but no name, ignoring", NULL);
5748         force_explicit_name = TRUE;
5749         break;
5750       case 'i':
5751         ignore_case = TRUE;
5752         break;
5753       case 's':
5754         single_line = TRUE;
5755         /* FALLTHRU */
5756       case 'm':
5757         multi_line = TRUE;
5758         need_filebuf = TRUE;
5759         break;
5760       default:
5761         {
5762           char wrongmod [2];
5763           wrongmod[0] = modifiers[0];
5764           wrongmod[1] = '\0';
5765           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5766         }
5767         break;
5768       }
5769
5770   patbuf = xnew (1, struct re_pattern_buffer);
5771   *patbuf = zeropattern;
5772   if (ignore_case)
5773     {
5774       static char lc_trans[CHARS];
5775       int i;
5776       for (i = 0; i < CHARS; i++)
5777         lc_trans[i] = lowcase (i);
5778       patbuf->translate = lc_trans;     /* translation table to fold case  */
5779     }
5780
5781   if (multi_line)
5782     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5783   else
5784     pat = regexp_pattern;
5785
5786   if (single_line)
5787     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5788   else
5789     re_set_syntax (RE_SYNTAX_EMACS);
5790
5791   err = re_compile_pattern (pat, strlen (pat), patbuf);
5792   if (multi_line)
5793     free (pat);
5794   if (err != NULL)
5795     {
5796       error ("%s while compiling pattern", err);
5797       return;
5798     }
5799
5800   rp = p_head;
5801   p_head = xnew (1, regexp);
5802   p_head->pattern = savestr (regexp_pattern);
5803   p_head->p_next = rp;
5804   p_head->lang = lang;
5805   p_head->pat = patbuf;
5806   p_head->name = savestr (name);
5807   p_head->error_signaled = FALSE;
5808   p_head->force_explicit_name = force_explicit_name;
5809   p_head->ignore_case = ignore_case;
5810   p_head->multi_line = multi_line;
5811 }
5812
5813 /*
5814  * Do the substitutions indicated by the regular expression and
5815  * arguments.
5816  */
5817 static char *
5818 substitute (char *in, char *out, struct re_registers *regs)
5819 {
5820   char *result, *t;
5821   int size, dig, diglen;
5822
5823   result = NULL;
5824   size = strlen (out);
5825
5826   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5827   if (out[size - 1] == '\\')
5828     fatal ("pattern error in \"%s\"", out);
5829   for (t = etags_strchr (out, '\\');
5830        t != NULL;
5831        t = etags_strchr (t + 2, '\\'))
5832     if (ISDIGIT (t[1]))
5833       {
5834         dig = t[1] - '0';
5835         diglen = regs->end[dig] - regs->start[dig];
5836         size += diglen - 2;
5837       }
5838     else
5839       size -= 1;
5840
5841   /* Allocate space and do the substitutions. */
5842   assert (size >= 0);
5843   result = xnew (size + 1, char);
5844
5845   for (t = result; *out != '\0'; out++)
5846     if (*out == '\\' && ISDIGIT (*++out))
5847       {
5848         dig = *out - '0';
5849         diglen = regs->end[dig] - regs->start[dig];
5850         strncpy (t, in + regs->start[dig], diglen);
5851         t += diglen;
5852       }
5853     else
5854       *t++ = *out;
5855   *t = '\0';
5856
5857   assert (t <= result + size);
5858   assert (t - result == (int)strlen (result));
5859
5860   return result;
5861 }
5862
5863 /* Deallocate all regexps. */
5864 static void
5865 free_regexps (void)
5866 {
5867   regexp *rp;
5868   while (p_head != NULL)
5869     {
5870       rp = p_head->p_next;
5871       free (p_head->pattern);
5872       free (p_head->name);
5873       free (p_head);
5874       p_head = rp;
5875     }
5876   return;
5877 }
5878
5879 /*
5880  * Reads the whole file as a single string from `filebuf' and looks for
5881  * multi-line regular expressions, creating tags on matches.
5882  * readline already dealt with normal regexps.
5883  *
5884  * Idea by Ben Wing <ben@666.com> (2002).
5885  */
5886 static void
5887 regex_tag_multiline (void)
5888 {
5889   char *buffer = filebuf.buffer;
5890   regexp *rp;
5891   char *name;
5892
5893   for (rp = p_head; rp != NULL; rp = rp->p_next)
5894     {
5895       int match = 0;
5896
5897       if (!rp->multi_line)
5898         continue;               /* skip normal regexps */
5899
5900       /* Generic initialisations before parsing file from memory. */
5901       lineno = 1;               /* reset global line number */
5902       charno = 0;               /* reset global char number */
5903       linecharno = 0;           /* reset global char number of line start */
5904
5905       /* Only use generic regexps or those for the current language. */
5906       if (rp->lang != NULL && rp->lang != curfdp->lang)
5907         continue;
5908
5909       while (match >= 0 && match < filebuf.len)
5910         {
5911           match = re_search (rp->pat, buffer, filebuf.len, charno,
5912                              filebuf.len - match, &rp->regs);
5913           switch (match)
5914             {
5915             case -2:
5916               /* Some error. */
5917               if (!rp->error_signaled)
5918                 {
5919                   error ("regexp stack overflow while matching \"%s\"",
5920                          rp->pattern);
5921                   rp->error_signaled = TRUE;
5922                 }
5923               break;
5924             case -1:
5925               /* No match. */
5926               break;
5927             default:
5928               if (match == rp->regs.end[0])
5929                 {
5930                   if (!rp->error_signaled)
5931                     {
5932                       error ("regexp matches the empty string: \"%s\"",
5933                              rp->pattern);
5934                       rp->error_signaled = TRUE;
5935                     }
5936                   match = -3;   /* exit from while loop */
5937                   break;
5938                 }
5939
5940               /* Match occurred.  Construct a tag. */
5941               while (charno < rp->regs.end[0])
5942                 if (buffer[charno++] == '\n')
5943                   lineno++, linecharno = charno;
5944               name = rp->name;
5945               if (name[0] == '\0')
5946                 name = NULL;
5947               else /* make a named tag */
5948                 name = substitute (buffer, rp->name, &rp->regs);
5949               if (rp->force_explicit_name)
5950                 /* Force explicit tag name, if a name is there. */
5951                 pfnote (name, TRUE, buffer + linecharno,
5952                         charno - linecharno + 1, lineno, linecharno);
5953               else
5954                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5955                           charno - linecharno + 1, lineno, linecharno);
5956               break;
5957             }
5958         }
5959     }
5960 }
5961
5962 \f
5963 static bool
5964 nocase_tail (const char *cp)
5965 {
5966   register int len = 0;
5967
5968   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5969     cp++, len++;
5970   if (*cp == '\0' && !intoken (dbp[len]))
5971     {
5972       dbp += len;
5973       return TRUE;
5974     }
5975   return FALSE;
5976 }
5977
5978 static void
5979 get_tag (register char *bp, char **namepp)
5980 {
5981   register char *cp = bp;
5982
5983   if (*bp != '\0')
5984     {
5985       /* Go till you get to white space or a syntactic break */
5986       for (cp = bp + 1; !notinname (*cp); cp++)
5987         continue;
5988       make_tag (bp, cp - bp, TRUE,
5989                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5990     }
5991
5992   if (namepp != NULL)
5993     *namepp = savenstr (bp, cp - bp);
5994 }
5995
5996 /*
5997  * Read a line of text from `stream' into `lbp', excluding the
5998  * newline or CR-NL, if any.  Return the number of characters read from
5999  * `stream', which is the length of the line including the newline.
6000  *
6001  * On DOS or Windows we do not count the CR character, if any before the
6002  * NL, in the returned length; this mirrors the behavior of Emacs on those
6003  * platforms (for text files, it translates CR-NL to NL as it reads in the
6004  * file).
6005  *
6006  * If multi-line regular expressions are requested, each line read is
6007  * appended to `filebuf'.
6008  */
6009 static long
6010 readline_internal (linebuffer *lbp, register FILE *stream)
6011 {
6012   char *buffer = lbp->buffer;
6013   register char *p = lbp->buffer;
6014   register char *pend;
6015   int chars_deleted;
6016
6017   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6018
6019   for (;;)
6020     {
6021       register int c = getc (stream);
6022       if (p == pend)
6023         {
6024           /* We're at the end of linebuffer: expand it. */
6025           lbp->size *= 2;
6026           xrnew (buffer, lbp->size, char);
6027           p += buffer - lbp->buffer;
6028           pend = buffer + lbp->size;
6029           lbp->buffer = buffer;
6030         }
6031       if (c == EOF)
6032         {
6033           *p = '\0';
6034           chars_deleted = 0;
6035           break;
6036         }
6037       if (c == '\n')
6038         {
6039           if (p > buffer && p[-1] == '\r')
6040             {
6041               p -= 1;
6042 #ifdef DOS_NT
6043              /* Assume CRLF->LF translation will be performed by Emacs
6044                 when loading this file, so CRs won't appear in the buffer.
6045                 It would be cleaner to compensate within Emacs;
6046                 however, Emacs does not know how many CRs were deleted
6047                 before any given point in the file.  */
6048               chars_deleted = 1;
6049 #else
6050               chars_deleted = 2;
6051 #endif
6052             }
6053           else
6054             {
6055               chars_deleted = 1;
6056             }
6057           *p = '\0';
6058           break;
6059         }
6060       *p++ = c;
6061     }
6062   lbp->len = p - buffer;
6063
6064   if (need_filebuf              /* we need filebuf for multi-line regexps */
6065       && chars_deleted > 0)     /* not at EOF */
6066     {
6067       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6068         {
6069           /* Expand filebuf. */
6070           filebuf.size *= 2;
6071           xrnew (filebuf.buffer, filebuf.size, char);
6072         }
6073       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6074       filebuf.len += lbp->len;
6075       filebuf.buffer[filebuf.len++] = '\n';
6076       filebuf.buffer[filebuf.len] = '\0';
6077     }
6078
6079   return lbp->len + chars_deleted;
6080 }
6081
6082 /*
6083  * Like readline_internal, above, but in addition try to match the
6084  * input line against relevant regular expressions and manage #line
6085  * directives.
6086  */
6087 static void
6088 readline (linebuffer *lbp, FILE *stream)
6089 {
6090   long result;
6091
6092   linecharno = charno;          /* update global char number of line start */
6093   result = readline_internal (lbp, stream); /* read line */
6094   lineno += 1;                  /* increment global line number */
6095   charno += result;             /* increment global char number */
6096
6097   /* Honour #line directives. */
6098   if (!no_line_directive)
6099     {
6100       static bool discard_until_line_directive;
6101
6102       /* Check whether this is a #line directive. */
6103       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6104         {
6105           unsigned int lno;
6106           int start = 0;
6107
6108           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6109               && start > 0)     /* double quote character found */
6110             {
6111               char *endp = lbp->buffer + start;
6112
6113               while ((endp = etags_strchr (endp, '"')) != NULL
6114                      && endp[-1] == '\\')
6115                 endp++;
6116               if (endp != NULL)
6117                 /* Ok, this is a real #line directive.  Let's deal with it. */
6118                 {
6119                   char *taggedabsname;  /* absolute name of original file */
6120                   char *taggedfname;    /* name of original file as given */
6121                   char *name;           /* temp var */
6122
6123                   discard_until_line_directive = FALSE; /* found it */
6124                   name = lbp->buffer + start;
6125                   *endp = '\0';
6126                   canonicalize_filename (name);
6127                   taggedabsname = absolute_filename (name, tagfiledir);
6128                   if (filename_is_absolute (name)
6129                       || filename_is_absolute (curfdp->infname))
6130                     taggedfname = savestr (taggedabsname);
6131                   else
6132                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6133
6134                   if (streq (curfdp->taggedfname, taggedfname))
6135                     /* The #line directive is only a line number change.  We
6136                        deal with this afterwards. */
6137                     free (taggedfname);
6138                   else
6139                     /* The tags following this #line directive should be
6140                        attributed to taggedfname.  In order to do this, set
6141                        curfdp accordingly. */
6142                     {
6143                       fdesc *fdp; /* file description pointer */
6144
6145                       /* Go look for a file description already set up for the
6146                          file indicated in the #line directive.  If there is
6147                          one, use it from now until the next #line
6148                          directive. */
6149                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6150                         if (streq (fdp->infname, curfdp->infname)
6151                             && streq (fdp->taggedfname, taggedfname))
6152                           /* If we remove the second test above (after the &&)
6153                              then all entries pertaining to the same file are
6154                              coalesced in the tags file.  If we use it, then
6155                              entries pertaining to the same file but generated
6156                              from different files (via #line directives) will
6157                              go into separate sections in the tags file.  These
6158                              alternatives look equivalent.  The first one
6159                              destroys some apparently useless information. */
6160                           {
6161                             curfdp = fdp;
6162                             free (taggedfname);
6163                             break;
6164                           }
6165                       /* Else, if we already tagged the real file, skip all
6166                          input lines until the next #line directive. */
6167                       if (fdp == NULL) /* not found */
6168                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6169                           if (streq (fdp->infabsname, taggedabsname))
6170                             {
6171                               discard_until_line_directive = TRUE;
6172                               free (taggedfname);
6173                               break;
6174                             }
6175                       /* Else create a new file description and use that from
6176                          now on, until the next #line directive. */
6177                       if (fdp == NULL) /* not found */
6178                         {
6179                           fdp = fdhead;
6180                           fdhead = xnew (1, fdesc);
6181                           *fdhead = *curfdp; /* copy curr. file description */
6182                           fdhead->next = fdp;
6183                           fdhead->infname = savestr (curfdp->infname);
6184                           fdhead->infabsname = savestr (curfdp->infabsname);
6185                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6186                           fdhead->taggedfname = taggedfname;
6187                           fdhead->usecharno = FALSE;
6188                           fdhead->prop = NULL;
6189                           fdhead->written = FALSE;
6190                           curfdp = fdhead;
6191                         }
6192                     }
6193                   free (taggedabsname);
6194                   lineno = lno - 1;
6195                   readline (lbp, stream);
6196                   return;
6197                 } /* if a real #line directive */
6198             } /* if #line is followed by a number */
6199         } /* if line begins with "#line " */
6200
6201       /* If we are here, no #line directive was found. */
6202       if (discard_until_line_directive)
6203         {
6204           if (result > 0)
6205             {
6206               /* Do a tail recursion on ourselves, thus discarding the contents
6207                  of the line buffer. */
6208               readline (lbp, stream);
6209               return;
6210             }
6211           /* End of file. */
6212           discard_until_line_directive = FALSE;
6213           return;
6214         }
6215     } /* if #line directives should be considered */
6216
6217   {
6218     int match;
6219     regexp *rp;
6220     char *name;
6221
6222     /* Match against relevant regexps. */
6223     if (lbp->len > 0)
6224       for (rp = p_head; rp != NULL; rp = rp->p_next)
6225         {
6226           /* Only use generic regexps or those for the current language.
6227              Also do not use multiline regexps, which is the job of
6228              regex_tag_multiline. */
6229           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6230               || rp->multi_line)
6231             continue;
6232
6233           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6234           switch (match)
6235             {
6236             case -2:
6237               /* Some error. */
6238               if (!rp->error_signaled)
6239                 {
6240                   error ("regexp stack overflow while matching \"%s\"",
6241                          rp->pattern);
6242                   rp->error_signaled = TRUE;
6243                 }
6244               break;
6245             case -1:
6246               /* No match. */
6247               break;
6248             case 0:
6249               /* Empty string matched. */
6250               if (!rp->error_signaled)
6251                 {
6252                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6253                   rp->error_signaled = TRUE;
6254                 }
6255               break;
6256             default:
6257               /* Match occurred.  Construct a tag. */
6258               name = rp->name;
6259               if (name[0] == '\0')
6260                 name = NULL;
6261               else /* make a named tag */
6262                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6263               if (rp->force_explicit_name)
6264                 /* Force explicit tag name, if a name is there. */
6265                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6266               else
6267                 make_tag (name, strlen (name), TRUE,
6268                           lbp->buffer, match, lineno, linecharno);
6269               break;
6270             }
6271         }
6272   }
6273 }
6274
6275 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The whole string (strlen (cp) characters) is handed to savenstr,
 * which allocates the copy; the caller owns the returned pointer.
 */
static char *
savestr (const char *cp)
{
  const int len = strlen (cp);

  return savenstr (cp, len);
}
6285
6286 /*
6287  * Return a pointer to a space of size LEN+1 allocated with xnew where
6288  * the string CP has been copied for at most the first LEN characters.
6289  */
6290 static char *
6291 savenstr (const char *cp, int len)
6292 {
6293   register char *dp;
6294
6295   dp = xnew (len + 1, char);
6296   strncpy (dp, cp, len);
6297   dp[len] = '\0';
6298   return dp;
6299 }
6300
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.
 *
 * As with POSIX strrchr, C is converted to char before it is compared,
 * so a value such as 0xE9 matches the corresponding byte even where
 * plain char is signed.  The terminating NUL counts as part of the
 * string: searching for '\0' returns a pointer to the terminator.
 *
 * Included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char ch = (char) c;
  register const char *r = NULL;

  /* The do/while form also examines the terminating NUL itself. */
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6320
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.
 *
 * As with POSIX strchr, C is converted to char before it is compared,
 * so a value such as 0xE9 matches the corresponding byte even where
 * plain char is signed.  The terminating NUL counts as part of the
 * string: searching for '\0' returns a pointer to the terminator.
 *
 * Included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char ch = (char) c;

  /* The do/while form also examines the terminating NUL itself. */
  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6337
/*
 * Case-insensitive comparison of the strings S1 and S2.
 *
 * Two characters are folded with lowcase only when both are
 * alphabetic; otherwise they are compared as they stand.  The value
 * returned is the difference between the first pair of characters that
 * do not match (or between the two terminators when the strings are
 * equal, i.e. zero).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  /* Advance over the common (case-folded) prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  /* Report the difference at the position where the scan stopped. */
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6356
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters.
 *
 * At most N characters are examined.  Two characters are folded with
 * lowcase only when both are alphabetic; otherwise they are compared
 * as they stand.  Returns zero when the first N characters match (or
 * both strings end, equal, before that), and otherwise the difference
 * between the first pair of characters that do not match.  A
 * non-positive N always yields zero.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  /* The count is tested before the characters, so that running out of
     budget exactly where S1 ends still reports equality instead of
     comparing the character one past the limit. */
  for (; n > 0; s1++, s2++, n--)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first mismatch, or at the end of S1 (in which case
         DIFF is zero only if S2 ends here as well). */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  return 0;
}
6379
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  As the end of string is not considered white,
   the scan stops at the terminating NUL at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6388
/* Return a pointer to the first character at or after CP that is
   either white (per iswhite) or the terminating NUL. */
static char *
skip_non_spaces (char *cp)
{
  for (; !(*cp == '\0' || iswhite (*cp)); cp++)
    continue;
  return cp;
}
6397
/* Report an error through `error' and terminate the program with
   EXIT_FAILURE.  `s1' is the printf control string and `s2' its
   single argument, exactly as for `error'.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);               /* message goes out first ... */
  exit (EXIT_FAILURE);          /* ... then the process ends */
}
6405
/* Report the current errno condition with perror, using S1 as the
   message prefix, and terminate the program with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);                  /* "<s1>: <system error text>" */
  exit (EXIT_FAILURE);
}
6412
6413 static void
6414 suggest_asking_for_help (void)
6415 {
6416   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6417            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6418   exit (EXIT_FAILURE);
6419 }
6420
6421 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6422 static void
6423 error (const char *s1, const char *s2)
6424 {
6425   fprintf (stderr, "%s: ", progname);
6426   fprintf (stderr, s1, s2);
6427   fprintf (stderr, "\n");
6428 }
6429
6430 /* Return a newly-allocated string whose contents
6431    concatenate those of s1, s2, s3.  */
6432 static char *
6433 concat (const char *s1, const char *s2, const char *s3)
6434 {
6435   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6436   char *result = xnew (len1 + len2 + len3 + 1, char);
6437
6438   strcpy (result, s1);
6439   strcpy (result + len1, s2);
6440   strcpy (result + len1 + len2, s3);
6441   result[len1 + len2 + len3] = '\0';
6442
6443   return result;
6444 }
6445
6446 \f
6447 /* Does the same work as the system V getcwd, but does not need to
6448    guess the buffer size in advance. */
6449 static char *
6450 etags_getcwd (void)
6451 {
6452 #ifdef HAVE_GETCWD
6453   int bufsize = 200;
6454   char *path = xnew (bufsize, char);
6455
6456   while (getcwd (path, bufsize) == NULL)
6457     {
6458       if (errno != ERANGE)
6459         pfatal ("getcwd");
6460       bufsize *= 2;
6461       free (path);
6462       path = xnew (bufsize, char);
6463     }
6464
6465   canonicalize_filename (path);
6466   return path;
6467
6468 #else /* not HAVE_GETCWD */
6469 #if MSDOS
6470
6471   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6472
6473   getwd (path);
6474
6475   for (p = path; *p != '\0'; p++)
6476     if (*p == '\\')
6477       *p = '/';
6478     else
6479       *p = lowcase (*p);
6480
6481   return strdup (path);
6482 #else /* not MSDOS */
6483   linebuffer path;
6484   FILE *pipe;
6485
6486   linebuffer_init (&path);
6487   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6488   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6489     pfatal ("pwd");
6490   pclose (pipe);
6491
6492   return path.buffer;
6493 #endif /* not MSDOS */
6494 #endif /* not HAVE_GETCWD */
6495 }
6496
6497 /* Return a newly allocated string containing the file name of FILE
6498    relative to the absolute directory DIR (which should end with a slash). */
6499 static char *
6500 relative_filename (char *file, char *dir)
6501 {
6502   char *fp, *dp, *afn, *res;
6503   int i;
6504
6505   /* Find the common root of file and dir (with a trailing slash). */
6506   afn = absolute_filename (file, cwd);
6507   fp = afn;
6508   dp = dir;
6509   while (*fp++ == *dp++)
6510     continue;
6511   fp--, dp--;                   /* back to the first differing char */
6512 #ifdef DOS_NT
6513   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6514     return afn;
6515 #endif
6516   do                            /* look at the equal chars until '/' */
6517     fp--, dp--;
6518   while (*fp != '/');
6519
6520   /* Build a sequence of "../" strings for the resulting relative file name. */
6521   i = 0;
6522   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6523     i += 1;
6524   res = xnew (3*i + strlen (fp + 1) + 1, char);
6525   res[0] = '\0';
6526   while (i-- > 0)
6527     strcat (res, "../");
6528
6529   /* Add the file name relative to the common root of file and dir. */
6530   strcat (res, fp + 1);
6531   free (afn);
6532
6533   return res;
6534 }
6535
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An already absolute FILE is copied; otherwise DIR and FILE are
   concatenated.  The result is then simplified in place: every
   "/dirname/.." and every "/." component is deleted.  Should nothing
   be left, "/" is returned.  The caller owns the returned string.

   Under DOS_NT a relative name with a drive letter (`d:NAME') is
   rejected with a fatal error. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Walk the slashes of RES.  SLASHP always points at a slash; after
     a deletion it is examined again, because what now follows it may
     itself be a "." or ".." component. */
  for (slashp = etags_strchr (res, '/');
       slashp != NULL && slashp[0] != '\0'; )
    {
      if (slashp[1] != '.')
        {
          /* Ordinary component: go on to the next slash. */
          slashp = etags_strchr (slashp + 1, '/');
        }
      else if (slashp[2] == '.'
               && (slashp[3] == '/' || slashp[3] == '\0'))
        {
          /* "/..": back up to the start of the preceding component
             and close the gap over both. */
          cp = slashp;
          do
            cp--;
          while (cp >= res && !filename_is_absolute (cp));
          if (cp < res)
            cp = slashp;        /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (cp[0] != '/')
            cp = slashp;
#endif
#ifdef HAVE_MEMMOVE
          /* The length counts the terminating NUL of the tail. */
          memmove (cp, slashp + 3, strlen (slashp + 2));
#else
          /* Overlapping copy isn't really okay */
          strcpy (cp, slashp + 3);
#endif
          slashp = cp;
        }
      else if (slashp[2] == '/' || slashp[2] == '\0')
        {
          /* "/.": simply drop it. */
#ifdef HAVE_MEMMOVE
          memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
          strcpy (slashp, slashp + 2);
#endif
        }
      else
        {
          /* A name that merely starts with a dot, such as "/.emacs". */
          slashp = etags_strchr (slashp + 1, '/');
        }
    }

  if (res[0] != '\0')
    return res;

  /* Just a safety net: should never happen. */
  free (res);
  return savestr ("/");
}
6607
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   When FILE contains no slash the answer is a copy of DIR itself.
   Otherwise FILE is cut just after its last slash for the duration of
   the absolute_filename call and put back afterwards, so FILE must be
   writable even though it is unchanged on return. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Truncate FILE to its directory part, trailing slash included. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6627
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6639
/* Canonicalize the file name FN in place: each run of one or more
   separator characters is replaced by a single slash.  The separator
   is '/' normally and '\\' under DOS_NT, where an upper-case drive
   letter in front of a colon is lowercased as well.  The result is
   never longer than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR(c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          /* Emit one slash for the whole run of separators. */
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6669
6670 \f
6671 /* Initialize a linebuffer for use. */
6672 static void
6673 linebuffer_init (linebuffer *lbp)
6674 {
6675   lbp->size = (DEBUG) ? 3 : 200;
6676   lbp->buffer = xnew (lbp->size, char);
6677   lbp->buffer[0] = '\0';
6678   lbp->len = 0;
6679 }
6680
6681 /* Set the minimum size of a string contained in a linebuffer. */
6682 static void
6683 linebuffer_setlen (linebuffer *lbp, int toksize)
6684 {
6685   while (lbp->size <= toksize)
6686     {
6687       lbp->size *= 2;
6688       xrnew (lbp->buffer, lbp->size, char);
6689     }
6690   lbp->len = toksize;
6691 }
6692
6693 /* Like malloc but get fatal error if memory is exhausted. */
6694 static PTR
6695 xmalloc (unsigned int size)
6696 {
6697   PTR result = (PTR) malloc (size);
6698   if (result == NULL)
6699     fatal ("virtual memory exhausted", (char *)NULL);
6700   return result;
6701 }
6702
6703 static PTR
6704 xrealloc (char *ptr, unsigned int size)
6705 {
6706   PTR result = (PTR) realloc (ptr, size);
6707   if (result == NULL)
6708     fatal ("virtual memory exhausted", (char *)NULL);
6709   return result;
6710 }
6711
6712 /*
6713  * Local Variables:
6714  * indent-tabs-mode: t
6715  * tab-width: 8
6716  * fill-column: 79
6717  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6718  * c-file-style: "gnu"
6719  * End:
6720  */
6721
6722 /* etags.c ends here */