lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #define TRUE    1               /* truth values stored in `bool' flags */
  84 #define FALSE   0
  85
  86 #ifdef DEBUG                    /* normalize DEBUG to TRUE or FALSE */
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* disable assert */
  92 #endif /* DEBUG */
  93
  94 #ifdef HAVE_CONFIG_H
  95 # include <config.h>
  96   /* On some systems, Emacs defines static as nothing for the sake
  97      of unexec.  We don't want that here since we don't use unexec. */
  98 # undef static
  99 # ifndef PTR                    /* for XEmacs */
 100 #   define PTR void *
 101 # endif
 102 #else  /* no config.h */
 103 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 104 #   define PTR void *           /* for generic pointers */
 105 # else /* not standard C */
 106 #   define const                /* remove const for old compilers' sake */
 107 #   define PTR long *           /* don't use void* */
 108 # endif
 109 #endif /* !HAVE_CONFIG_H */
 110
 111 #ifndef _GNU_SOURCE
 112 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 113 #endif
 114
 115 /* WIN32_NATIVE is for XEmacs.
 116    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 117 #ifdef WIN32_NATIVE
 118 # undef MSDOS
 119 # undef  WINDOWSNT
 120 # define WINDOWSNT
 121 #endif /* WIN32_NATIVE */
 122
 123 #ifdef MSDOS
 124 # undef MSDOS
 125 # define MSDOS TRUE
 126 # include <fcntl.h>
 127 # include <sys/param.h>
 128 # include <io.h>
 129 # ifndef HAVE_CONFIG_H
 130 #   define DOS_NT
 131 #   include <sys/config.h>
 132 # endif
 133 #else
 134 # define MSDOS FALSE
 135 #endif /* MSDOS */
 136
 137 #ifdef WINDOWSNT
 138 # include <stdlib.h>
 139 # include <fcntl.h>
 140 # include <string.h>
 141 # include <direct.h>
 142 # include <io.h>
 143 # define MAXPATHLEN _MAX_PATH
 144 # undef HAVE_NTGUI
 145 # undef  DOS_NT
 146 # define DOS_NT
 147 # ifndef HAVE_GETCWD
 148 #   define HAVE_GETCWD
 149 # endif /* undef HAVE_GETCWD */
 150 #else /* not WINDOWSNT */
 151 # ifdef STDC_HEADERS
 152 #  include <stdlib.h>
 153 #  include <string.h>
 154 # else /* no standard C headers */
 155    extern char *getenv (const char *);
 156    extern char *strcpy (char *, const char *);
 157    extern char *strncpy (char *, const char *, unsigned long);
 158    extern char *strcat (char *, const char *);
 159    extern char *strncat (char *, const char *, unsigned long);
 160    extern int strcmp (const char *, const char *);
 161    extern int strncmp (const char *, const char *, unsigned long);
 162    extern int system (const char *);
 163    extern unsigned long strlen (const char *);
 164    extern void *malloc (unsigned long);
 165    extern void *realloc (void *, unsigned long);
 166    extern void exit (int);
 167    extern void free (void *);
 168    extern void *memmove (void *, const void *, unsigned long);
 169 #  define EXIT_SUCCESS  0
 170 #  define EXIT_FAILURE  1
 171 # endif
 172 #endif /* !WINDOWSNT */
 173
 174 #include <unistd.h>
 175 #ifndef HAVE_UNISTD_H   /* NOTE(review): <unistd.h> is included above anyway */
 176 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 177     extern char *getcwd (char *buf, size_t size);
 178 # endif
 179 #endif /* !HAVE_UNISTD_H */
 180
 181 #include <stdio.h>
 182 #include <ctype.h>
 183 #include <errno.h>
 184 #include <sys/types.h>
 185 #include <sys/stat.h>
 186
 187 #include <assert.h>
 188 #ifdef NDEBUG
 189 # undef  assert                 /* some systems have a buggy assert.h */
 190 # define assert(x) ((void) 0)
 191 #endif
 192
 193 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 194 # define NO_LONG_OPTIONS TRUE
 195 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 196   extern char *optarg;
 197   extern int optind, opterr;
 198 #else
 199 # define NO_LONG_OPTIONS FALSE
 200 # include <getopt.h>
 201 #endif /* NO_LONG_OPTIONS */
 202
 203 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 204 # ifdef __CYGWIN__              /* compiling on Cygwin */
 205                              !!! NOTICE !!!
 206  the regex.h distributed with Cygwin is not compatible with etags, alas!
 207 If you want regular expression support, you should delete this notice and
 208               arrange to use the GNU regex.h and regex.c.
 209 # endif
 210 #endif
 211 #include <regex.h>
 212
 213 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 214  Leave it undefined to make the program "etags", which makes emacs-style
 215  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 216 #ifdef CTAGS                    /* normalize CTAGS to TRUE or FALSE */
 217 # undef  CTAGS
 218 # define CTAGS TRUE
 219 #else
 220 # define CTAGS FALSE
 221 #endif /* CTAGS */
 222 /* String equality tests; when assert is enabled, both args must be non-NULL. */
 223 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 224 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 225 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 226 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 227
 228 #define CHARS 256               /* 2^8: one slot per 8-bit char value */
 229 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 230 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 231 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 232 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 233 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 234 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 235 /* <ctype.h> tests applied to CHAR(c), i.e. to a value in 0..CHARS-1. */
 236 #define ISALNUM(c)      isalnum (CHAR(c))
 237 #define ISALPHA(c)      isalpha (CHAR(c))
 238 #define ISDIGIT(c)      isdigit (CHAR(c))
 239 #define ISLOWER(c)      islower (CHAR(c))
 240
 241 #define lowcase(c)      tolower (CHAR(c))
 242
 243
 244 /*
 245  *      xnew, xrnew -- allocate, reallocate storage
 246  * N counts elements of Type; xrnew also stores the new pointer in OP.
 247  * SYNOPSIS:    Type *xnew (int n, Type);
 248  *              void xrnew (OldPointer, int n, Type);
 249  */
 250 #if DEBUG                       /* go through the tracing allocator */
 251 # include "chkmalloc.h"
 252 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 253                                                   (n) * sizeof (Type)))
 254 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 255                                         (char *) (op), (n) * sizeof (Type)))
 256 #else
 257 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 258 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 259                                         (char *) (op), (n) * sizeof (Type)))
 260 #endif /* DEBUG */
 261
 262 #define bool int                /* flags are ints; longopts takes their address */
 263
 264 typedef void Lang_function (FILE *);    /* signature of a language parser */
 265 /* Pairs a compressed-file suffix with the command that decompresses it. */
 266 typedef struct
 267 {
 268   const char *suffix;           /* file name suffix for this compressor */
 269   const char *command;          /* takes one arg and decompresses to stdout */
 270 } compressor;
 271 /* One supported language: its parser, help text and ways to recognize it. */
 272 typedef struct
 273 {
 274   const char *name;             /* language name */
 275   const char *help;             /* detailed help for the language */
 276   Lang_function *function;      /* parse function */
 277   const char **suffixes;        /* name suffixes of this language's files */
 278   const char **filenames;       /* names of this language's files */
 279   const char **interpreters;    /* interpreters for this language */
 280   bool metasource;              /* source used to generate other sources */
 281 } language;
 282 /* Description of one input file; these are chained in a linked list. */
 283 typedef struct fdesc
 284 {
 285   struct fdesc *next;           /* for the linked list */
 286   char *infname;                /* uncompressed input file name */
 287   char *infabsname;             /* absolute uncompressed input file name */
 288   char *infabsdir;              /* absolute dir of input file */
 289   char *taggedfname;            /* file name to write in tagfile */
 290   language *lang;               /* language of file */
 291   char *prop;                   /* file properties to write in tagfile */
 292   bool usecharno;               /* etags tags shall contain char number */
 293   bool written;                 /* entry written in the tags file */
 294 } fdesc;
 295 /* One tag; nodes link to left and right sons and to their file's fdesc. */
 296 typedef struct node_st
 297 {                               /* sorting structure */
 298   struct node_st *left, *right; /* left and right sons */
 299   fdesc *fdp;                   /* description of file to whom tag belongs */
 300   char *name;                   /* tag name */
 301   char *regex;                  /* search regexp */
 302   bool valid;                   /* write this tag on the tag file */
 303   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 304   bool been_warned;             /* warning already given for duplicated tag */
 305   int lno;                      /* line number tag is on */
 306   long cno;                     /* character number line starts on */
 307 } node;
 308
 309 /*
 310  * A `linebuffer' is a structure which holds a line of text.
 311  * `readline_internal' reads a line from a stream into a linebuffer
 312  * and works regardless of the length of the line.
 313  * SIZE is the size of BUFFER, LEN is the length of the string in
 314  * BUFFER after readline reads it.
 315  */
 316 typedef struct
 317 {
 318   long size;                    /* size of BUFFER */
 319   int len;                      /* length of the string held in BUFFER */
 320   char *buffer;                 /* the text itself */
 321 } linebuffer;
 322
 323 /* One typed argument; used to support mixing of --lang and file names. */
 324 typedef struct
 325 {
 326   enum {
 327     at_language,                /* a language specification */
 328     at_regexp,                  /* a regular expression */
 329     at_filename,                /* a file name */
 330     at_stdin,                   /* read from stdin here */
 331     at_end                      /* stop parsing the list */
 332   } arg_type;                   /* argument type */
 333   language *lang;               /* language associated with the argument */
 334   char *what;                   /* the argument itself */
 335 } argument;
 336
 337 /* Structure defining a regular expression; kept in a singly linked list. */
 338 typedef struct regexp
 339 {
 340   struct regexp *p_next;        /* pointer to next in list */
 341   language *lang;               /* if set, use only for this language */
 342   char *pattern;                /* the regexp pattern */
 343   char *name;                   /* tag name */
 344   struct re_pattern_buffer *pat; /* the compiled pattern */
 345   struct re_registers regs;     /* re registers */
 346   bool error_signaled;          /* already signaled for this regexp */
 347   bool force_explicit_name;     /* do not allow implicit tag name */
 348   bool ignore_case;             /* ignore case when matching */
 349   bool multi_line;              /* do a multi-line match on the whole file */
 350 } regexp;
 351
 352
 353 /* Many compilers barf on this:
 354         Lang_function Ada_funcs;
 355    so let's write each parser's prototype out in full */
 356 static void Ada_funcs (FILE *);
 357 static void Asm_labels (FILE *);
 358 static void C_entries (int c_ext, FILE *);      /* not a Lang_function */
 359 static void default_C_entries (FILE *);
 360 static void plain_C_entries (FILE *);
 361 static void Cjava_entries (FILE *);
 362 static void Cobol_paragraphs (FILE *);
 363 static void Cplusplus_entries (FILE *);
 364 static void Cstar_entries (FILE *);
 365 static void Erlang_functions (FILE *);
 366 static void Forth_words (FILE *);
 367 static void Fortran_functions (FILE *);
 368 static void HTML_labels (FILE *);
 369 static void Lisp_functions (FILE *);
 370 static void Lua_functions (FILE *);
 371 static void Makefile_targets (FILE *);
 372 static void Pascal_functions (FILE *);
 373 static void Perl_functions (FILE *);
 374 static void PHP_functions (FILE *);
 375 static void PS_functions (FILE *);
 376 static void Prolog_functions (FILE *);
 377 static void Python_functions (FILE *);
 378 static void Scheme_functions (FILE *);
 379 static void TeX_commands (FILE *);
 380 static void Texinfo_nodes (FILE *);
 381 static void Yacc_entries (FILE *);
 382 static void just_read_file (FILE *);
 383
 384 static void print_language_names (void);
 385 static void print_version (void);
 386 static void print_help (argument *);
 387 int main (int, char **);
 388
 389 static compressor *get_compressor_from_suffix (char *, char **);
 390 static language *get_language_from_langname (const char *);
 391 static language *get_language_from_interpreter (char *);
 392 static language *get_language_from_filename (char *, bool);
 393 static void readline (linebuffer *, FILE *);
 394 static long readline_internal (linebuffer *, FILE *);
 395 static bool nocase_tail (const char *);
 396 static void get_tag (char *, char **);
 397 /* NOTE(review): NO_RETURN (used below) presumably comes from config.h. */
 398 static void analyse_regex (char *);
 399 static void free_regexps (void);
 400 static void regex_tag_multiline (void);
 401 static void error (const char *, const char *);
 402 static void suggest_asking_for_help (void) NO_RETURN;
 403 void fatal (const char *, const char *) NO_RETURN;     /* external linkage */
 404 static void pfatal (const char *) NO_RETURN;
 405 static void add_node (node *, node **);
 406
 407 static void init (void);
 408 static void process_file_name (char *, language *);
 409 static void process_file (FILE *, char *, language *);
 410 static void find_entries (FILE *);
 411 static void free_tree (node *);
 412 static void free_fdesc (fdesc *);
 413 static void pfnote (char *, bool, char *, int, int, long);
 414 static void make_tag (const char *, int, bool, char *, int, int, long);
 415 static void invalidate_nodes (fdesc *, node **);
 416 static void put_entries (node *);
 417
 418 static char *concat (const char *, const char *, const char *);
 419 static char *skip_spaces (char *);
 420 static char *skip_non_spaces (char *);
 421 static char *savenstr (const char *, int);
 422 static char *savestr (const char *);
 423 static char *etags_strchr (const char *, int);
 424 static char *etags_strrchr (const char *, int);
 425 static int etags_strcasecmp (const char *, const char *);
 426 static int etags_strncasecmp (const char *, const char *, int);
 427 static char *etags_getcwd (void);
 428 static char *relative_filename (char *, char *);
 429 static char *absolute_filename (char *, char *);
 430 static char *absolute_dirname (char *, char *);
 431 static bool filename_is_absolute (char *f);
 432 static void canonicalize_filename (char *);
 433 static void linebuffer_init (linebuffer *);
 434 static void linebuffer_setlen (linebuffer *, int);
 435 static PTR xmalloc (unsigned int);      /* NOTE(review): unsigned int, not size_t */
 436 static PTR xrealloc (char *, unsigned int);
 437
 438 \f
 439 static char searchar = '/';     /* use /.../ searches */
 440
 441 static char *tagfile;           /* output file */
 442 static char *progname;          /* name this program was invoked with */
 443 static char *cwd;               /* current working directory */
 444 static char *tagfiledir;        /* directory of tagfile */
 445 static FILE *tagf;              /* ioptr for tags file */
 446
 447 static fdesc *fdhead;           /* head of file description list */
 448 static fdesc *curfdp;           /* current file description */
 449 static int lineno;              /* line number of current line */
 450 static long charno;             /* current character number */
 451 static long linecharno;         /* charno of start of current line */
 452 static char *dbp;               /* pointer to start of current tag */
 453
 454 static const int invalidcharno = -1;    /* NOTE(review): presumably "no char number" -- confirm */
 455
 456 static node *nodehead;          /* the head of the binary tree of tags */
 457 static node *last_node;         /* the last node created */
 458
 459 static linebuffer lb;           /* the current line */
 460 static linebuffer filebuf;      /* a buffer containing the whole file */
 461 static linebuffer token_name;   /* a buffer containing a tag name */
 462
 463 /* boolean "functions" (see init), indexed by CHAR(c) via iswhite & co. */
 464 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 465 static const char
 466   /* white chars */
 467   *white = " \f\t\n\r\v",
 468   /* not in a name */
 469   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 470   /* token ending chars */
 471   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 472   /* token starting chars */
 473   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 474   /* valid in-token chars */
 475   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 476
 477 static bool append_to_tagfile;  /* -a: append to tags */
 478 /* The next five default to TRUE in C and derived languages.  */
 479 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 480 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 481                                 /* 0 struct/enum/union decls, and C++ */
 482                                 /* member functions. */
 483 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 484                                 /* constants and variables. */
 485                                 /* -D: opposite of -d.  Default under ctags. */
 486 static bool globals;            /* --[no-]globals: tag global variables */
 487 static bool members;            /* --[no-]members: tag C member variables */
 488 static bool declarations;       /* --declarations: tag them and extern in C&Co */
 489 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 490 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 491 static bool update;             /* -u: update tags */
 492 static bool vgrind_style;       /* -v: create vgrind style index output */
 493 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 494 static bool cxref_style;        /* -x: create cxref style output */
 495 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 496 static bool ignoreindent;       /* -I: ignore indentation in C */
 497 static bool packages_only;      /* --packages-only: in Ada, only tag packages */
 498
 499 /* STDIN is defined in LynxOS system headers; drop that definition first. */
 500 #ifdef STDIN
 501 # undef STDIN
 502 #endif
 503
 504 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 505 static bool parsing_stdin;      /* --parse-stdin used */
 506
 507 static regexp *p_head;          /* list of all regexps */
 508 static bool need_filebuf;       /* some regexes are multi-line */
 509 /* Long options for getopt_long; a non-NULL third field is a flag to set. */
 510 static struct option longopts[] =
 511 {
 512   { "append",             no_argument,       NULL,               'a'   },
 513   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 514   { "c++",                no_argument,       NULL,               'C'   },
 515   { "declarations",       no_argument,       &declarations,      TRUE  },
 516   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 517   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 518   { "help",               no_argument,       NULL,               'h'   },
 519   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): duplicate name -- confirm intent */
 520   { "ignore-indentation", no_argument,       NULL,               'I'   },
 521   { "language",           required_argument, NULL,               'l'   },
 522   { "members",            no_argument,       &members,           TRUE  },
 523   { "no-members",         no_argument,       &members,           FALSE },
 524   { "output",             required_argument, NULL,               'o'   },
 525   { "regex",              required_argument, NULL,               'r'   },
 526   { "no-regex",           no_argument,       NULL,               'R'   },
 527   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 528   { "parse-stdin",        required_argument, NULL,               STDIN },
 529   { "version",            no_argument,       NULL,               'V'   },
 530
 531 #if CTAGS /* Ctags options */
 532   { "backward-search",    no_argument,       NULL,               'B'   },
 533   { "cxref",              no_argument,       NULL,               'x'   },
 534   { "defines",            no_argument,       NULL,               'd'   },
 535   { "globals",            no_argument,       &globals,           TRUE  },
 536   { "typedefs",           no_argument,       NULL,               't'   },
 537   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 538   { "update",             no_argument,       NULL,               'u'   },
 539   { "vgrind",             no_argument,       NULL,               'v'   },
 540   { "no-warn",            no_argument,       NULL,               'w'   },
 541
 542 #else /* Etags options */
 543   { "no-defines",         no_argument,       NULL,               'D'   },
 544   { "no-globals",         no_argument,       &globals,           FALSE },
 545   { "include",            required_argument, NULL,               'i'   },
 546 #endif
 547   { NULL }                      /* terminator */
 548 };
 549 /* Known compressed-file suffixes and their decompression commands. */
 550 static compressor compressors[] =
 551 {
 552   { "z", "gzip -d -c"},
 553   { "Z", "gzip -d -c"},
 554   { "gz", "gzip -d -c"},
 555   { "GZ", "gzip -d -c"},
 556   { "bz2", "bzip2 -d -c" },
 557   { "xz", "xz -d -c" },
 558   { NULL }                      /* terminator */
 559 };
 560
 561 /*
 562  * Language stuff: file suffixes, file names, interpreters and help texts.
 563  */
 564
 565 /* Ada code */
 566 static const char *Ada_suffixes [] =
 567   { "ads", "adb", "ada", NULL };
 568 static const char Ada_help [] =
 569 "In Ada code, functions, procedures, packages, tasks and types are\n\
 570 tags.  Use the `--packages-only' option to create tags for\n\
 571 packages only.\n\
 572 Ada tag names have suffixes indicating the type of entity:\n\
 573         Entity type:    Qualifier:\n\
 574         ------------    ----------\n\
 575         function        /f\n\
 576         procedure       /p\n\
 577         package spec    /s\n\
 578         package body    /b\n\
 579         type            /t\n\
 580         task            /k\n\
 581 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 582 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 583 will just search for any tag `bidule'.";
 584
 585 /* Assembly code */
 586 static const char *Asm_suffixes [] =
 587   { "a",        /* Unix assembler */
 588     "asm", /* Microcontroller assembly */
 589     "def", /* BSO/Tasking definition includes  */
 590     "inc", /* Microcontroller include files */
 591     "ins", /* Microcontroller include files */
 592     "s", "sa", /* Unix assembler */
 593     "S",   /* cpp-processed Unix assembler */
 594     "src", /* BSO/Tasking C compiler output */
 595     NULL
 596   };
 597 static const char Asm_help [] =
 598 "In assembler code, labels appearing at the beginning of a line,\n\
 599 followed by a colon, are tags.";
 600
 601
 602 /* Note that .c and .h can be considered C++, if the --c++ flag was
 603    given, or if the `class' or `template' keywords are met inside the file.
 604    That is why default_C_entries is called for these. */
 605 static const char *default_C_suffixes [] =
 606   { "c", "h", NULL };
 607 #if CTAGS                               /* C help for Ctags */
 608 static const char default_C_help [] =
 609 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 610 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 611 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 612 Use --globals to tag global variables.\n\
 613 You can tag function declarations and external variables by\n\
 614 using `--declarations', and struct members by using `--members'.";
 615 #else                                   /* C help for Etags */
 616 static const char default_C_help [] =
 617 "In C code, any C function or typedef is a tag, and so are\n\
 618 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 619 definitions and `enum' constants are tags unless you specify\n\
 620 `--no-defines'.  Global variables are tags unless you specify\n\
 621 `--no-globals' and so are struct members unless you specify\n\
 622 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 623 `--no-members' can make the tags table file much smaller.\n\
 624 You can tag function declarations and external variables by\n\
 625 using `--declarations'.";
 626 #endif  /* C help for Ctags and Etags */
 627 /* C++ code */
 628 static const char *Cplusplus_suffixes [] =
 629   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 630     "M",                        /* Objective C++ */
 631     "pdb",                      /* Postscript with C syntax */
 632     NULL };
 633 static const char Cplusplus_help [] =
 634 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 635 --help --lang=c --lang=c++ for full help.)\n\
 636 In addition to C tags, member functions are also recognized.  Member\n\
 637 variables are recognized unless you use the `--no-members' option.\n\
 638 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 639 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 640 `operator+'.";
 641 /* Java code; the help text is read-only, like the other help strings. */
 642 static const char *Cjava_suffixes [] =
 643   { "java", NULL };
 644 static const char Cjava_help [] =
 645 "In Java code, all the tags constructs of C and C++ code are\n\
 646 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 647
 648 /* Cobol code; the help text is read-only, like the other help strings. */
 649 static const char *Cobol_suffixes [] =
 650   { "COB", "cob", NULL };
 651 static const char Cobol_help [] =
 652 "In Cobol code, tags are paragraph names; that is, any word\n\
 653 starting in column 8 and followed by a period.";
 654 /* C* (Cstar) code */
 655 static const char *Cstar_suffixes [] =
 656   { "cs", "hs", NULL };
 657 /* Erlang code */
 658 static const char *Erlang_suffixes [] =
 659   { "erl", "hrl", NULL };
 660 static const char Erlang_help [] =
 661 "In Erlang code, the tags are the functions, records and macros\n\
 662 defined in the file.";
 663 /* Forth code; the suffix table is file-local like every other one. */
 664 static const char *Forth_suffixes [] =
 665   { "fth", "tok", NULL };
 666 static const char Forth_help [] =
 667 "In Forth code, tags are words defined by `:',\n\
 668 constant, code, create, defer, value, variable, buffer:, field.";
 669 /* Fortran code */
 670 static const char *Fortran_suffixes [] =
 671   { "F", "f", "f90", "for", NULL };
 672 static const char Fortran_help [] =
 673 "In Fortran code, functions, subroutines and block data are tags.";
 674 /* HTML files */
 675 static const char *HTML_suffixes [] =
 676   { "htm", "html", "shtml", NULL };
 677 static const char HTML_help [] =
 678 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 679 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 680 occurrences of `id='.";
 681 /* Lisp code */
 682 static const char *Lisp_suffixes [] =
 683   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 684 static const char Lisp_help [] =
 685 "In Lisp code, any function defined with `defun', any variable\n\
 686 defined with `defvar' or `defconst', and in general the first\n\
 687 argument of any expression that starts with `(def' in column zero\n\
 688 is a tag.";
 689 /* Lua scripts */
 690 static const char *Lua_suffixes [] =
 691   { "lua", "LUA", NULL };
 692 static const char Lua_help [] =
 693 "In Lua scripts, all functions are tags.";
 694 /* Makefiles: recognized by file name rather than by suffix */
 695 static const char *Makefile_filenames [] =
 696   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 697 static const char Makefile_help [] =
 698 "In makefiles, targets are tags; additionally, variables are tags\n\
 699 unless you specify `--no-globals'.";
 700
 701 static const char *Objc_suffixes [] =
 702   { "lm",                       /* Objective lex file */
 703     "m",                        /* Objective C file */
 704      NULL };
 705 static const char Objc_help [] =
 706 "In Objective C code, tags include Objective C definitions for classes,\n\
 707 class categories, methods and protocols.  Tags for variables and\n\
 708 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 709 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 710
 711 static const char *Pascal_suffixes [] =
 712   { "p", "pas", NULL };
 713 static const char Pascal_help [] =
 714 "In Pascal code, the tags are the functions and procedures defined\n\
 715 in the file.";
 716 /* " // this is for working around an Emacs highlighting bug... */
 717
 718 static const char *Perl_suffixes [] = /* Perl file-name suffixes, NULL-terminated */
 719   { "pl", "pm", NULL };
     /* Interpreter names that select Perl; get_language_from_interpreter
        compares its argument against this NULL-terminated list.
        NOTE(review): "@PERL@" looks like a configure-time substitution
        placeholder -- confirm against the build system.  */
 720 static const char *Perl_interpreters [] =
 721   { "perl", "@PERL@", NULL };
 722 static const char Perl_help [] =
 723 "In Perl code, the tags are the packages, subroutines and variables\n\
 724 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 725 `--globals' if you want to tag global variables.  Tags for\n\
 726 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 727 defined in the default package is `main::SUB'.";
 728
     /* PHP.  */
 729 static const char *PHP_suffixes [] =
 730   { "php", "php3", "php4", NULL };
 731 static const char PHP_help [] =
 732 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 733 the `--no-members' option, vars are tags too.";
 734
     /* Suffix list of the "proc" entry of lang_names; that entry has no
        help text of its own and uses no_lang_help.  */
 735 static const char *plain_C_suffixes [] =
 736   { "pc",                       /* Pro*C file */
 737      NULL };
 738
     /* PostScript.  */
 739 static const char *PS_suffixes [] =
 740   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 741 static const char PS_help [] =
 742 "In PostScript code, the tags are the functions.";
 743
     /* Prolog.  */
 744 static const char *Prolog_suffixes [] =
 745   { "prolog", NULL };
 746 static const char Prolog_help [] =
 747 "In Prolog code, tags are predicates and rules at the beginning of\n\
 748 line.";
 749
     /* Python.  */
 750 static const char *Python_suffixes [] =
 751   { "py", NULL };
 752 static const char Python_help [] =
 753 "In Python code, `def' or `class' at the beginning of a line\n\
 754 generate a tag.";
 755
 756 /* Can't do the `SCM' or `scm' prefix with a version number. */
 757 static const char *Scheme_suffixes [] =
 758   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 759 static const char Scheme_help [] =
 760 "In Scheme code, tags include anything defined with `def' or with a\n\
 761 construct whose name starts with `def'.  They also include\n\
 762 variables set with `set!' at top level in the file.";
 763
     /* TeX and LaTeX.  The help text also describes the `TEXTAGS'
        environment variable.  */
 764 static const char *TeX_suffixes [] =
 765   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 766 static const char TeX_help [] =
 767 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 768 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 769 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 770 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 771 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 772 \n\
 773 Other commands can be specified by setting the environment variable\n\
 774 `TEXTAGS' to a colon-separated list like, for example,\n\
 775      TEXTAGS=\"mycommand:myothercommand\".";
 776
 777
     /* Texinfo.  */
 778 static const char *Texinfo_suffixes [] =
 779   { "texi", "texinfo", "txi", NULL };
 780 static const char Texinfo_help [] =
 781 "for texinfo files, lines starting with @node are tagged.";
 782
     /* Yacc and Bison.  The "yacc" entry of lang_names is the only one
        that sets the table's trailing flag (TRUE).  */
 783 static const char *Yacc_suffixes [] =
 784   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 785 static const char Yacc_help [] =
 786 "In Bison or Yacc input files, each rule defines as a tag the\n\
 787 nonterminal it constructs.  The portions of the file that contain\n\
 788 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 789 for full help).";
 790
 791 static const char auto_help [] = /* help text of the `auto' pseudo-language */
 792 "`auto' is not a real language, it indicates to use\n\
 793 a default language for files based on file name suffix and file contents.";
 794
 795 static const char none_help [] = /* help text of the `none' pseudo-language */
 796 "`none' is not a real language, it indicates to only do\n\
 797 regexp processing on files.";
 798
 799 static const char no_lang_help [] = /* used by entries without detailed help */
 800 "No detailed help available for this language.";
 801
 802
 803 /*
 804  * Table of languages.
 805  *
 806  * Each entry gives, in order: the language name, its help text, a
 807  * function (presumably the one that tags files in that language --
      * confirm against the `language' declaration), the default suffixes,
      * whole file names, interpreter names, and a trailing flag set only
      * for yacc.  Trailing fields may be omitted.  The table ends with an
      * entry whose name is NULL; every lookup loop stops there.
      *
      * It is ok for a given function to be listed under more than one
      * name: plain_C_entries serves both "objc" and "proc".
 808  */
 809
 810 static language lang_names [] =
 811 {
 812   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 813   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 814   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 815   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 816   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 817   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 818   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 819   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 820   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 821   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 822   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 823   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 824   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 825   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 826   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 827   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 828   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 829   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 830   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 831   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 832   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 833   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 834   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 835   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 836   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 837   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 838   { "auto",      auto_help },                      /* default guessing scheme */
 839   { "none",      none_help,      just_read_file }, /* regexp matching only */
 840   { NULL }                /* end of list */
 841 };
 842
 843 \f
 844 static void
 845 print_language_names (void)
 846 {
     /* Write to standard output the list of languages in lang_names with
        their default file names and suffixes, followed by the notes on
        language guessing and on compressed files.  */
 847   language *entry = lang_names;
 848   const char **item;
 849
 850   puts ("\nThese are the currently supported languages, along with the\n\
 851 default file names and dot suffixes:");
 852   /* One line per entry: name, then whole file names, then suffixes.  */
 853   while (entry->name != NULL)
 854     {
 855       printf ("  %-*s", 10, entry->name);
 856       for (item = entry->filenames; item != NULL && *item != NULL; item++)
 857         printf (" %s", *item);
 858       for (item = entry->suffixes; item != NULL && *item != NULL; item++)
 859         printf (" .%s", *item);
 860       puts ("");
 861       entry++;
 862     }
 863   puts ("where `auto' means use default language for files based on file\n\
 864 name suffix, and `none' means only do regexp processing on files.\n\
 865 If no language is specified and no matching suffix is found,\n\
 866 the first line of the file is read for a sharp-bang (#!) sequence\n\
 867 followed by the name of an interpreter.  If no such sequence is found,\n\
 868 Fortran is tried first; if no tags are found, C is tried next.\n\
 869 When parsing any C file, a \"class\" or \"template\" keyword\n\
 870 switches to C++.");
 871   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 872 \n\
 873 For detailed help on a given language use, for example,\n\
 874 etags --help --lang=ada.");
 875 }
 876
 877 #if !defined EMACS_NAME
 878 # define EMACS_NAME "standalone"
 879 #endif
 880 #if !defined VERSION
 881 # define VERSION "17.38.1.4"
 882 #endif
 883 static void
 884 print_version (void)
 885 {
     /* Print the program name (ctags or etags, depending on CTAGS),
        EMACS_NAME and VERSION, then the copyright and licence notes, and
        exit successfully.  Does not return.  */
 886   /* Kept in a variable of its own: makes it easier to update automatically. */
 887   char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
 888   const char *program_kind = (CTAGS) ? "ctags" : "etags";
 889
 890   printf ("%s (%s %s)\n", program_kind, EMACS_NAME, VERSION);
 891   puts (emacs_copyright);
 892   puts ("This program is distributed under the terms in ETAGS.README");
 893   exit (EXIT_SUCCESS);
 894 }
 895
 896 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
     /* By default the options guarded by this macro are left out of the
        help output.  */
 897 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 898 #endif
 899
     /*
      * Print help and exit successfully; this function does not return.
      *
      * ARGBUFFER is the argument list built by main, terminated by an
      * at_end entry.  If it contains at_language entries, only the help
      * texts of those languages are printed.  Otherwise the usage line,
      * the option descriptions and the language list are printed.
      */
 900 static void
 901 print_help (argument *argbuffer)
 902 {
 903   bool help_for_lang = FALSE;
 904
       /* Detailed help: one text per language argument, with an empty
          line between consecutive texts.  */
 905   for (; argbuffer->arg_type != at_end; argbuffer++)
 906     if (argbuffer->arg_type == at_language)
 907       {
 908         if (help_for_lang)
 909           puts ("");
 910         puts (argbuffer->lang->help);
 911         help_for_lang = TRUE;
 912       }
 913
       /* Language help replaces the general help.  */
 914   if (help_for_lang)
 915     exit (EXIT_SUCCESS);
 916
       /* General help.  Several options exist for only one of ctags and
          etags, or have the opposite sense in each, so the text below
          depends on CTAGS.  */
 917   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 918 \n\
 919 These are the options accepted by %s.\n", progname, progname);
 920   if (NO_LONG_OPTIONS)
 921     puts ("WARNING: long option names do not work with this executable,\n\
 922 as it is not linked with GNU getopt.");
 923   else
 924     puts ("You may use unambiguous abbreviations for the long option names.");
 925   puts ("  A - as file name means read names from stdin (one per line).\n\
 926 Absolute names are stored in the output file as they are.\n\
 927 Relative ones are stored relative to the output file's directory.\n");
 928
 929   puts ("-a, --append\n\
 930         Append tag entries to existing tags file.");
 931
 932   puts ("--packages-only\n\
 933         For Ada files, only generate tags for packages.");
 934
 935   if (CTAGS)
 936     puts ("-B, --backward-search\n\
 937         Write the search commands for the tag entries using '?', the\n\
 938         backward-search command instead of '/', the forward-search command.");
 939
 940   /* This option is mostly obsolete, because etags can now automatically
 941      detect C++.  Retained for backward compatibility and for debugging and
 942      experimentation.  In principle, we could want to tag as C++ even
 943      before any "class" or "template" keyword.
 944   puts ("-C, --c++\n\
 945         Treat files whose name suffix defaults to C language as C++ files.");
 946   */
 947
 948   puts ("--declarations\n\
 949         In C and derived languages, create tags for function declarations,");
 950   if (CTAGS)
 951     puts ("\tand create tags for extern variables if --globals is used.");
 952   else
 953     puts
 954       ("\tand create tags for extern variables unless --no-globals is used.");
 955
 956   if (CTAGS)
 957     puts ("-d, --defines\n\
 958         Create tag entries for C #define constants and enum constants, too.");
 959   else
 960     puts ("-D, --no-defines\n\
 961         Don't create tag entries for C #define constants and enum constants.\n\
 962         This makes the tags file smaller.");
 963
 964   if (!CTAGS)
 965     puts ("-i FILE, --include=FILE\n\
 966         Include a note in tag file indicating that, when searching for\n\
 967         a tag, one should also consult the tags file FILE after\n\
 968         checking the current file.");
 969
 970   puts ("-l LANG, --language=LANG\n\
 971         Force the following files to be considered as written in the\n\
 972         named language up to the next --language=LANG option.");
 973
 974   if (CTAGS)
 975     puts ("--globals\n\
 976         Create tag entries for global variables in some languages.");
 977   else
 978     puts ("--no-globals\n\
 979         Do not create tag entries for global variables in some\n\
 980         languages.  This makes the tags file smaller.");
 981
 982   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 983     puts ("--no-line-directive\n\
 984         Ignore #line preprocessor directives in C and derived languages.");
 985
 986   if (CTAGS)
 987     puts ("--members\n\
 988         Create tag entries for members of structures in some languages.");
 989   else
 990     puts ("--no-members\n\
 991         Do not create tag entries for members of structures\n\
 992         in some languages.");
 993
 994   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 995         Make a tag for each line matching a regular expression pattern\n\
 996         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 997         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 998         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 999         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1000   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1001         For example Tcl named tags can be created with:\n\
1002           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1003         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1004         `m' means to allow multi-line matches, `s' implies `m' and\n\
1005         causes dot to match any character, including newline.");
1006
1007   puts ("-R, --no-regex\n\
1008         Don't create tags from regexps for the following files.");
1009
1010   puts ("-I, --ignore-indentation\n\
1011         In C and C++ do not assume that a closing brace in the first\n\
1012         column is the final brace of a function or structure definition.");
1013
1014   puts ("-o FILE, --output=FILE\n\
1015         Write the tags to FILE.");
1016
1017   puts ("--parse-stdin=NAME\n\
1018         Read from standard input and record tags as belonging to file NAME.");
1019
1020   if (CTAGS)
1021     {
1022       puts ("-t, --typedefs\n\
1023         Generate tag entries for C and Ada typedefs.");
1024       puts ("-T, --typedefs-and-c++\n\
1025         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1026         and C++ member functions.");
1027     }
1028
1029   if (CTAGS)
1030     puts ("-u, --update\n\
1031         Update the tag entries for the given files, leaving tag\n\
1032         entries for other files in place.  Currently, this is\n\
1033         implemented by deleting the existing entries for the given\n\
1034         files and then rewriting the new entries at the end of the\n\
1035         tags file.  It is often faster to simply rebuild the entire\n\
1036         tag file than to use this.");
1037
1038   if (CTAGS)
1039     {
1040       puts ("-v, --vgrind\n\
1041         Print on the standard output an index of items intended for\n\
1042         human consumption, similar to the output of vgrind.  The index\n\
1043         is sorted, and gives the page number of each item.");
1044
           /* NOTE(review): the next two entries both describe `-w' under
              different long names -- confirm which one is current.  */
1045       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1046         puts ("-w, --no-duplicates\n\
1047         Do not create duplicate tag entries, for compatibility with\n\
1048         traditional ctags.");
1049
1050       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1051         puts ("-w, --no-warn\n\
1052         Suppress warning messages about duplicate tag entries.");
1053
1054       puts ("-x, --cxref\n\
1055         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1056         The output uses line numbers instead of page numbers, but\n\
1057         beyond that the differences are cosmetic; try both to see\n\
1058         which you like.");
1059     }
1060
1061   puts ("-V, --version\n\
1062         Print the version of the program.\n\
1063 -h, --help\n\
1064         Print this help message.\n\
1065         Followed by one or more `--language' options prints detailed\n\
1066         help about tag generation for the specified languages.");
1067
1068   print_language_names ();
1069
1070   puts ("");
1071   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1072
1073   exit (EXIT_SUCCESS);
1074 }
1075
1076 \f
1077 int
1078 main (int argc, char **argv)
1079 {
     /* Program entry point.  The command line is first recorded in
        ARGBUFFER (file names, language switches and regexps, in the order
        given), then the recorded entries are processed in that order.
        For etags, and for ctags with cross-reference output, the tags are
        written and the program exits inside this function.  Plain ctags
        output is written to TAGFILE at the end, after the optional
        --update filtering and before the optional sort.  */
1080   int i;
1081   unsigned int nincluded_files;
1082   char **included_files;
1083   argument *argbuffer;
1084   int current_arg, file_count;
1085   linebuffer filename_lb;
1086   bool help_asked = FALSE;
1087  char *optstring;
1088  int opt;
1089
1090
1091 #ifdef DOS_NT
1092   _fmode = O_BINARY;   /* all of files are treated as binary files */
1093 #endif /* DOS_NT */
1094
1095   progname = argv[0];
1096   nincluded_files = 0;
1097   included_files = xnew (argc, char *);
1098   current_arg = 0;
1099   file_count = 0;
1100
1101   /* Allocate enough no matter what happens.  Overkill, but each one
1102      is small. */
1103   argbuffer = xnew (argc, argument);
1104
1105   /*
1106    * Always find typedefs and structure tags.
1107    * Also default to find macro constants, enum constants, struct
1108    * members and global variables.  Do it for both etags and ctags.
1109    */
1110   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1111   globals = members = TRUE;
1112
1113   /* When the optstring begins with a '-' getopt_long does not rearrange the
1114      non-options arguments to be at the end, but leaves them alone. */
1115   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1116                       "ac:Cf:Il:o:r:RSVhH",
1117                       (CTAGS) ? "BxdtTuvw" : "Di:");
1118
1119   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1120     switch (opt)
1121       {
1122       case 0:
1123         /* If getopt returns 0, then it has already processed a
1124            long-named option.  We should do nothing.  */
1125         break;
1126
1127       case 1:
1128         /* This means that a file name has been seen.  Record it. */
1129         argbuffer[current_arg].arg_type = at_filename;
1130         argbuffer[current_arg].what     = optarg;
1131         ++current_arg;
1132         ++file_count;
1133         break;
1134
1135       case STDIN:
1136         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1137         argbuffer[current_arg].arg_type = at_stdin;
1138         argbuffer[current_arg].what     = optarg;
1139         ++current_arg;
1140         ++file_count;
1141         if (parsing_stdin)
1142           fatal ("cannot parse standard input more than once", (char *)NULL);
1143         parsing_stdin = TRUE;
1144         break;
1145
1146         /* Common options. */
1147       case 'a': append_to_tagfile = TRUE;       break;
1148       case 'C': cplusplus = TRUE;               break;
1149       case 'f':         /* for compatibility with old makefiles */
1150       case 'o':
1151         if (tagfile)
1152           {
1153             error ("-o option may only be given once.", (char *)NULL);
1154             suggest_asking_for_help ();
1155             /* NOTREACHED */
1156           }
1157         tagfile = optarg;
1158         break;
1159       case 'I':
1160       case 'S':         /* for backward compatibility */
1161         ignoreindent = TRUE;
1162         break;
1163       case 'l':
1164         {
               /* An unknown name has already been reported by
                  get_language_from_langname; the option is then ignored.  */
1165           language *lang = get_language_from_langname (optarg);
1166           if (lang != NULL)
1167             {
1168               argbuffer[current_arg].lang = lang;
1169               argbuffer[current_arg].arg_type = at_language;
1170               ++current_arg;
1171             }
1172         }
1173         break;
1174       case 'c':
1175         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1176         optarg = concat (optarg, "i", ""); /* memory leak here */
1177         /* FALLTHRU */
1178       case 'r':
1179         argbuffer[current_arg].arg_type = at_regexp;
1180         argbuffer[current_arg].what = optarg;
1181         ++current_arg;
1182         break;
1183       case 'R':
             /* A regexp entry without text; see the help for -R.  */
1184         argbuffer[current_arg].arg_type = at_regexp;
1185         argbuffer[current_arg].what = NULL;
1186         ++current_arg;
1187         break;
1188       case 'V':
1189         print_version ();
1190         break;
1191       case 'h':
1192       case 'H':
             /* Help is printed after all arguments are read, so that
                print_help can see every --language option.  */
1193         help_asked = TRUE;
1194         break;
1195
1196         /* Etags options */
1197       case 'D': constantypedefs = FALSE;                        break;
1198       case 'i': included_files[nincluded_files++] = optarg;     break;
1199
1200         /* Ctags options. */
1201       case 'B': searchar = '?';                                 break;
1202       case 'd': constantypedefs = TRUE;                         break;
1203       case 't': typedefs = TRUE;                                break;
1204       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1205       case 'u': update = TRUE;                                  break;
1206       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1207       case 'x': cxref_style = TRUE;                             break;
1208       case 'w': no_warnings = TRUE;                             break;
1209       default:
1210         suggest_asking_for_help ();
1211         /* NOTREACHED */
1212       }
1213
1214   /* No more options.  Store the rest of arguments. */
1215   for (; optind < argc; optind++)
1216     {
1217       argbuffer[current_arg].arg_type = at_filename;
1218       argbuffer[current_arg].what = argv[optind];
1219       ++current_arg;
1220       ++file_count;
1221     }
1222
1223   argbuffer[current_arg].arg_type = at_end;
1224
1225   if (help_asked)
1226     print_help (argbuffer);
1227     /* NOTREACHED */
1228
1229   if (nincluded_files == 0 && file_count == 0)
1230     {
1231       error ("no input files specified.", (char *)NULL);
1232       suggest_asking_for_help ();
1233       /* NOTREACHED */
1234     }
1235
1236   if (tagfile == NULL)
1237     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1238   cwd = etags_getcwd ();        /* the current working directory */
1239   if (cwd[strlen (cwd) - 1] != '/')
1240     {
1241       char *oldcwd = cwd;
1242       cwd = concat (oldcwd, "/", "");
1243       free (oldcwd);
1244     }
1245
1246   /* Compute base directory for relative file names. */
1247   if (streq (tagfile, "-")
1248       || strneq (tagfile, "/dev/", 5))
1249     tagfiledir = cwd;            /* relative file names are relative to cwd */
1250   else
1251     {
1252       canonicalize_filename (tagfile);
1253       tagfiledir = absolute_dirname (tagfile, cwd);
1254     }
1255
1256   init ();                      /* set up boolean "functions" */
1257
1258   linebuffer_init (&lb);
1259   linebuffer_init (&filename_lb);
1260   linebuffer_init (&filebuf);
1261   linebuffer_init (&token_name);
1262
       /* etags opens its output before reading any input; ctags opens the
          tags file only at the end of this function.  */
1263   if (!CTAGS)
1264     {
1265       if (streq (tagfile, "-"))
1266         {
1267           tagf = stdout;
1268 #ifdef DOS_NT
1269           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1270              doesn't take effect until after `stdout' is already open). */
1271           if (!isatty (fileno (stdout)))
1272             setmode (fileno (stdout), O_BINARY);
1273 #endif /* DOS_NT */
1274         }
1275       else
1276         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1277       if (tagf == NULL)
1278         pfatal (tagfile);
1279     }
1280
1281   /*
1282    * Loop through files finding functions.
1283    */
1284   for (i = 0; i < current_arg; i++)
1285     {
           /* Static, so it starts out NULL and keeps its value from one
              iteration to the next.  */
1286       static language *lang;    /* non-NULL if language is forced */
1287       char *this_file;
1288
1289       switch (argbuffer[i].arg_type)
1290         {
1291         case at_language:
1292           lang = argbuffer[i].lang;
1293           break;
1294         case at_regexp:
1295           analyse_regex (argbuffer[i].what);
1296           break;
1297         case at_filename:
1298               this_file = argbuffer[i].what;
1299               /* Input file named "-" means read file names from stdin
1300                  (one per line) and use them. */
1301               if (streq (this_file, "-"))
1302                 {
1303                   if (parsing_stdin)
1304                     fatal ("cannot parse standard input AND read file names from it",
1305                            (char *)NULL);
1306                   while (readline_internal (&filename_lb, stdin) > 0)
1307                     process_file_name (filename_lb.buffer, lang);
1308                 }
1309               else
1310                 process_file_name (this_file, lang);
1311           break;
1312         case at_stdin:
1313           this_file = argbuffer[i].what;
1314           process_file (stdin, this_file, lang);
1315           break;
1316         }
1317     }
1318
1319   free_regexps ();
1320   free (lb.buffer);
1321   free (filebuf.buffer);
1322   free (token_name.buffer);
1323
1324   if (!CTAGS || cxref_style)
1325     {
1326       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1327       put_entries (nodehead);
1328       free_tree (nodehead);
1329       nodehead = NULL;
1330       if (!CTAGS)
1331         {
1332           fdesc *fdp;
1333
1334           /* Output file entries that have no tags. */
1335           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1336             if (!fdp->written)
1337               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1338
1339           while (nincluded_files-- > 0)
1340             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1341
1342           if (fclose (tagf) == EOF)
1343             pfatal (tagfile);
1344         }
1345
1346       exit (EXIT_SUCCESS);
1347     }
1348
1349   /* From here on, we are in (CTAGS && !cxref_style) */
1350   if (update)
1351     {
1352       static const char update_fmt[] =
             "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS";
1353       for (i = 0; i < current_arg; ++i)
1354         {
               char *cmd;
1355           switch (argbuffer[i].arg_type)
1356             {
1357             case at_filename:
1358             case at_stdin:
1359               break;
1360             default:
1361               continue;         /* the for loop */
1362             }
               /* Size the command from its operands instead of using a
                  fixed buffer, which long names could overflow.  The size
                  of the format (which includes its NUL and the three "%s")
                  plus the operand lengths is always enough.
                  NOTE(review): the names are passed to the shell unquoted,
                  so names containing shell metacharacters are unsafe.  */
1363           cmd = xnew (sizeof update_fmt + 2 * strlen (tagfile)
1364                       + strlen (argbuffer[i].what), char);
1365           sprintf (cmd, update_fmt, tagfile, argbuffer[i].what, tagfile);
1366           if (system (cmd) != EXIT_SUCCESS)
1367             fatal ("failed to execute shell command", (char *)NULL);
               free (cmd);
1368         }
1369       append_to_tagfile = TRUE;
1370     }
1371
1372   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1373   if (tagf == NULL)
1374     pfatal (tagfile);
1375   put_entries (nodehead);       /* write all the tags (CTAGS) */
1376   free_tree (nodehead);
1377   nodehead = NULL;
1378   if (fclose (tagf) == EOF)
1379     pfatal (tagfile);
1380
1381   if (CTAGS)
1382     if (append_to_tagfile || update)
1383       {
             /* The whole name is used, however long: truncating it would
                make sort work on a different file.  */
1384         static const char sort_fmt[] = "sort -u -o %s %s";
             char *cmd = xnew (sizeof sort_fmt + 2 * strlen (tagfile), char);
1385         /* Maybe these should be used:
1386            setenv ("LC_COLLATE", "C", 1);
1387            setenv ("LC_ALL", "C", 1); */
1388         sprintf (cmd, sort_fmt, tagfile, tagfile);
1389         exit (system (cmd));
1390       }
1391   return EXIT_SUCCESS;
1392 }
1393
1394
1395 /*
1396  * Return a compressor given the file name.  If EXTPTR is non-zero,
1397  * return a pointer into FILE where the compressor-specific
1398  * extension begins.  If no compressor is found, NULL is returned
1399  * and EXTPTR is not significant (it may nevertheless have been
      * written to).  Only a dot that follows the last slash of FILE
      * counts as the start of a suffix.
1400  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1401  */
1402 static compressor *
1403 get_compressor_from_suffix (char *file, char **extptr)
1404 {
1405   compressor *compr;
1406   char *slash, *suffix;
1407
1408   /* File has been processed by canonicalize_filename,
1409      so we don't need to consider backslashes on DOS_NT.  */
1410   slash = etags_strrchr (file, '/');
1411   suffix = etags_strrchr (file, '.');
       /* Compare the two positions only when there is a slash: ordering a
          pointer against a null pointer is undefined behavior.  */
1412   if (suffix == NULL || (slash != NULL && suffix < slash))
1413     return NULL;
1414   if (extptr != NULL)
1415     *extptr = suffix;
1416   suffix += 1;
1417   /* Let those poor souls who live with DOS 8+3 file name limits get
1418      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1419      Only the first do loop is run if not MSDOS */
1420   do
1421     {
1422       for (compr = compressors; compr->suffix != NULL; compr++)
1423         if (streq (compr->suffix, suffix))
1424           return compr;
1425       if (!MSDOS)
1426         break;                  /* do it only once: not really a loop */
           /* Retry with one character less at the front of the suffix.  */
1427       if (extptr != NULL)
1428         *extptr = ++suffix;
1429     } while (*suffix != '\0');
1430   return NULL;
1431 }
1432
1433
1434
1435 /*
1436  * Look NAME up in the language table.  Report an error and return
1437  * NULL when NAME is NULL or matches no entry.
1438  */
1439 static language *
1440 get_language_from_langname (const char *name)
1441 {
1442   language *entry = lang_names;
1443
1444   if (name == NULL)
1445     {
1446       error ("empty language name", (char *)NULL);
1447       return NULL;
1448     }
1449   for (; entry->name != NULL; entry++)
1450     if (streq (name, entry->name))
1451       return entry;
1452   error ("unknown language \"%s\"", name);
1453   return NULL;
1454 }
1455
1456
1457 /*
1458  * Find the language whose interpreter list contains INTERPRETER;
1459  * return NULL if INTERPRETER is NULL or no list mentions it.  */
1460 static language *
1461 get_language_from_interpreter (char *interpreter)
1462 {
1463   language *entry;
1464   const char **known;
1465
1466   if (interpreter == NULL)
1467     return NULL;
1468   for (entry = lang_names; entry->name != NULL; entry++)
1469     for (known = entry->interpreters;
1470          known != NULL && *known != NULL;
1471          known++)
1472       if (streq (*known, interpreter))
1473         return entry;
1474   return NULL;
1475 }
1476
1477
1478
1479 /*
1480  * Return a language given the file name.
      *
      * FILE may include leading directories; only the part after the last
      * slash is examined.  That part is compared first with each language's
      * list of whole file names, then its suffix (the text after its last
      * dot) with each language's suffix list.  Comparisons ignore case
      * unless CASE_SENSITIVE is non-zero.  Return NULL if nothing matches.
1481  */
1482 static language *
1483 get_language_from_filename (char *file, int case_sensitive)
1484 {
1485   language *lang;
1486   const char **name, **ext, *suffix;
       char *base;
1487
       /* Drop the directory part, so that names like "src/Makefile" match
          the whole-name lists and a dot in a directory name is not taken
          for the start of a suffix.  */
       base = etags_strrchr (file, '/');
       base = (base != NULL) ? base + 1 : file;

1488   /* Try whole file name first. */
1489   for (lang = lang_names; lang->name != NULL; lang++)
1490     if (lang->filenames != NULL)
1491       for (name = lang->filenames; *name != NULL; name++)
1492         if ((case_sensitive)
1493             ? streq (*name, base)
1494             : strcaseeq (*name, base))
1495           return lang;
1496
1497   /* If not found, try suffix after last dot. */
1498   suffix = etags_strrchr (base, '.');
1499   if (suffix == NULL)
1500     return NULL;
1501   suffix += 1;
1502   for (lang = lang_names; lang->name != NULL; lang++)
1503     if (lang->suffixes != NULL)
1504       for (ext = lang->suffixes; *ext != NULL; ext++)
1505         if ((case_sensitive)
1506             ? streq (*ext, suffix)
1507             : strcaseeq (*ext, suffix))
1508           return lang;
1509   return NULL;
1510 }
1511
1512 \f
1513 /*
1514  * This routine is called on each file argument.
1515  */
1516 static void
1517 process_file_name (char *file, language *lang)
1518 {
1519   struct stat stat_buf;
1520   FILE *inf;
1521   fdesc *fdp;
1522   compressor *compr;
1523   char *compressed_name, *uncompressed_name;
1524   char *ext, *real_name;
1525   int retval;
1526
1527   canonicalize_filename (file);
1528   if (streq (file, tagfile) && !streq (tagfile, "-"))
1529     {
1530       error ("skipping inclusion of %s in self.", file);
1531       return;
1532     }
1533   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1534     {
1535       compressed_name = NULL;
1536       real_name = uncompressed_name = savestr (file);
1537     }
1538   else
1539     {
1540       real_name = compressed_name = savestr (file);
1541       uncompressed_name = savenstr (file, ext - file);
1542     }
1543
1544   /* If the canonicalized uncompressed name
1545      has already been dealt with, skip it silently. */
1546   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1547     {
1548       assert (fdp->infname != NULL);
1549       if (streq (uncompressed_name, fdp->infname))
1550         goto cleanup;
1551     }
1552
1553   if (stat (real_name, &stat_buf) != 0)
1554     {
1555       /* Reset real_name and try with a different name. */
1556       real_name = NULL;
1557       if (compressed_name != NULL) /* try with the given suffix */
1558         {
1559           if (stat (uncompressed_name, &stat_buf) == 0)
1560             real_name = uncompressed_name;
1561         }
1562       else                      /* try all possible suffixes */
1563         {
1564           for (compr = compressors; compr->suffix != NULL; compr++)
1565             {
1566               compressed_name = concat (file, ".", compr->suffix);
1567               if (stat (compressed_name, &stat_buf) != 0)
1568                 {
1569                   if (MSDOS)
1570                     {
1571                       char *suf = compressed_name + strlen (file);
1572                       size_t suflen = strlen (compr->suffix) + 1;
1573                       for ( ; suf[1]; suf++, suflen--)
1574                         {
1575                           memmove (suf, suf + 1, suflen);
1576                           if (stat (compressed_name, &stat_buf) == 0)
1577                             {
1578                               real_name = compressed_name;
1579                               break;
1580                             }
1581                         }
1582                       if (real_name != NULL)
1583                         break;
1584                     } /* MSDOS */
1585                   free (compressed_name);
1586                   compressed_name = NULL;
1587                 }
1588               else
1589                 {
1590                   real_name = compressed_name;
1591                   break;
1592                 }
1593             }
1594         }
1595       if (real_name == NULL)
1596         {
1597           perror (file);
1598           goto cleanup;
1599         }
1600     } /* try with a different name */
1601
1602   if (!S_ISREG (stat_buf.st_mode))
1603     {
1604       error ("skipping %s: it is not a regular file.", real_name);
1605       goto cleanup;
1606     }
1607   if (real_name == compressed_name)
1608     {
1609       char *cmd = concat (compr->command, " ", real_name);
1610       inf = (FILE *) popen (cmd, "r");
1611       free (cmd);
1612     }
1613   else
1614     inf = fopen (real_name, "r");
1615   if (inf == NULL)
1616     {
1617       perror (real_name);
1618       goto cleanup;
1619     }
1620
1621   process_file (inf, uncompressed_name, lang);
1622
1623   if (real_name == compressed_name)
1624     retval = pclose (inf);
1625   else
1626     retval = fclose (inf);
1627   if (retval < 0)
1628     pfatal (file);
1629
1630  cleanup:
1631   free (compressed_name);
1632   free (uncompressed_name);
1633   last_node = NULL;
1634   curfdp = NULL;
1635   return;
1636 }
1637
1638 static void
1639 process_file (FILE *fh, char *fn, language *lang)
1640 {
1641   static const fdesc emptyfdesc;
1642   fdesc *fdp;
1643
1644   /* Create a new input file description entry. */
1645   fdp = xnew (1, fdesc);
1646   *fdp = emptyfdesc;
1647   fdp->next = fdhead;
1648   fdp->infname = savestr (fn);
1649   fdp->lang = lang;
1650   fdp->infabsname = absolute_filename (fn, cwd);
1651   fdp->infabsdir = absolute_dirname (fn, cwd);
1652   if (filename_is_absolute (fn))
1653     {
1654       /* An absolute file name.  Canonicalize it. */
1655       fdp->taggedfname = absolute_filename (fn, NULL);
1656     }
1657   else
1658     {
1659       /* A file name relative to cwd.  Make it relative
1660          to the directory of the tags file. */
1661       fdp->taggedfname = relative_filename (fn, tagfiledir);
1662     }
1663   fdp->usecharno = TRUE;        /* use char position when making tags */
1664   fdp->prop = NULL;
1665   fdp->written = FALSE;         /* not written on tags file yet */
1666
1667   fdhead = fdp;
1668   curfdp = fdhead;              /* the current file description */
1669
1670   find_entries (fh);
1671
1672   /* If not Ctags, and if this is not metasource and if it contained no #line
1673      directives, we can write the tags and free all nodes pointing to
1674      curfdp. */
1675   if (!CTAGS
1676       && curfdp->usecharno      /* no #line directives in this file */
1677       && !curfdp->lang->metasource)
1678     {
1679       node *np, *prev;
1680
1681       /* Look for the head of the sublist relative to this file.  See add_node
1682          for the structure of the node tree. */
1683       prev = NULL;
1684       for (np = nodehead; np != NULL; prev = np, np = np->left)
1685         if (np->fdp == curfdp)
1686           break;
1687
1688       /* If we generated tags for this file, write and delete them. */
1689       if (np != NULL)
1690         {
1691           /* This is the head of the last sublist, if any.  The following
1692              instructions depend on this being true. */
1693           assert (np->left == NULL);
1694
1695           assert (fdhead == curfdp);
1696           assert (last_node->fdp == curfdp);
1697           put_entries (np);     /* write tags for file curfdp->taggedfname */
1698           free_tree (np);       /* remove the written nodes */
1699           if (prev == NULL)
1700             nodehead = NULL;    /* no nodes left */
1701           else
1702             prev->left = NULL;  /* delete the pointer to the sublist */
1703         }
1704     }
1705 }
1706
1707 /*
1708  * This routine sets up the boolean pseudo-functions which work
1709  * by setting boolean flags dependent upon the corresponding character.
1710  * Every char which is NOT in that string is not a white char.  Therefore,
1711  * all of the array "_wht" is set to FALSE, and then the elements
1712  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1713  * of a char is TRUE if it is the string "white", else FALSE.
1714  */
1715 static void
1716 init (void)
1717 {
1718   register const char *sp;
1719   register int i;
1720
1721   for (i = 0; i < CHARS; i++)
1722     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1723   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1724   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1725   notinname('\0') = notinname('\n');
1726   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1727   begtoken('\0') = begtoken('\n');
1728   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1729   intoken('\0') = intoken('\n');
1730   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1731   endtoken('\0') = endtoken('\n');
1732 }
1733
1734 /*
1735  * This routine opens the specified file and calls the function
1736  * which finds the function and type definitions.
1737  */
1738 static void
1739 find_entries (FILE *inf)
1740 {
1741   char *cp;
1742   language *lang = curfdp->lang;
1743   Lang_function *parser = NULL;
1744
1745   /* If user specified a language, use it. */
1746   if (lang != NULL && lang->function != NULL)
1747     {
1748       parser = lang->function;
1749     }
1750
1751   /* Else try to guess the language given the file name. */
1752   if (parser == NULL)
1753     {
1754       lang = get_language_from_filename (curfdp->infname, TRUE);
1755       if (lang != NULL && lang->function != NULL)
1756         {
1757           curfdp->lang = lang;
1758           parser = lang->function;
1759         }
1760     }
1761
1762   /* Else look for sharp-bang as the first two characters. */
1763   if (parser == NULL
1764       && readline_internal (&lb, inf) > 0
1765       && lb.len >= 2
1766       && lb.buffer[0] == '#'
1767       && lb.buffer[1] == '!')
1768     {
1769       char *lp;
1770
1771       /* Set lp to point at the first char after the last slash in the
1772          line or, if no slashes, at the first nonblank.  Then set cp to
1773          the first successive blank and terminate the string. */
1774       lp = etags_strrchr (lb.buffer+2, '/');
1775       if (lp != NULL)
1776         lp += 1;
1777       else
1778         lp = skip_spaces (lb.buffer + 2);
1779       cp = skip_non_spaces (lp);
1780       *cp = '\0';
1781
1782       if (strlen (lp) > 0)
1783         {
1784           lang = get_language_from_interpreter (lp);
1785           if (lang != NULL && lang->function != NULL)
1786             {
1787               curfdp->lang = lang;
1788               parser = lang->function;
1789             }
1790         }
1791     }
1792
1793   /* We rewind here, even if inf may be a pipe.  We fail if the
1794      length of the first line is longer than the pipe block size,
1795      which is unlikely. */
1796   rewind (inf);
1797
1798   /* Else try to guess the language given the case insensitive file name. */
1799   if (parser == NULL)
1800     {
1801       lang = get_language_from_filename (curfdp->infname, FALSE);
1802       if (lang != NULL && lang->function != NULL)
1803         {
1804           curfdp->lang = lang;
1805           parser = lang->function;
1806         }
1807     }
1808
1809   /* Else try Fortran or C. */
1810   if (parser == NULL)
1811     {
1812       node *old_last_node = last_node;
1813
1814       curfdp->lang = get_language_from_langname ("fortran");
1815       find_entries (inf);
1816
1817       if (old_last_node == last_node)
1818         /* No Fortran entries found.  Try C. */
1819         {
1820           /* We do not tag if rewind fails.
1821              Only the file name will be recorded in the tags file. */
1822           rewind (inf);
1823           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1824           find_entries (inf);
1825         }
1826       return;
1827     }
1828
1829   if (!no_line_directive
1830       && curfdp->lang != NULL && curfdp->lang->metasource)
1831     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1832        file, or anyway we parsed a file that is automatically generated from
1833        this one.  If this is the case, the bingo.c file contained #line
1834        directives that generated tags pointing to this file.  Let's delete
1835        them all before parsing this file, which is the real source. */
1836     {
1837       fdesc **fdpp = &fdhead;
1838       while (*fdpp != NULL)
1839         if (*fdpp != curfdp
1840             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1841           /* We found one of those!  We must delete both the file description
1842              and all tags referring to it. */
1843           {
1844             fdesc *badfdp = *fdpp;
1845
1846             /* Delete the tags referring to badfdp->taggedfname
1847                that were obtained from badfdp->infname. */
1848             invalidate_nodes (badfdp, &nodehead);
1849
1850             *fdpp = badfdp->next; /* remove the bad description from the list */
1851             free_fdesc (badfdp);
1852           }
1853         else
1854           fdpp = &(*fdpp)->next; /* advance the list pointer */
1855     }
1856
1857   assert (parser != NULL);
1858
1859   /* Generic initialisations before reading from file. */
1860   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1861
1862   /* Generic initialisations before parsing file with readline. */
1863   lineno = 0;                  /* reset global line number */
1864   charno = 0;                  /* reset global char number */
1865   linecharno = 0;              /* reset global char number of line start */
1866
1867   parser (inf);
1868
1869   regex_tag_multiline ();
1870 }
1871
1872 \f
1873 /*
1874  * Check whether an implicitly named tag should be created,
1875  * then call `pfnote'.
1876  * NAME is a string that is internally copied by this function.
1877  *
1878  * TAGS format specification
1879  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1880  * The following is explained in some more detail in etc/ETAGS.EBNF.
1881  *
1882  * make_tag creates tags with "implicit tag names" (unnamed tags)
1883  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1884  *  1. NAME does not contain any of the characters in NONAM;
1885  *  2. LINESTART contains name as either a rightmost, or rightmost but
1886  *     one character, substring;
1887  *  3. the character, if any, immediately before NAME in LINESTART must
1888  *     be a character in NONAM;
1889  *  4. the character, if any, immediately after NAME in LINESTART must
1890  *     also be a character in NONAM.
1891  *
1892  * The implementation uses the notinname() macro, which recognises the
1893  * characters stored in the string `nonam'.
1894  * etags.el needs to use the same characters that are in NONAM.
1895  */
1896 static void
1897 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1898           int namelen,          /* tag length */
1899           int is_func,          /* tag is a function */
1900           char *linestart,      /* start of the line where tag is */
1901           int linelen,          /* length of the line where tag is */
1902           int lno,              /* line number */
1903           long int cno)         /* character number */
1904 {
1905   bool named = (name != NULL && namelen > 0);
1906   char *nname = NULL;
1907
1908   if (!CTAGS && named)          /* maybe set named to false */
1909     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1910        such that etags.el can guess a name from it. */
1911     {
1912       int i;
1913       register const char *cp = name;
1914
1915       for (i = 0; i < namelen; i++)
1916         if (notinname (*cp++))
1917           break;
1918       if (i == namelen)                         /* rule #1 */
1919         {
1920           cp = linestart + linelen - namelen;
1921           if (notinname (linestart[linelen-1]))
1922             cp -= 1;                            /* rule #4 */
1923           if (cp >= linestart                   /* rule #2 */
1924               && (cp == linestart
1925                   || notinname (cp[-1]))        /* rule #3 */
1926               && strneq (name, cp, namelen))    /* rule #2 */
1927             named = FALSE;      /* use implicit tag name */
1928         }
1929     }
1930
1931   if (named)
1932     nname = savenstr (name, namelen);
1933
1934   pfnote (nname, is_func, linestart, linelen, lno, cno);
1935 }
1936
1937 /* Record a tag. */
1938 static void
1939 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1940                                 /* tag name, or NULL if unnamed */
1941                                 /* tag is a function */
1942                                 /* start of the line where tag is */
1943                                 /* length of the line where tag is */
1944                                 /* line number */
1945                                 /* character number */
1946 {
1947   register node *np;
1948
1949   assert (name == NULL || name[0] != '\0');
1950   if (CTAGS && name == NULL)
1951     return;
1952
1953   np = xnew (1, node);
1954
1955   /* If ctags mode, change name "main" to M<thisfilename>. */
1956   if (CTAGS && !cxref_style && streq (name, "main"))
1957     {
1958       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1959       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1960       fp = etags_strrchr (np->name, '.');
1961       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1962         fp[0] = '\0';
1963     }
1964   else
1965     np->name = name;
1966   np->valid = TRUE;
1967   np->been_warned = FALSE;
1968   np->fdp = curfdp;
1969   np->is_func = is_func;
1970   np->lno = lno;
1971   if (np->fdp->usecharno)
1972     /* Our char numbers are 0-base, because of C language tradition?
1973        ctags compatibility?  old versions compatibility?   I don't know.
1974        Anyway, since emacs's are 1-base we expect etags.el to take care
1975        of the difference.  If we wanted to have 1-based numbers, we would
1976        uncomment the +1 below. */
1977     np->cno = cno /* + 1 */ ;
1978   else
1979     np->cno = invalidcharno;
1980   np->left = np->right = NULL;
1981   if (CTAGS && !cxref_style)
1982     {
1983       if (strlen (linestart) < 50)
1984         np->regex = concat (linestart, "$", "");
1985       else
1986         np->regex = savenstr (linestart, 50);
1987     }
1988   else
1989     np->regex = savenstr (linestart, linelen);
1990
1991   add_node (np, &nodehead);
1992 }
1993
1994 /*
1995  * free_tree ()
1996  *      recurse on left children, iterate on right children.
1997  */
1998 static void
1999 free_tree (register node *np)
2000 {
2001   while (np)
2002     {
2003       register node *node_right = np->right;
2004       free_tree (np->left);
2005       free (np->name);
2006       free (np->regex);
2007       free (np);
2008       np = node_right;
2009     }
2010 }
2011
2012 /*
2013  * free_fdesc ()
2014  *      delete a file description
2015  */
2016 static void
2017 free_fdesc (register fdesc *fdp)
2018 {
2019   free (fdp->infname);
2020   free (fdp->infabsname);
2021   free (fdp->infabsdir);
2022   free (fdp->taggedfname);
2023   free (fdp->prop);
2024   free (fdp);
2025 }
2026
2027 /*
2028  * add_node ()
2029  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2030  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2031  *      balancing.
2032  *
2033  *      add_node is the only function allowed to add nodes, so it can
2034  *      maintain state.
2035  */
2036 static void
2037 add_node (node *np, node **cur_node_p)
2038 {
2039   register int dif;
2040   register node *cur_node = *cur_node_p;
2041
2042   if (cur_node == NULL)
2043     {
2044       *cur_node_p = np;
2045       last_node = np;
2046       return;
2047     }
2048
2049   if (!CTAGS)
2050     /* Etags Mode */
2051     {
2052       /* For each file name, tags are in a linked sublist on the right
2053          pointer.  The first tags of different files are a linked list
2054          on the left pointer.  last_node points to the end of the last
2055          used sublist. */
2056       if (last_node != NULL && last_node->fdp == np->fdp)
2057         {
2058           /* Let's use the same sublist as the last added node. */
2059           assert (last_node->right == NULL);
2060           last_node->right = np;
2061           last_node = np;
2062         }
2063       else if (cur_node->fdp == np->fdp)
2064         {
2065           /* Scanning the list we found the head of a sublist which is
2066              good for us.  Let's scan this sublist. */
2067           add_node (np, &cur_node->right);
2068         }
2069       else
2070         /* The head of this sublist is not good for us.  Let's try the
2071            next one. */
2072         add_node (np, &cur_node->left);
2073     } /* if ETAGS mode */
2074
2075   else
2076     {
2077       /* Ctags Mode */
2078       dif = strcmp (np->name, cur_node->name);
2079
2080       /*
2081        * If this tag name matches an existing one, then
2082        * do not add the node, but maybe print a warning.
2083        */
2084       if (no_duplicates && !dif)
2085         {
2086           if (np->fdp == cur_node->fdp)
2087             {
2088               if (!no_warnings)
2089                 {
2090                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2091                            np->fdp->infname, lineno, np->name);
2092                   fprintf (stderr, "Second entry ignored\n");
2093                 }
2094             }
2095           else if (!cur_node->been_warned && !no_warnings)
2096             {
2097               fprintf
2098                 (stderr,
2099                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2100                  np->fdp->infname, cur_node->fdp->infname, np->name);
2101               cur_node->been_warned = TRUE;
2102             }
2103           return;
2104         }
2105
2106       /* Actually add the node */
2107       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2108     } /* if CTAGS mode */
2109 }
2110
2111 /*
2112  * invalidate_nodes ()
2113  *      Scan the node tree and invalidate all nodes pointing to the
2114  *      given file description (CTAGS case) or free them (ETAGS case).
2115  */
2116 static void
2117 invalidate_nodes (fdesc *badfdp, node **npp)
2118 {
2119   node *np = *npp;
2120
2121   if (np == NULL)
2122     return;
2123
2124   if (CTAGS)
2125     {
2126       if (np->left != NULL)
2127         invalidate_nodes (badfdp, &np->left);
2128       if (np->fdp == badfdp)
2129         np->valid = FALSE;
2130       if (np->right != NULL)
2131         invalidate_nodes (badfdp, &np->right);
2132     }
2133   else
2134     {
2135       assert (np->fdp != NULL);
2136       if (np->fdp == badfdp)
2137         {
2138           *npp = np->left;      /* detach the sublist from the list */
2139           np->left = NULL;      /* isolate it */
2140           free_tree (np);       /* free it */
2141           invalidate_nodes (badfdp, npp);
2142         }
2143       else
2144         invalidate_nodes (badfdp, &np->left);
2145     }
2146 }
2147
2148 \f
2149 static int total_size_of_entries (node *);
2150 static int number_len (long);
2151
/* Number of decimal digits needed to print the non-negative number NUM;
   the result is never less than 1. */
static int
number_len (long int num)
{
  int digits = 0;

  do
    {
      digits++;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
2161
2162 /*
2163  * Return total number of characters that put_entries will output for
2164  * the nodes in the linked list at the right of the specified node.
2165  * This count is irrelevant with etags.el since emacs 19.34 at least,
2166  * but is still supplied for backward compatibility.
2167  */
2168 static int
2169 total_size_of_entries (register node *np)
2170 {
2171   register int total = 0;
2172
2173   for (; np != NULL; np = np->right)
2174     if (np->valid)
2175       {
2176         total += strlen (np->regex) + 1;                /* pat\177 */
2177         if (np->name != NULL)
2178           total += strlen (np->name) + 1;               /* name\001 */
2179         total += number_len ((long) np->lno) + 1;       /* lno, */
2180         if (np->cno != invalidcharno)                   /* cno */
2181           total += number_len (np->cno);
2182         total += 1;                                     /* newline */
2183       }
2184
2185   return total;
2186 }
2187
2188 static void
2189 put_entries (register node *np)
2190 {
2191   register char *sp;
2192   static fdesc *fdp = NULL;
2193
2194   if (np == NULL)
2195     return;
2196
2197   /* Output subentries that precede this one */
2198   if (CTAGS)
2199     put_entries (np->left);
2200
2201   /* Output this entry */
2202   if (np->valid)
2203     {
2204       if (!CTAGS)
2205         {
2206           /* Etags mode */
2207           if (fdp != np->fdp)
2208             {
2209               fdp = np->fdp;
2210               fprintf (tagf, "\f\n%s,%d\n",
2211                        fdp->taggedfname, total_size_of_entries (np));
2212               fdp->written = TRUE;
2213             }
2214           fputs (np->regex, tagf);
2215           fputc ('\177', tagf);
2216           if (np->name != NULL)
2217             {
2218               fputs (np->name, tagf);
2219               fputc ('\001', tagf);
2220             }
2221           fprintf (tagf, "%d,", np->lno);
2222           if (np->cno != invalidcharno)
2223             fprintf (tagf, "%ld", np->cno);
2224           fputs ("\n", tagf);
2225         }
2226       else
2227         {
2228           /* Ctags mode */
2229           if (np->name == NULL)
2230             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2231
2232           if (cxref_style)
2233             {
2234               if (vgrind_style)
2235                 fprintf (stdout, "%s %s %d\n",
2236                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2237               else
2238                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2239                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2240             }
2241           else
2242             {
2243               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2244
2245               if (np->is_func)
2246                 {               /* function or #define macro with args */
2247                   putc (searchar, tagf);
2248                   putc ('^', tagf);
2249
2250                   for (sp = np->regex; *sp; sp++)
2251                     {
2252                       if (*sp == '\\' || *sp == searchar)
2253                         putc ('\\', tagf);
2254                       putc (*sp, tagf);
2255                     }
2256                   putc (searchar, tagf);
2257                 }
2258               else
2259                 {               /* anything else; text pattern inadequate */
2260                   fprintf (tagf, "%d", np->lno);
2261                 }
2262               putc ('\n', tagf);
2263             }
2264         }
2265     } /* if this node contains a valid tag */
2266
2267   /* Output subentries that follow this one */
2268   put_entries (np->right);
2269   if (!CTAGS)
2270     put_entries (np->left);
2271 }
2272
2273 \f
/* C extensions.  Dialect flags for the C-family keyword lookup: the
   c_ext field of a keyword table entry is ANDed with the dialect in
   use (see C_symtype).  Note that the values of C_STAR and C_JAVA both
   include the C_PLPL bit, which is why the Java-only keywords in the
   table are entered as (C_JAVA & ~C_PLPL). */
#define C_EXT   0x00fff         /* C extensions: mask of the low 12 bits,
                                   which hold the dialect values below */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2282
/*
 * The C symbol tables.
 *
 * Kinds of keyword known to the C-family keyword table; st_none is
 * what C_symtype returns for anything that is not a keyword of the
 * dialect in use.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2297
2298 static unsigned int hash (const char *, unsigned int);
2299 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2300 static enum sym_type C_symtype (char *, int, int);
2301
2302 /* Feed stuff between (but not including) %[ and %] lines to:
2303      gperf -m 5
2304 %[
2305 %compare-strncmp
2306 %enum
2307 %struct-type
2308 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2309 %%
2310 if,             0,                      st_C_ignore
2311 for,            0,                      st_C_ignore
2312 while,          0,                      st_C_ignore
2313 switch,         0,                      st_C_ignore
2314 return,         0,                      st_C_ignore
2315 __attribute__,  0,                      st_C_attribute
2316 GTY,            0,                      st_C_attribute
2317 @interface,     0,                      st_C_objprot
2318 @protocol,      0,                      st_C_objprot
2319 @implementation,0,                      st_C_objimpl
2320 @end,           0,                      st_C_objend
2321 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2322 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2323 friend,         C_PLPL,                 st_C_ignore
2324 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2325 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2326 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2327 class,          0,                      st_C_class
2328 namespace,      C_PLPL,                 st_C_struct
2329 domain,         C_STAR,                 st_C_struct
2330 union,          0,                      st_C_struct
2331 struct,         0,                      st_C_struct
2332 extern,         0,                      st_C_extern
2333 enum,           0,                      st_C_enum
2334 typedef,        0,                      st_C_typedef
2335 define,         0,                      st_C_define
2336 undef,          0,                      st_C_define
2337 operator,       C_PLPL,                 st_C_operator
2338 template,       0,                      st_C_template
2339 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2340 DEFUN,          0,                      st_C_gnumacro
2341 SYSCALL,        0,                      st_C_gnumacro
2342 ENTRY,          0,                      st_C_gnumacro
2343 PSEUDO,         0,                      st_C_gnumacro
2344 # These are defined inside C functions, so currently they are not met.
2345 # EXFUN used in glibc, DEFVAR_* in emacs.
2346 #EXFUN,         0,                      st_C_gnumacro
2347 #DEFVAR_,       0,                      st_C_gnumacro
2348 %]
2349 and replace lines between %< and %> with its output, then:
2350  - remove the #if characterset check
2351  - make in_word_set static and not inline. */
2352 /*%<*/
2353 /* C code produced by gperf version 3.0.1 */
2354 /* Command-line: gperf -m 5  */
2355 /* Computed positions: -k'2-3' */
2356
2357 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2358 /* maximum key range = 33, duplicates = 0 */
2359
/* Hash function of the gperf-generated keyword table: the length LEN
   of the word STR plus the association values of its second and, when
   the word is not exactly two characters long, third character.
   Callers must pass words of at least two characters (in_word_set
   checks this before calling). */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (register const char *str, register unsigned int len)
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* The third character only takes part for words that are not
     exactly two characters long. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2412
2413 static struct C_stab_entry *
2414 in_word_set (register const char *str, register unsigned int len)
2415 {
2416   enum
2417     {
2418       TOTAL_KEYWORDS = 33,
2419       MIN_WORD_LENGTH = 2,
2420       MAX_WORD_LENGTH = 15,
2421       MIN_HASH_VALUE = 2,
2422       MAX_HASH_VALUE = 34
2423     };
2424
2425   static struct C_stab_entry wordlist[] =
2426     {
2427       {""}, {""},
2428       {"if",            0,                      st_C_ignore},
2429       {"GTY",           0,                      st_C_attribute},
2430       {"@end",          0,                      st_C_objend},
2431       {"union",         0,                      st_C_struct},
2432       {"define",                0,                      st_C_define},
2433       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2434       {"template",      0,                      st_C_template},
2435       {"operator",      C_PLPL,                 st_C_operator},
2436       {"@interface",    0,                      st_C_objprot},
2437       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2438       {"friend",                C_PLPL,                 st_C_ignore},
2439       {"typedef",       0,                      st_C_typedef},
2440       {"return",                0,                      st_C_ignore},
2441       {"@implementation",0,                     st_C_objimpl},
2442       {"@protocol",     0,                      st_C_objprot},
2443       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2444       {"extern",                0,                      st_C_extern},
2445       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2446       {"struct",                0,                      st_C_struct},
2447       {"domain",                C_STAR,                 st_C_struct},
2448       {"switch",                0,                      st_C_ignore},
2449       {"enum",          0,                      st_C_enum},
2450       {"for",           0,                      st_C_ignore},
2451       {"namespace",     C_PLPL,                 st_C_struct},
2452       {"class",         0,                      st_C_class},
2453       {"while",         0,                      st_C_ignore},
2454       {"undef",         0,                      st_C_define},
2455       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2456       {"__attribute__", 0,                      st_C_attribute},
2457       {"SYSCALL",       0,                      st_C_gnumacro},
2458       {"ENTRY",         0,                      st_C_gnumacro},
2459       {"PSEUDO",                0,                      st_C_gnumacro},
2460       {"DEFUN",         0,                      st_C_gnumacro}
2461     };
2462
2463   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2464     {
2465       register int key = hash (str, len);
2466
2467       if (key <= MAX_HASH_VALUE && key >= 0)
2468         {
2469           register const char *s = wordlist[key].name;
2470
2471           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2472             return &wordlist[key];
2473         }
2474     }
2475   return 0;
2476 }
2477 /*%>*/
2478
2479 static enum sym_type
2480 C_symtype (char *str, int len, int c_ext)
2481 {
2482   register struct C_stab_entry *se = in_word_set (str, len);
2483
2484   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2485     return st_none;
2486   return se->type;
2487 }
2488
2489 \f
2490 /*
2491  * Ignoring __attribute__ ((list)); the GTY keyword maps to the same symbol type
2492  */
2493 static bool inattribute;        /* set by consider_token on an attribute keyword; C_entries considers no tokens while it is set */
2494
2495 /*
2496  * C functions and variables are recognized using a simple
2497  * finite automaton.  fvdef is its state variable.
2498  */
2499 static enum
2500 {
2501   fvnone,                       /* nothing seen */
2502   fdefunkey,                    /* GNU macro keyword (DEFUN, ENTRY, SYSCALL, PSEUDO) seen */
2503   fdefunname,                   /* name following the GNU macro keyword seen */
2504   foperator,                    /* func: operator keyword, or a name ending in ::operator, seen (cplpl) */
2505   fvnameseen,                   /* function or variable name seen */
2506   fstartlist,                   /* func: just after open parenthesis */
2507   finlist,                      /* func: in parameter list */
2508   flistseen,                    /* func: after parameter list */
2509   fignore,                      /* func: before open brace */
2510   vignore                       /* var-like: ignore until ';' (entered after an ignored keyword or asm) */
2511 } fvdef;
2512
2513 static bool fvextern;           /* func or var: extern keyword seen */
2514
2515 /*
2516  * typedefs are recognized using a simple finite automaton.
2517  * typdef is its state variable.
2518  */
2519 static enum
2520 {
2521   tnone,                        /* nothing seen */
2522   tkeyseen,                     /* typedef keyword seen (entered only when `typedefs' is set) */
2523   ttypeseen,                    /* defined type seen: a plain identifier or a struct-like keyword */
2524   tinbody,                      /* inside typedef body */
2525   tend,                         /* just before typedef tag */
2526   tignore                       /* junk after typedef tag */
2527 } typdef;
2528
2529 /*
2530  * struct-like structures (enum, struct and union) are recognized
2531  * using another simple finite automaton.  `structdef' is its state
2532  * variable.
2533  */
2534 static enum
2535 {
2536   snone,                        /* nothing seen yet,
2537                                    or in struct body if bracelev > 0 */
2538   skeyseen,                     /* struct-like keyword seen */
2539   stagseen,                     /* struct-like tag seen */
2540   scolonseen                    /* colon, or Java extends/implements, seen after struct-like tag */
2541 } structdef;
2542
2543 /*
2544  * When objdef is different from onone, objtag is the name of the class.
2545  */
2546 static const char *objtag = "<uninited>";      /* replaced by a savenstr copy in consider_token; that copy is never freed */
2547
2548 /*
2549  * Yet another little state machine to deal with preprocessor lines.
2550  */
2551 static enum
2552 {
2553   dnone,                        /* nothing seen */
2554   dsharpseen,                   /* '#' seen with only blanks or a comment before it on the line */
2555   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2556   dignorerest                   /* ignore rest of line */
2557 } definedef;
2558
2559 /*
2560  * State machine for Objective C protocols and implementations.
2561  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2562  */
2563 static enum
2564 {
2565   onone,                        /* nothing seen */
2566   oprotocol,                    /* @interface or @protocol seen */
2567   oimplementation,              /* @implementation seen */
2568   otagseen,                     /* class name seen */
2569   oparenseen,                   /* parenthesis before category seen */
2570   ocatseen,                     /* category name seen */
2571   oinbody,                      /* in @implementation body */
2572   omethodsign,                  /* in @implementation body, after +/- */
2573   omethodtag,                   /* after method name */
2574   omethodcolon,                 /* after method colon */
2575   omethodparm,                  /* after method parameter */
2576   oignore                       /* wait for @end */
2577 } objdef;
2578
2579
2580 /*
2581  * Use this structure to keep info about the token read, and how it
2582  * should be tagged.  Used by the make_C_tag function to build a tag.
2583  */
2584 static struct tok
2585 {
2586   char *line;                   /* string containing the token */
2587   int offset;                   /* where the token starts in LINE */
2588   int length;                   /* token length */
2589   /*
2590     The previous members can be used to pass strings around for generic
2591     purposes.  The following ones specifically refer to creating tags.  In this
2592     case the token contained here is the pattern that will be used to create a
2593     tag.
2594   */
2595   bool valid;                   /* make_C_tag creates a tag only when this is
2596                                    set; the token should be invalidated whenever
2597                                    a state machine is reset prematurely */
2598   bool named;                   /* create a named tag */
2599   int lineno;                   /* source line number of tag */
2600   long linepos;                 /* source char number of tag */
2601 } token;                        /* latest token read */
2602
2603 /*
2604  * Variables and functions for dealing with nested structures.
2605  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2606  */
2607 static void pushclass_above (int, char *, int);        /* brace level, name, name length */
2608 static void popclass_above (int);                      /* brace level */
2609 static void write_classname (linebuffer *, const char *qualifier);     /* joins the stacked names with QUALIFIER */
2610
2611 static struct {
2612   char **cname;                 /* nested class names (savenstr copies; NULL for an unnamed entry) */
2613   int *bracelev;                /* nested class brace level; pushclass_above asserts these increase */
2614   int nl;                       /* class nesting level (elements used) */
2615   int size;                     /* allocated length of both arrays */
2616 } cstack;                       /* stack for nested declaration tags */
2617 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2618 #define nestlev         (cstack.nl)
2619 /* After struct keyword or in struct body, not inside a nested function.  Uses the `bracelev' in scope where it is expanded. */
2620 #define instruct        (structdef == snone && nestlev > 0                      \
2621                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2622
2623 static void
2624 pushclass_above (int bracelev, char *str, int len)
2625 {
2626   int nl;
2627
2628   popclass_above (bracelev);
2629   nl = cstack.nl;
2630   if (nl >= cstack.size)
2631     {
2632       int size = cstack.size *= 2;
2633       xrnew (cstack.cname, size, char *);
2634       xrnew (cstack.bracelev, size, int);
2635     }
2636   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2637   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2638   cstack.bracelev[nl] = bracelev;
2639   cstack.nl = nl + 1;
2640 }
2641
2642 static void
2643 popclass_above (int bracelev)
2644 {
2645   int nl;
2646
2647   for (nl = cstack.nl - 1;
2648        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2649        nl--)
2650     {
2651       free (cstack.cname[nl]);
2652       cstack.nl = nl;
2653     }
2654 }
2655
2656 static void
2657 write_classname (linebuffer *cn, const char *qualifier)
2658 {
2659   int i, len;
2660   int qlen = strlen (qualifier);
2661
2662   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2663     {
2664       len = 0;
2665       cn->len = 0;
2666       cn->buffer[0] = '\0';
2667     }
2668   else
2669     {
2670       len = strlen (cstack.cname[0]);
2671       linebuffer_setlen (cn, len);
2672       strcpy (cn->buffer, cstack.cname[0]);
2673     }
2674   for (i = 1; i < cstack.nl; i++)
2675     {
2676       char *s;
2677       int slen;
2678
2679       s = cstack.cname[i];
2680       if (s == NULL)
2681         continue;
2682       slen = strlen (s);
2683       len += slen + qlen;
2684       linebuffer_setlen (cn, len);
2685       strncat (cn->buffer, qualifier, qlen);
2686       strncat (cn->buffer, s, slen);
2687     }
2688 }
2689
2690 \f
2691 static bool consider_token (char *, int, int, int *, int, int, bool *);        /* token, length, next char, &c_ext, bracelev, parlev, &is_func_or_var */
2692 static void make_C_tag (bool); /* argument is forwarded to make_tag */
2693
2694 /*
2695  * consider_token ()
2696  *      checks to see if the current token is at the start of a
2697  *      function or variable, or corresponds to a typedef, or
2698  *      is a struct/union/enum tag, or #define, or an enum constant.
2699  *
2700  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2701  *      with args.  C_EXTP points to which language we are looking at.
2702  *
2703  * Globals
2704  *      fvdef                   IN OUT
2705  *      structdef               IN OUT
2706  *      definedef               IN OUT
2707  *      typdef                  IN OUT
2708  *      objdef                  IN OUT
2709  */
2710
2711 static bool
2712 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2713                                 /* IN: token pointer */
2714                                 /* IN: token length */
2715                                 /* IN: first char after the token */
2716                                 /* IN, OUT: C extensions mask */
2717                                 /* IN: brace level */
2718                                 /* IN: parenthesis level */
2719                                 /* OUT: function or variable found */
2720 {
2721   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2722      structtype is the type of the preceding struct-like keyword, and
2723      structbracelev is the brace level where it has been seen. */
2724   static enum sym_type structtype;
2725   static int structbracelev;
2726   static enum sym_type toktype;
2727
2728
2729   toktype = C_symtype (str, len, *c_extp);
2730
2731   /*
2732    * Skip __attribute__
2733    */
2734   if (toktype == st_C_attribute)
2735     {
2736       inattribute = TRUE;
2737       return FALSE;
2738      }
2739
2740    /*
2741     * Advance the definedef state machine.
2742     */
2743    switch (definedef)
2744      {
2745      case dnone:
2746        /* We're not on a preprocessor line. */
2747        if (toktype == st_C_gnumacro)
2748          {
2749            fvdef = fdefunkey;
2750            return FALSE;
2751          }
2752        break;
2753      case dsharpseen:
2754        if (toktype == st_C_define)
2755          {
2756            definedef = ddefineseen;
2757          }
2758        else
2759          {
2760            definedef = dignorerest;
2761          }
2762        return FALSE;
2763      case ddefineseen:
2764        /*
2765         * Make a tag for any macro, unless it is a constant
2766         * and constantypedefs is FALSE.
2767         */
2768        definedef = dignorerest;
2769        *is_func_or_var = (c == '(');
2770        if (!*is_func_or_var && !constantypedefs)
2771          return FALSE;
2772        else
2773          return TRUE;
2774      case dignorerest:
2775        return FALSE;
2776      default:
2777        error ("internal error: definedef value.", (char *)NULL);
2778      }
2779
2780    /*
2781     * Now typedefs
2782     */
2783    switch (typdef)
2784      {
2785      case tnone:
2786        if (toktype == st_C_typedef)
2787          {
2788            if (typedefs)
2789              typdef = tkeyseen;
2790            fvextern = FALSE;
2791            fvdef = fvnone;
2792            return FALSE;
2793          }
2794        break;
2795      case tkeyseen:
2796        switch (toktype)
2797          {
2798          case st_none:
2799          case st_C_class:
2800          case st_C_struct:
2801          case st_C_enum:
2802            typdef = ttypeseen;
2803          }
2804        break;
2805      case ttypeseen:
2806        if (structdef == snone && fvdef == fvnone)
2807          {
2808            fvdef = fvnameseen;
2809            return TRUE;
2810          }
2811        break;
2812      case tend:
2813        switch (toktype)
2814          {
2815          case st_C_class:
2816          case st_C_struct:
2817          case st_C_enum:
2818            return FALSE;
2819          }
2820        return TRUE;
2821      }
2822
2823    switch (toktype)
2824      {
2825      case st_C_javastruct:
2826        if (structdef == stagseen)
2827          structdef = scolonseen;
2828        return FALSE;
2829      case st_C_template:
2830      case st_C_class:
2831        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2832            && bracelev == 0
2833            && definedef == dnone && structdef == snone
2834            && typdef == tnone && fvdef == fvnone)
2835          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2836        if (toktype == st_C_template)
2837          break;
2838        /* FALLTHRU */
2839      case st_C_struct:
2840      case st_C_enum:
2841        if (parlev == 0
2842            && fvdef != vignore
2843            && (typdef == tkeyseen
2844                || (typedefs_or_cplusplus && structdef == snone)))
2845          {
2846            structdef = skeyseen;
2847            structtype = toktype;
2848            structbracelev = bracelev;
2849            if (fvdef == fvnameseen)
2850              fvdef = fvnone;
2851          }
2852        return FALSE;
2853      }
2854
2855    if (structdef == skeyseen)
2856      {
2857        structdef = stagseen;
2858        return TRUE;
2859      }
2860
2861    if (typdef != tnone)
2862      definedef = dnone;
2863
2864    /* Detect Objective C constructs. */
2865    switch (objdef)
2866      {
2867      case onone:
2868        switch (toktype)
2869          {
2870          case st_C_objprot:
2871            objdef = oprotocol;
2872            return FALSE;
2873          case st_C_objimpl:
2874            objdef = oimplementation;
2875            return FALSE;
2876          }
2877        break;
2878      case oimplementation:
2879        /* Save the class tag for functions or variables defined inside. */
2880        objtag = savenstr (str, len);
2881        objdef = oinbody;
2882        return FALSE;
2883      case oprotocol:
2884        /* Save the class tag for categories. */
2885        objtag = savenstr (str, len);
2886        objdef = otagseen;
2887        *is_func_or_var = TRUE;
2888        return TRUE;
2889      case oparenseen:
2890        objdef = ocatseen;
2891        *is_func_or_var = TRUE;
2892        return TRUE;
2893      case oinbody:
2894        break;
2895      case omethodsign:
2896        if (parlev == 0)
2897          {
2898            fvdef = fvnone;
2899            objdef = omethodtag;
2900            linebuffer_setlen (&token_name, len);
2901            strncpy (token_name.buffer, str, len);
2902            token_name.buffer[len] = '\0';
2903            return TRUE;
2904          }
2905        return FALSE;
2906      case omethodcolon:
2907        if (parlev == 0)
2908          objdef = omethodparm;
2909        return FALSE;
2910      case omethodparm:
2911        if (parlev == 0)
2912          {
2913            fvdef = fvnone;
2914            objdef = omethodtag;
2915            linebuffer_setlen (&token_name, token_name.len + len);
2916            strncat (token_name.buffer, str, len);
2917            return TRUE;
2918          }
2919        return FALSE;
2920      case oignore:
2921        if (toktype == st_C_objend)
2922          {
2923            /* Memory leakage here: the string pointed by objtag is
2924               never released, because many tests would be needed to
2925               avoid breaking on incorrect input code.  The amount of
2926               memory leaked here is the sum of the lengths of the
2927               class tags.
2928            free (objtag); */
2929            objdef = onone;
2930          }
2931        return FALSE;
2932      }
2933
2934    /* A function, variable or enum constant? */
2935    switch (toktype)
2936      {
2937      case st_C_extern:
2938        fvextern = TRUE;
2939        switch  (fvdef)
2940          {
2941          case finlist:
2942          case flistseen:
2943          case fignore:
2944          case vignore:
2945            break;
2946          default:
2947            fvdef = fvnone;
2948          }
2949        return FALSE;
2950      case st_C_ignore:
2951        fvextern = FALSE;
2952        fvdef = vignore;
2953        return FALSE;
2954      case st_C_operator:
2955        fvdef = foperator;
2956        *is_func_or_var = TRUE;
2957        return TRUE;
2958      case st_none:
2959        if (constantypedefs
2960            && structdef == snone
2961            && structtype == st_C_enum && bracelev > structbracelev)
2962          return TRUE;           /* enum constant */
2963        switch (fvdef)
2964          {
2965          case fdefunkey:
2966            if (bracelev > 0)
2967              break;
2968            fvdef = fdefunname;  /* GNU macro */
2969            *is_func_or_var = TRUE;
2970            return TRUE;
2971          case fvnone:
2972            switch (typdef)
2973              {
2974              case ttypeseen:
2975                return FALSE;
2976              case tnone:
2977                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2978                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2979                  {
2980                    fvdef = vignore;
2981                    return FALSE;
2982                  }
2983                break;
2984              }
2985           /* FALLTHRU */
2986           case fvnameseen:
2987           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2988             {
2989               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2990                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2991               fvdef = foperator;
2992               *is_func_or_var = TRUE;
2993               return TRUE;
2994             }
2995           if (bracelev > 0 && !instruct)
2996             break;
2997           fvdef = fvnameseen;   /* function or variable */
2998           *is_func_or_var = TRUE;
2999           return TRUE;
3000         }
3001       break;
3002     }
3003
3004   return FALSE;
3005 }
3006
3007 \f
3008 /*
3009  * C_entries often keeps pointers to tokens or lines which are older than
3010  * the line currently read.  By keeping two line buffers, and switching
3011  * them at end of line, it is possible to use those pointers.  The macros below expand to uses of C_entries' locals.
3012  */
3013 static struct
3014 {
3015   long linepos;
3016   linebuffer lb;
3017 } lbs[2];
3018
3019 #define current_lb_is_new (newndx == curndx)   /* the line read last is in the current buffer */
3020 #define switch_line_buffers() (curndx = 1 - curndx)    /* flip curndx between 0 and 1 */
3021
3022 #define curlb (lbs[curndx].lb) /* buffer that the next line is read into */
3023 #define newlb (lbs[newndx].lb) /* buffer holding the line read last */
3024 #define curlinepos (lbs[curndx].linepos)       /* charno recorded when curlb was filled */
3025 #define newlinepos (lbs[newndx].linepos)       /* charno recorded when newlb was filled */
3026
3027 #define plainc ((c_ext & C_EXT) == C_PLAIN)    /* c_ext selects plain C */
3028 #define cplpl (c_ext & C_PLPL)                 /* c_ext has a C_PLPL bit set */
3029 #define cjava ((c_ext & C_JAVA) == C_JAVA)     /* c_ext has all the C_JAVA bits set */
3030
3031 #define CNL_SAVE_DEFINEDEF() /* read the next line; definedef is not touched */ \
3032 do {                                                                    \
3033   curlinepos = charno;                                                  \
3034   readline (&curlb, inf);                                               \
3035   lp = curlb.buffer;                                                    \
3036   quotednl = FALSE;                                                     \
3037   newndx = curndx;              /* the new line is in the current buffer */ \
3038 } while (0)
3039
3040 #define CNL() /* read the next line, restore a saved token, reset definedef */ \
3041 do {                                                                    \
3042   CNL_SAVE_DEFINEDEF();                                                 \
3043   if (savetoken.valid)                                                  \
3044     {                                                                   \
3045       token = savetoken;                                                \
3046       savetoken.valid = FALSE;                                          \
3047     }                                                                   \
3048   definedef = dnone;                                                    \
3049 } while (0)
3050
3051
3052 static void
3053 make_C_tag (int isfun)
3054 {
3055   /* This function is never called when token.valid is FALSE, but
3056      we must protect against invalid input or internal errors. */
3057   if (token.valid)
3058     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3059               token.offset+token.length+1, token.lineno, token.linepos);
3060   else if (DEBUG)
3061     {                             /* this branch is optimised away if !DEBUG */
3062       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3063                 token_name.len + 17, isfun, token.line,
3064                 token.offset+token.length+1, token.lineno, token.linepos);
3065       error ("INVALID TOKEN", NULL);
3066     }
3067
3068   token.valid = FALSE;
3069 }
3070
3071
3072 /*
3073  * C_entries ()
3074  *      This routine finds functions, variables, typedefs,
3075  *      #define's, enum constants and struct/union/enum definitions in
3076  *      C syntax and adds them to the list.
3077  */
3078 static void
3079 C_entries (int c_ext, FILE *inf)
3080                                 /* extension of C */
3081                                 /* input file */
3082 {
3083   register char c;              /* latest char read; '\0' for end of line */
3084   register char *lp;            /* pointer one beyond the character `c' */
3085   int curndx, newndx;           /* indices for current and new lb */
3086   register int tokoff;          /* offset in line of start of current token */
3087   register int toklen;          /* length of current token */
3088   const char *qualifier;        /* string used to qualify names */
3089   int qlen;                     /* length of qualifier */
3090   int bracelev;                 /* current brace level */
3091   int bracketlev;               /* current bracket level */
3092   int parlev;                   /* current parenthesis level */
3093   int attrparlev;               /* __attribute__ parenthesis level */
3094   int templatelev;              /* current template level */
3095   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3096   bool incomm, inquote, inchar, quotednl, midtoken;
3097   bool yacc_rules;              /* in the rules part of a yacc file */
3098   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3099
3100
3101   linebuffer_init (&lbs[0].lb);
3102   linebuffer_init (&lbs[1].lb);
3103   if (cstack.size == 0)
3104     {
3105       cstack.size = (DEBUG) ? 1 : 4;
3106       cstack.nl = 0;
3107       cstack.cname = xnew (cstack.size, char *);
3108       cstack.bracelev = xnew (cstack.size, int);
3109     }
3110
3111   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3112   curndx = newndx = 0;
3113   lp = curlb.buffer;
3114   *lp = 0;
3115
3116   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3117   structdef = snone; definedef = dnone; objdef = onone;
3118   yacc_rules = FALSE;
3119   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3120   token.valid = savetoken.valid = FALSE;
3121   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3122   if (cjava)
3123     { qualifier = "."; qlen = 1; }
3124   else
3125     { qualifier = "::"; qlen = 2; }
3126
3127
3128   while (!feof (inf))
3129     {
3130       c = *lp++;
3131       if (c == '\\')
3132         {
3133           /* If we are at the end of the line, the next character is a
3134              '\0'; do not skip it, because it is what tells us
3135              to read the next line.  */
3136           if (*lp == '\0')
3137             {
3138               quotednl = TRUE;
3139               continue;
3140             }
3141           lp++;
3142           c = ' ';
3143         }
3144       else if (incomm)
3145         {
3146           switch (c)
3147             {
3148             case '*':
3149               if (*lp == '/')
3150                 {
3151                   c = *lp++;
3152                   incomm = FALSE;
3153                 }
3154               break;
3155             case '\0':
3156               /* Newlines inside comments do not end macro definitions in
3157                  traditional cpp. */
3158               CNL_SAVE_DEFINEDEF ();
3159               break;
3160             }
3161           continue;
3162         }
3163       else if (inquote)
3164         {
3165           switch (c)
3166             {
3167             case '"':
3168               inquote = FALSE;
3169               break;
3170             case '\0':
3171               /* Newlines inside strings do not end macro definitions
3172                  in traditional cpp, even though compilers don't
3173                  usually accept them. */
3174               CNL_SAVE_DEFINEDEF ();
3175               break;
3176             }
3177           continue;
3178         }
3179       else if (inchar)
3180         {
3181           switch (c)
3182             {
3183             case '\0':
3184               /* Hmmm, something went wrong. */
3185               CNL ();
3186               /* FALLTHRU */
3187             case '\'':
3188               inchar = FALSE;
3189               break;
3190             }
3191           continue;
3192         }
3193       else if (bracketlev > 0)
3194         {
3195           switch (c)
3196             {
3197             case ']':
3198               if (--bracketlev > 0)
3199                 continue;
3200               break;
3201             case '\0':
3202               CNL_SAVE_DEFINEDEF ();
3203               break;
3204             }
3205           continue;
3206         }
3207       else switch (c)
3208         {
3209         case '"':
3210           inquote = TRUE;
3211           if (inattribute)
3212             break;
3213           switch (fvdef)
3214             {
3215             case fdefunkey:
3216             case fstartlist:
3217             case finlist:
3218             case fignore:
3219             case vignore:
3220               break;
3221             default:
3222               fvextern = FALSE;
3223               fvdef = fvnone;
3224             }
3225           continue;
3226         case '\'':
3227           inchar = TRUE;
3228           if (inattribute)
3229             break;
3230           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3231             {
3232               fvextern = FALSE;
3233               fvdef = fvnone;
3234             }
3235           continue;
3236         case '/':
3237           if (*lp == '*')
3238             {
3239               incomm = TRUE;
3240               lp++;
3241               c = ' ';
3242             }
3243           else if (/* cplpl && */ *lp == '/')
3244             {
3245               c = '\0';
3246             }
3247           break;
3248         case '%':
3249           if ((c_ext & YACC) && *lp == '%')
3250             {
3251               /* Entering or exiting rules section in yacc file. */
3252               lp++;
3253               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3254               typdef = tnone; structdef = snone;
3255               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3256               bracelev = 0;
3257               yacc_rules = !yacc_rules;
3258               continue;
3259             }
3260           else
3261             break;
3262         case '#':
3263           if (definedef == dnone)
3264             {
3265               char *cp;
3266               bool cpptoken = TRUE;
3267
3268               /* Look back on this line.  If all blanks, or nonblanks
3269                  followed by an end of comment, this is a preprocessor
3270                  token. */
3271               for (cp = newlb.buffer; cp < lp-1; cp++)
3272                 if (!iswhite (*cp))
3273                   {
3274                     if (*cp == '*' && *(cp+1) == '/')
3275                       {
3276                         cp++;
3277                         cpptoken = TRUE;
3278                       }
3279                     else
3280                       cpptoken = FALSE;
3281                   }
3282               if (cpptoken)
3283                 definedef = dsharpseen;
3284             } /* if (definedef == dnone) */
3285           continue;
3286         case '[':
3287           bracketlev++;
3288             continue;
3289         } /* switch (c) */
3290
3291
3292       /* Consider token only if some involved conditions are satisfied. */
3293       if (typdef != tignore
3294           && definedef != dignorerest
3295           && fvdef != finlist
3296           && templatelev == 0
3297           && (definedef != dnone
3298               || structdef != scolonseen)
3299           && !inattribute)
3300         {
3301           if (midtoken)
3302             {
3303               if (endtoken (c))
3304                 {
3305                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3306                     /* This handles :: in the middle,
3307                        but not at the beginning of an identifier.
3308                        Also, space-separated :: is not recognised. */
3309                     {
3310                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3311                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3312                       lp += 2;
3313                       toklen += 2;
3314                       c = lp[-1];
3315                       goto still_in_token;
3316                     }
3317                   else
3318                     {
3319                       bool funorvar = FALSE;
3320
3321                       if (yacc_rules
3322                           || consider_token (newlb.buffer + tokoff, toklen, c,
3323                                              &c_ext, bracelev, parlev,
3324                                              &funorvar))
3325                         {
3326                           if (fvdef == foperator)
3327                             {
3328                               char *oldlp = lp;
3329                               lp = skip_spaces (lp-1);
3330                               if (*lp != '\0')
3331                                 lp += 1;
3332                               while (*lp != '\0'
3333                                      && !iswhite (*lp) && *lp != '(')
3334                                 lp += 1;
3335                               c = *lp++;
3336                               toklen += lp - oldlp;
3337                             }
3338                           token.named = FALSE;
3339                           if (!plainc
3340                               && nestlev > 0 && definedef == dnone)
3341                             /* in struct body */
3342                             {
3343                               write_classname (&token_name, qualifier);
3344                               linebuffer_setlen (&token_name,
3345                                                  token_name.len+qlen+toklen);
3346                               strcat (token_name.buffer, qualifier);
3347                               strncat (token_name.buffer,
3348                                        newlb.buffer + tokoff, toklen);
3349                               token.named = TRUE;
3350                             }
3351                           else if (objdef == ocatseen)
3352                             /* Objective C category */
3353                             {
3354                               int len = strlen (objtag) + 2 + toklen;
3355                               linebuffer_setlen (&token_name, len);
3356                               strcpy (token_name.buffer, objtag);
3357                               strcat (token_name.buffer, "(");
3358                               strncat (token_name.buffer,
3359                                        newlb.buffer + tokoff, toklen);
3360                               strcat (token_name.buffer, ")");
3361                               token.named = TRUE;
3362                             }
3363                           else if (objdef == omethodtag
3364                                    || objdef == omethodparm)
3365                             /* Objective C method */
3366                             {
3367                               token.named = TRUE;
3368                             }
3369                           else if (fvdef == fdefunname)
3370                             /* GNU DEFUN and similar macros */
3371                             {
3372                               bool defun = (newlb.buffer[tokoff] == 'F');
3373                               int off = tokoff;
3374                               int len = toklen;
3375
3376                               /* Rewrite the tag so that emacs lisp DEFUNs
3377                                  can be found by their elisp name */
3378                               if (defun)
3379                                 {
3380                                   off += 1;
3381                                   len -= 1;
3382                                 }
3383                               linebuffer_setlen (&token_name, len);
3384                               strncpy (token_name.buffer,
3385                                        newlb.buffer + off, len);
3386                               token_name.buffer[len] = '\0';
3387                               if (defun)
3388                                 while (--len >= 0)
3389                                   if (token_name.buffer[len] == '_')
3390                                     token_name.buffer[len] = '-';
3391                               token.named = defun;
3392                             }
3393                           else
3394                             {
3395                               linebuffer_setlen (&token_name, toklen);
3396                               strncpy (token_name.buffer,
3397                                        newlb.buffer + tokoff, toklen);
3398                               token_name.buffer[toklen] = '\0';
3399                               /* Name macros and members. */
3400                               token.named = (structdef == stagseen
3401                                              || typdef == ttypeseen
3402                                              || typdef == tend
3403                                              || (funorvar
3404                                                  && definedef == dignorerest)
3405                                              || (funorvar
3406                                                  && definedef == dnone
3407                                                  && structdef == snone
3408                                                  && bracelev > 0));
3409                             }
3410                           token.lineno = lineno;
3411                           token.offset = tokoff;
3412                           token.length = toklen;
3413                           token.line = newlb.buffer;
3414                           token.linepos = newlinepos;
3415                           token.valid = TRUE;
3416
3417                           if (definedef == dnone
3418                               && (fvdef == fvnameseen
3419                                   || fvdef == foperator
3420                                   || structdef == stagseen
3421                                   || typdef == tend
3422                                   || typdef == ttypeseen
3423                                   || objdef != onone))
3424                             {
3425                               if (current_lb_is_new)
3426                                 switch_line_buffers ();
3427                             }
3428                           else if (definedef != dnone
3429                                    || fvdef == fdefunname
3430                                    || instruct)
3431                             make_C_tag (funorvar);
3432                         }
3433                       else /* not yacc and consider_token failed */
3434                         {
3435                           if (inattribute && fvdef == fignore)
3436                             {
3437                               /* We have just met __attribute__ after a
3438                                  function parameter list: do not tag the
3439                                  function again. */
3440                               fvdef = fvnone;
3441                             }
3442                         }
3443                       midtoken = FALSE;
3444                     }
3445                 } /* if (endtoken (c)) */
3446               else if (intoken (c))
3447                 still_in_token:
3448                 {
3449                   toklen++;
3450                   continue;
3451                 }
3452             } /* if (midtoken) */
3453           else if (begtoken (c))
3454             {
3455               switch (definedef)
3456                 {
3457                 case dnone:
3458                   switch (fvdef)
3459                     {
3460                     case fstartlist:
3461                       /* This prevents tagging fb in
3462                          void (__attribute__((noreturn)) *fb) (void);
3463                          Fixing this is not easy and not very important. */
3464                       fvdef = finlist;
3465                       continue;
3466                     case flistseen:
3467                       if (plainc || declarations)
3468                         {
3469                           make_C_tag (TRUE); /* a function */
3470                           fvdef = fignore;
3471                         }
3472                       break;
3473                     }
3474                   if (structdef == stagseen && !cjava)
3475                     {
3476                       popclass_above (bracelev);
3477                       structdef = snone;
3478                     }
3479                   break;
3480                 case dsharpseen:
3481                   savetoken = token;
3482                   break;
3483                 }
3484               if (!yacc_rules || lp == newlb.buffer + 1)
3485                 {
3486                   tokoff = lp - 1 - newlb.buffer;
3487                   toklen = 1;
3488                   midtoken = TRUE;
3489                 }
3490               continue;
3491             } /* if (begtoken) */
3492         } /* if must look at token */
3493
3494
3495       /* Detect end of line, colon, comma, semicolon and various braces
3496          after having handled a token.*/
3497       switch (c)
3498         {
3499         case ':':
3500           if (inattribute)
3501             break;
3502           if (yacc_rules && token.offset == 0 && token.valid)
3503             {
3504               make_C_tag (FALSE); /* a yacc function */
3505               break;
3506             }
3507           if (definedef != dnone)
3508             break;
3509           switch (objdef)
3510             {
3511             case  otagseen:
3512               objdef = oignore;
3513               make_C_tag (TRUE); /* an Objective C class */
3514               break;
3515             case omethodtag:
3516             case omethodparm:
3517               objdef = omethodcolon;
3518               linebuffer_setlen (&token_name, token_name.len + 1);
3519               strcat (token_name.buffer, ":");
3520               break;
3521             }
3522           if (structdef == stagseen)
3523             {
3524               structdef = scolonseen;
3525               break;
3526             }
3527           /* Should be useless, but may work as a safety net. */
3528           if (cplpl && fvdef == flistseen)
3529             {
3530               make_C_tag (TRUE); /* a function */
3531               fvdef = fignore;
3532               break;
3533             }
3534           break;
3535         case ';':
3536           if (definedef != dnone || inattribute)
3537             break;
3538           switch (typdef)
3539             {
3540             case tend:
3541             case ttypeseen:
3542               make_C_tag (FALSE); /* a typedef */
3543               typdef = tnone;
3544               fvdef = fvnone;
3545               break;
3546             case tnone:
3547             case tinbody:
3548             case tignore:
3549               switch (fvdef)
3550                 {
3551                 case fignore:
3552                   if (typdef == tignore || cplpl)
3553                     fvdef = fvnone;
3554                   break;
3555                 case fvnameseen:
3556                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3557                       || (members && instruct))
3558                     make_C_tag (FALSE); /* a variable */
3559                   fvextern = FALSE;
3560                   fvdef = fvnone;
3561                   token.valid = FALSE;
3562                   break;
3563                 case flistseen:
3564                   if ((declarations
3565                        && (cplpl || !instruct)
3566                        && (typdef == tnone || (typdef != tignore && instruct)))
3567                       || (members
3568                           && plainc && instruct))
3569                     make_C_tag (TRUE);  /* a function */
3570                   /* FALLTHRU */
3571                 default:
3572                   fvextern = FALSE;
3573                   fvdef = fvnone;
3574                   if (declarations
3575                        && cplpl && structdef == stagseen)
3576                     make_C_tag (FALSE); /* forward declaration */
3577                   else
3578                     token.valid = FALSE;
3579                 } /* switch (fvdef) */
3580               /* FALLTHRU */
3581             default:
3582               if (!instruct)
3583                 typdef = tnone;
3584             }
3585           if (structdef == stagseen)
3586             structdef = snone;
3587           break;
3588         case ',':
3589           if (definedef != dnone || inattribute)
3590             break;
3591           switch (objdef)
3592             {
3593             case omethodtag:
3594             case omethodparm:
3595               make_C_tag (TRUE); /* an Objective C method */
3596               objdef = oinbody;
3597               break;
3598             }
3599           switch (fvdef)
3600             {
3601             case fdefunkey:
3602             case foperator:
3603             case fstartlist:
3604             case finlist:
3605             case fignore:
3606             case vignore:
3607               break;
3608             case fdefunname:
3609               fvdef = fignore;
3610               break;
3611             case fvnameseen:
3612               if (parlev == 0
3613                   && ((globals
3614                        && bracelev == 0
3615                        && templatelev == 0
3616                        && (!fvextern || declarations))
3617                       || (members && instruct)))
3618                   make_C_tag (FALSE); /* a variable */
3619               break;
3620             case flistseen:
3621               if ((declarations && typdef == tnone && !instruct)
3622                   || (members && typdef != tignore && instruct))
3623                 {
3624                   make_C_tag (TRUE); /* a function */
3625                   fvdef = fvnameseen;
3626                 }
3627               else if (!declarations)
3628                 fvdef = fvnone;
3629               token.valid = FALSE;
3630               break;
3631             default:
3632               fvdef = fvnone;
3633             }
3634           if (structdef == stagseen)
3635             structdef = snone;
3636           break;
3637         case ']':
3638           if (definedef != dnone || inattribute)
3639             break;
3640           if (structdef == stagseen)
3641             structdef = snone;
3642           switch (typdef)
3643             {
3644             case ttypeseen:
3645             case tend:
3646               typdef = tignore;
3647               make_C_tag (FALSE);       /* a typedef */
3648               break;
3649             case tnone:
3650             case tinbody:
3651               switch (fvdef)
3652                 {
3653                 case foperator:
3654                 case finlist:
3655                 case fignore:
3656                 case vignore:
3657                   break;
3658                 case fvnameseen:
3659                   if ((members && bracelev == 1)
3660                       || (globals && bracelev == 0
3661                           && (!fvextern || declarations)))
3662                     make_C_tag (FALSE); /* a variable */
3663                   /* FALLTHRU */
3664                 default:
3665                   fvdef = fvnone;
3666                 }
3667               break;
3668             }
3669           break;
3670         case '(':
3671           if (inattribute)
3672             {
3673               attrparlev++;
3674               break;
3675             }
3676           if (definedef != dnone)
3677             break;
3678           if (objdef == otagseen && parlev == 0)
3679             objdef = oparenseen;
3680           switch (fvdef)
3681             {
3682             case fvnameseen:
3683               if (typdef == ttypeseen
3684                   && *lp != '*'
3685                   && !instruct)
3686                 {
3687                   /* This handles constructs like:
3688                      typedef void OperatorFun (int fun); */
3689                   make_C_tag (FALSE);
3690                   typdef = tignore;
3691                   fvdef = fignore;
3692                   break;
3693                 }
3694               /* FALLTHRU */
3695             case foperator:
3696               fvdef = fstartlist;
3697               break;
3698             case flistseen:
3699               fvdef = finlist;
3700               break;
3701             }
3702           parlev++;
3703           break;
3704         case ')':
3705           if (inattribute)
3706             {
3707               if (--attrparlev == 0)
3708                 inattribute = FALSE;
3709               break;
3710             }
3711           if (definedef != dnone)
3712             break;
3713           if (objdef == ocatseen && parlev == 1)
3714             {
3715               make_C_tag (TRUE); /* an Objective C category */
3716               objdef = oignore;
3717             }
3718           if (--parlev == 0)
3719             {
3720               switch (fvdef)
3721                 {
3722                 case fstartlist:
3723                 case finlist:
3724                   fvdef = flistseen;
3725                   break;
3726                 }
3727               if (!instruct
3728                   && (typdef == tend
3729                       || typdef == ttypeseen))
3730                 {
3731                   typdef = tignore;
3732                   make_C_tag (FALSE); /* a typedef */
3733                 }
3734             }
3735           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3736             parlev = 0;
3737           break;
3738         case '{':
3739           if (definedef != dnone)
3740             break;
3741           if (typdef == ttypeseen)
3742             {
3743               /* Whenever typdef is set to tinbody (currently only
3744                  here), typdefbracelev should be set to bracelev. */
3745               typdef = tinbody;
3746               typdefbracelev = bracelev;
3747             }
3748           switch (fvdef)
3749             {
3750             case flistseen:
3751               make_C_tag (TRUE);    /* a function */
3752               /* FALLTHRU */
3753             case fignore:
3754               fvdef = fvnone;
3755               break;
3756             case fvnone:
3757               switch (objdef)
3758                 {
3759                 case otagseen:
3760                   make_C_tag (TRUE); /* an Objective C class */
3761                   objdef = oignore;
3762                   break;
3763                 case omethodtag:
3764                 case omethodparm:
3765                   make_C_tag (TRUE); /* an Objective C method */
3766                   objdef = oinbody;
3767                   break;
3768                 default:
3769                   /* Neutralize `extern "C" {' grot. */
3770                   if (bracelev == 0 && structdef == snone && nestlev == 0
3771                       && typdef == tnone)
3772                     bracelev = -1;
3773                 }
3774               break;
3775             }
3776           switch (structdef)
3777             {
3778             case skeyseen:         /* unnamed struct */
3779               pushclass_above (bracelev, NULL, 0);
3780               structdef = snone;
3781               break;
3782             case stagseen:         /* named struct or enum */
3783             case scolonseen:       /* a class */
3784               pushclass_above (bracelev,token.line+token.offset, token.length);
3785               structdef = snone;
3786               make_C_tag (FALSE);  /* a struct or enum */
3787               break;
3788             }
3789           bracelev += 1;
3790           break;
3791         case '*':
3792           if (definedef != dnone)
3793             break;
3794           if (fvdef == fstartlist)
3795             {
3796               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3797               token.valid = FALSE;
3798             }
3799           break;
3800         case '}':
3801           if (definedef != dnone)
3802             break;
3803           bracelev -= 1;
3804           if (!ignoreindent && lp == newlb.buffer + 1)
3805             {
3806               if (bracelev != 0)
3807                 token.valid = FALSE; /* unexpected value, token unreliable */
3808               bracelev = 0;     /* reset brace level if first column */
3809               parlev = 0;       /* also reset paren level, just in case... */
3810             }
3811           else if (bracelev < 0)
3812             {
3813               token.valid = FALSE; /* something gone amiss, token unreliable */
3814               bracelev = 0;
3815             }
3816           if (bracelev == 0 && fvdef == vignore)
3817             fvdef = fvnone;             /* end of function */
3818           popclass_above (bracelev);
3819           structdef = snone;
3820           /* Only if typdef == tinbody is typdefbracelev significant. */
3821           if (typdef == tinbody && bracelev <= typdefbracelev)
3822             {
3823               assert (bracelev == typdefbracelev);
3824               typdef = tend;
3825             }
3826           break;
3827         case '=':
3828           if (definedef != dnone)
3829             break;
3830           switch (fvdef)
3831             {
3832             case foperator:
3833             case finlist:
3834             case fignore:
3835             case vignore:
3836               break;
3837             case fvnameseen:
3838               if ((members && bracelev == 1)
3839                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3840                 make_C_tag (FALSE); /* a variable */
3841               /* FALLTHRU */
3842             default:
3843               fvdef = vignore;
3844             }
3845           break;
3846         case '<':
3847           if (cplpl
3848               && (structdef == stagseen || fvdef == fvnameseen))
3849             {
3850               templatelev++;
3851               break;
3852             }
3853           goto resetfvdef;
3854         case '>':
3855           if (templatelev > 0)
3856             {
3857               templatelev--;
3858               break;
3859             }
3860           goto resetfvdef;
3861         case '+':
3862         case '-':
3863           if (objdef == oinbody && bracelev == 0)
3864             {
3865               objdef = omethodsign;
3866               break;
3867             }
3868           /* FALLTHRU */
3869         resetfvdef:
3870         case '#': case '~': case '&': case '%': case '/':
3871         case '|': case '^': case '!': case '.': case '?':
3872           if (definedef != dnone)
3873             break;
3874           /* These surely cannot follow a function tag in C. */
3875           switch (fvdef)
3876             {
3877             case foperator:
3878             case finlist:
3879             case fignore:
3880             case vignore:
3881               break;
3882             default:
3883               fvdef = fvnone;
3884             }
3885           break;
3886         case '\0':
3887           if (objdef == otagseen)
3888             {
3889               make_C_tag (TRUE); /* an Objective C class */
3890               objdef = oignore;
3891             }
3892           /* If a macro spans multiple lines don't reset its state. */
3893           if (quotednl)
3894             CNL_SAVE_DEFINEDEF ();
3895           else
3896             CNL ();
3897           break;
3898         } /* switch (c) */
3899
3900     } /* while not eof */
3901
3902   free (lbs[0].lb.buffer);
3903   free (lbs[1].lb.buffer);
3904 }
3905
3906 /*
3907  * Process either a C++ file or a C file depending on the setting
3908  * of a global flag.
      *
      * If the global `cplusplus' is nonzero, C_entries is called with
      * C_PLPL; otherwise it is called with C_AUTO, the flag that the C
      * parser's own comment describes as "automatic detection of C++".
      * INF is handed to C_entries unchanged.
3909  */
3910 static void
3911 default_C_entries (FILE *inf)
3912 {
3913   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3914 }
3915
3916 /* Always do plain C: call C_entries on INF with a flags argument
        of 0, i.e. with no language flag set.  */
3917 static void
3918 plain_C_entries (FILE *inf)
3919 {
3920   C_entries (0, inf);
3921 }
3922
3923 /* Always do C++: call C_entries on INF with the C_PLPL flag.  The
        `cplusplus' global is not consulted here.  */
3924 static void
3925 Cplusplus_entries (FILE *inf)
3926 {
3927   C_entries (C_PLPL, inf);
3928 }
3929
3930 /* Always do Java: call C_entries on INF with the C_JAVA flag.  */
3931 static void
3932 Cjava_entries (FILE *inf)
3933 {
3934   C_entries (C_JAVA, inf);
3935 }
3936
3937 /* Always do C*: call C_entries on INF with the C_STAR flag.  */
3938 static void
3939 Cstar_entries (FILE *inf)
3940 {
3941   C_entries (C_STAR, inf);
3942 }
3943
3944 /* Always do Yacc: call C_entries on INF with the YACC flag.  */
3945 static void
3946 Yacc_entries (FILE *inf)
3947 {
3948   C_entries (YACC, inf);
3949 }
3950
3951 \f
3952 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to the header of a `for' statement; the
        statement that follows the macro invocation is the loop body.
        Before each iteration it tests feof (file_pointer) and, if that is
        false, calls readline to fill line_buffer and then sets
        char_pointer to line_buffer.buffer.  The increment part is empty,
        so `continue' in the body goes straight to reading the next line.
        The arguments are pasted unparenthesized (&line_buffer,
        line_buffer.buffer), so pass plain variable names.  */
3953 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
3954   for (;                        /* loop initialization */               \
3955        !feof (file_pointer)     /* loop test */                         \
3956        &&                       /* instructions at start of loop */     \
3957           (readline (&line_buffer, file_pointer),                       \
3958            char_pointer = line_buffer.buffer,                           \
3959            TRUE);                                                       \
3960       )
3961
     /* LOOKING_AT (cp, kw) is true if the text at CP starts with the
        literal string KW (compared with strneq over sizeof(kw)-1
        characters) and the character right after it satisfies notinname.
        On success CP is reassigned to skip_spaces (CP + sizeof(kw)-1),
        i.e. it is moved past the keyword and what skip_spaces skips after
        it; when an earlier test fails, the && chain stops and CP is left
        unchanged.  CP must be an assignable pointer expression and is
        evaluated more than once.  */
3962 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
3963   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
3964    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
3965    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
3966    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
3967
3968 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use
        notinname to check the character after KW, and does not skip
        anything after the keyword: on success CP is advanced by exactly
        sizeof(kw)-1 characters; otherwise CP is left unchanged.  */
3969 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3970   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
3971    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
3972    && ((cp) += sizeof(kw)-1))                   /* skip kw only */
3973
3974 /*
3975  * Read a file, but do no processing.  This is used to do regexp
3976  * matching on files that have no language defined.
      *
      * Every line of INF is read into the global line buffer `lb' with
      * readline until feof (inf) is true; nothing else is done with the
      * line here.  NOTE(review): the regexp matching mentioned above is
      * presumably performed inside readline -- confirm.
3977  */
3978 static void
3979 just_read_file (FILE *inf)
3980 {
3981   while (!feof (inf))
3982     readline (&lb, inf);
3983 }
3984
3985 \f
3986 /* Fortran parsing */
3987
3988 static void F_takeprec (void);
3989 static void F_getit (FILE *);
3990
     /* Skip an optional length/precision suffix at the global cursor dbp.
        After optional spaces, a `*' must follow, otherwise dbp is only
        moved past those spaces.  After the `*' (and more optional spaces)
        either the three characters "(*)" or a run of digits is consumed,
        leaving dbp just after it.  If neither is found, dbp is stepped
        back by one character so that it no longer points at the text
        following the `*'.  */
3991 static void
3992 F_takeprec (void)
3993 {
3994   dbp = skip_spaces (dbp);
3995   if (*dbp != '*')
3996     return;
3997   dbp++;
3998   dbp = skip_spaces (dbp);
3999   if (strneq (dbp, "(*)", 3))
4000     {
4001       dbp += 3;
4002       return;
4003     }
4004   if (!ISDIGIT (*dbp))
4005     {
4006       --dbp;                    /* force failure */
4007       return;
4008     }
       /* Consume the whole run of digits.  */
4009   do
4010     dbp++;
4011   while (ISDIGIT (*dbp));
4012 }
4013
4014 static void
4015 F_getit (FILE *inf)
4016 {
4017   register char *cp;
4018
4019   dbp = skip_spaces (dbp);
4020   if (*dbp == '\0')
4021     {
4022       readline (&lb, inf);
4023       dbp = lb.buffer;
4024       if (dbp[5] != '&')
4025         return;
4026       dbp += 6;
4027       dbp = skip_spaces (dbp);
4028     }
4029   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4030     return;
4031   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4032     continue;
4033   make_tag (dbp, cp-dbp, TRUE,
4034             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4035 }
4036
4037
     /* Make tags for a Fortran source read from INF.

        Each input line is scanned with the global cursor dbp.  A leading
        `%' (Ratfor escape) and an optional `recursive' keyword are
        skipped, then an optional type prefix: `integer', `real',
        `logical', `complex' or `character' (each followed by an optional
        precision, see F_takeprec), or `double precision'.  What follows
        must be `function', `subroutine', `entry', `blockdata' or
        `block data'; the name after it is tagged by F_getit.  A block
        data with nothing after it on the line is tagged as "blockdata".

        NOTE(review): nocase_tail is assumed to advance dbp past the
        keyword when it matches -- confirm against its definition.  */
4038 static void
4039 Fortran_functions (FILE *inf)
4040 {
4041   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4042     {
4043       if (*dbp == '%')
4044         dbp++;                  /* Ratfor escape to fortran */
4045       dbp = skip_spaces (dbp);
4046       if (*dbp == '\0')
4047         continue;
4048
4049       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4050         dbp = skip_spaces (dbp);
4051
           /* Optional type prefix, as in `integer function f'.  */
4052       switch (lowcase (*dbp))
4053         {
4054         case 'i':
4055           if (nocase_tail ("integer"))
4056             F_takeprec ();
4057           break;
4058         case 'r':
4059           if (nocase_tail ("real"))
4060             F_takeprec ();
4061           break;
4062         case 'l':
4063           if (nocase_tail ("logical"))
4064             F_takeprec ();
4065           break;
4066         case 'c':
4067           if (nocase_tail ("complex") || nocase_tail ("character"))
4068             F_takeprec ();
4069           break;
4070         case 'd':
4071           if (nocase_tail ("double"))
4072             {
4073               dbp = skip_spaces (dbp);
4074               if (*dbp == '\0')
4075                 continue;
4076               if (nocase_tail ("precision"))
4077                 break;
4078               continue;         /* `double' without `precision': next line */
4079             }
4080           break;
4081         }
4082       dbp = skip_spaces (dbp);
4083       if (*dbp == '\0')
4084         continue;
           /* Keyword introducing something to tag.  Every case ends with
              `continue', which moves on to the next input line.  */
4085       switch (lowcase (*dbp))
4086         {
4087         case 'f':
4088           if (nocase_tail ("function"))
4089             F_getit (inf);
4090           continue;
4091         case 's':
4092           if (nocase_tail ("subroutine"))
4093             F_getit (inf);
4094           continue;
4095         case 'e':
4096           if (nocase_tail ("entry"))
4097             F_getit (inf);
4098           continue;
4099         case 'b':
4100           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4101             {
4102               dbp = skip_spaces (dbp);
4103               if (*dbp == '\0') /* assume un-named */
4104                 make_tag ("blockdata", 9, TRUE,
4105                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4106               else
4107                 F_getit (inf);  /* look for name */
4108             }
4109           continue;
4110         }
4111     }
4112 }
4113
4114 \f
4115 /*
4116  * Ada parsing
4117  * Original code by
4118  * Philippe Waroquiers (1998)
4119  */
4120
4121 /* Once we are positioned after an "interesting" keyword, let's get
4122    the real tag value necessary.

        Starting at the global cursor dbp, find the name that follows and
        tag it as that name with NAME_QUALIFIER appended (callers in this
        file pass suffixes such as "/f", "/p", "/s" and "/t").

        If the rest of the line is empty or is a `--' comment, the next
        line is read from INF first.  The keyword `body' is skipped and
        switches the qualifier to "/b"; the keyword `type' is skipped.
        The name is either the text between double quotes or a run of
        letters, digits, `_' and `.'.  If no such run is found, the
        function returns without making a tag.  */
4123 static void
4124 Ada_getit (FILE *inf, const char *name_qualifier)
4125 {
4126   register char *cp;
4127   char *name;
4128   char c;
4129
4130   while (!feof (inf))
4131     {
4132       dbp = skip_spaces (dbp);
4133       if (*dbp == '\0'
4134           || (dbp[0] == '-' && dbp[1] == '-'))
4135         {
4136           readline (&lb, inf);
4137           dbp = lb.buffer;
4138         }
4139       switch (lowcase(*dbp))
4140         {
4141         case 'b':
4142           if (nocase_tail ("body"))
4143             {
4144               /* Skipping body of   procedure body   or   package body or ....
4145                  resetting qualifier to body instead of spec. */
4146               name_qualifier = "/b";
4147               continue;
4148             }
4149           break;
4150         case 't':
4151           /* Skipping type of   task type   or   protected type ... */
4152           if (nocase_tail ("type"))
4153             continue;
4154           break;
4155         }
4156       if (*dbp == '"')
4157         {
               /* Quoted name: take everything up to the closing quote, or
                  up to the end of the line if there is none.  */
4158           dbp += 1;
4159           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4160             continue;
4161         }
4162       else
4163         {
4164           dbp = skip_spaces (dbp);
4165           for (cp = dbp;
4166                (*cp != '\0'
4167                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4168                cp++)
4169             continue;
4170           if (cp == dbp)
4171             return;
4172         }
           /* Terminate the name in place just long enough for concat to
              copy it, then restore the overwritten character.  */
4173       c = *cp;
4174       *cp = '\0';
4175       name = concat (dbp, name_qualifier, "");
4176       *cp = c;
4177       make_tag (name, strlen (name), TRUE,
4178                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4179       free (name);
           /* For a quoted name, leave dbp after the closing quote.  */
4180       if (c == '"')
4181         dbp = cp + 1;
4182       return;
4183     }
4184 }
4185
4186 static void
4187 Ada_funcs (FILE *inf)
4188 {
4189   bool inquote = FALSE;
4190   bool skip_till_semicolumn = FALSE;
4191
4192   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4193     {
4194       while (*dbp != '\0')
4195         {
4196           /* Skip a string i.e. "abcd". */
4197           if (inquote || (*dbp == '"'))
4198             {
4199               dbp = etags_strchr (dbp + !inquote, '"');
4200               if (dbp != NULL)
4201                 {
4202                   inquote = FALSE;
4203                   dbp += 1;
4204                   continue;     /* advance char */
4205                 }
4206               else
4207                 {
4208                   inquote = TRUE;
4209                   break;        /* advance line */
4210                 }
4211             }
4212
4213           /* Skip comments. */
4214           if (dbp[0] == '-' && dbp[1] == '-')
4215             break;              /* advance line */
4216
4217           /* Skip character enclosed in single quote i.e. 'a'
4218              and skip single quote starting an attribute i.e. 'Image. */
4219           if (*dbp == '\'')
4220             {
4221               dbp++ ;
4222               if (*dbp != '\0')
4223                 dbp++;
4224               continue;
4225             }
4226
4227           if (skip_till_semicolumn)
4228             {
4229               if (*dbp == ';')
4230                 skip_till_semicolumn = FALSE;
4231               dbp++;
4232               continue;         /* advance char */
4233             }
4234
4235           /* Search for beginning of a token.  */
4236           if (!begtoken (*dbp))
4237             {
4238               dbp++;
4239               continue;         /* advance char */
4240             }
4241
4242           /* We are at the beginning of a token. */
4243           switch (lowcase(*dbp))
4244             {
4245             case 'f':
4246               if (!packages_only && nocase_tail ("function"))
4247                 Ada_getit (inf, "/f");
4248               else
4249                 break;          /* from switch */
4250               continue;         /* advance char */
4251             case 'p':
4252               if (!packages_only && nocase_tail ("procedure"))
4253                 Ada_getit (inf, "/p");
4254               else if (nocase_tail ("package"))
4255                 Ada_getit (inf, "/s");
4256               else if (nocase_tail ("protected")) /* protected type */
4257                 Ada_getit (inf, "/t");
4258               else
4259                 break;          /* from switch */
4260               continue;         /* advance char */
4261
4262             case 'u':
4263               if (typedefs && !packages_only && nocase_tail ("use"))
4264                 {
4265                   /* when tagging types, avoid tagging  use type Pack.Typename;
4266                      for this, we will skip everything till a ; */
4267                   skip_till_semicolumn = TRUE;
4268                   continue;     /* advance char */
4269                 }
4270
4271             case 't':
4272               if (!packages_only && nocase_tail ("task"))
4273                 Ada_getit (inf, "/k");
4274               else if (typedefs && !packages_only && nocase_tail ("type"))
4275                 {
4276                   Ada_getit (inf, "/t");
4277                   while (*dbp != '\0')
4278                     dbp += 1;
4279                 }
4280               else
4281                 break;          /* from switch */
4282               continue;         /* advance char */
4283             }
4284
4285           /* Look for the end of the token. */
4286           while (!endtoken (*dbp))
4287             dbp++;
4288
4289         } /* advance char */
4290     } /* advance line */
4291 }
4292
4293 \f
4294 /*
4295  * Unix and microcontroller assembly tag handling
4296  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4297  * Idea by Bob Weiner, Motorola Inc. (1994)
4298  */
4299 static void
4300 Asm_labels (FILE *inf)
4301 {
4302   register char *cp;
4303
4304   LOOP_ON_INPUT_LINES (inf, lb, cp)
4305     {
4306       /* If first char is alphabetic or one of [_.$], test for colon
4307          following identifier. */
4308       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4309         {
4310           /* Read past label. */
4311           cp++;
4312           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4313             cp++;
4314           if (*cp == ':' || iswhite (*cp))
4315             /* Found end of label, so copy it and add it to the table. */
4316             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4317                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4318         }
4319     }
4320 }
4321
4322 \f
4323 /*
4324  * Perl support
4325  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4326  * Perl variable names: /^(my|local).../
4327  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4328  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4329  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4330  */
4331 static void
4332 Perl_functions (FILE *inf)
4333 {
4334   char *package = savestr ("main"); /* current package name */
4335   register char *cp;
4336
4337   LOOP_ON_INPUT_LINES (inf, lb, cp)
4338     {
4339       cp = skip_spaces (cp);
4340
4341       if (LOOKING_AT (cp, "package"))
4342         {
4343           free (package);
4344           get_tag (cp, &package);
4345         }
4346       else if (LOOKING_AT (cp, "sub"))
4347         {
4348           char *pos;
4349           char *sp = cp;
4350
4351           while (!notinname (*cp))
4352             cp++;
4353           if (cp == sp)
4354             continue;           /* nothing found */
4355           if ((pos = etags_strchr (sp, ':')) != NULL
4356               && pos < cp && pos[1] == ':')
4357             /* The name is already qualified. */
4358             make_tag (sp, cp - sp, TRUE,
4359                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4360           else
4361             /* Qualify it. */
4362             {
4363               char savechar, *name;
4364
4365               savechar = *cp;
4366               *cp = '\0';
4367               name = concat (package, "::", sp);
4368               *cp = savechar;
4369               make_tag (name, strlen(name), TRUE,
4370                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4371               free (name);
4372             }
4373         }
4374        else if (globals)        /* only if we are tagging global vars */
4375         {
4376           /* Skip a qualifier, if any. */
4377           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4378           /* After "my" or "local", but before any following paren or space. */
4379           char *varstart = cp;
4380
4381           if (qual              /* should this be removed?  If yes, how? */
4382               && (*cp == '$' || *cp == '@' || *cp == '%'))
4383             {
4384               varstart += 1;
4385               do
4386                 cp++;
4387               while (ISALNUM (*cp) || *cp == '_');
4388             }
4389           else if (qual)
4390             {
4391               /* Should be examining a variable list at this point;
4392                  could insist on seeing an open parenthesis. */
4393               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4394                 cp++;
4395             }
4396           else
4397             continue;
4398
4399           make_tag (varstart, cp - varstart, FALSE,
4400                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4401         }
4402     }
4403   free (package);
4404 }
4405
4406
4407 /*
4408  * Python support
4409  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4410  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4411  * More ideas by seb bacon <seb@jamkit.com> (2002)
4412  */
4413 static void
4414 Python_functions (FILE *inf)
4415 {
4416   register char *cp;
4417
4418   LOOP_ON_INPUT_LINES (inf, lb, cp)
4419     {
4420       cp = skip_spaces (cp);
4421       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4422         {
4423           char *name = cp;
4424           while (!notinname (*cp) && *cp != ':')
4425             cp++;
4426           make_tag (name, cp - name, TRUE,
4427                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4428         }
4429     }
4430 }
4431
4432 \f
4433 /*
4434  * PHP support
4435  * Look for:
4436  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4437  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4438  *  - /^[ \t]*define\(\"[^\"]+/
4439  * Only with --members:
4440  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4441  * Idea by Diez B. Roggisch (2001)
4442  */
4443 static void
4444 PHP_functions (FILE *inf)
4445 {
4446   register char *cp, *name;
4447   bool search_identifier = FALSE;
4448
4449   LOOP_ON_INPUT_LINES (inf, lb, cp)
4450     {
4451       cp = skip_spaces (cp);
4452       name = cp;
4453       if (search_identifier
4454           && *cp != '\0')
4455         {
4456           while (!notinname (*cp))
4457             cp++;
4458           make_tag (name, cp - name, TRUE,
4459                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4460           search_identifier = FALSE;
4461         }
4462       else if (LOOKING_AT (cp, "function"))
4463         {
4464           if(*cp == '&')
4465             cp = skip_spaces (cp+1);
4466           if(*cp != '\0')
4467             {
4468               name = cp;
4469               while (!notinname (*cp))
4470                 cp++;
4471               make_tag (name, cp - name, TRUE,
4472                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4473             }
4474           else
4475             search_identifier = TRUE;
4476         }
4477       else if (LOOKING_AT (cp, "class"))
4478         {
4479           if (*cp != '\0')
4480             {
4481               name = cp;
4482               while (*cp != '\0' && !iswhite (*cp))
4483                 cp++;
4484               make_tag (name, cp - name, FALSE,
4485                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4486             }
4487           else
4488             search_identifier = TRUE;
4489         }
4490       else if (strneq (cp, "define", 6)
4491                && (cp = skip_spaces (cp+6))
4492                && *cp++ == '('
4493                && (*cp == '"' || *cp == '\''))
4494         {
4495           char quote = *cp++;
4496           name = cp;
4497           while (*cp != quote && *cp != '\0')
4498             cp++;
4499           make_tag (name, cp - name, FALSE,
4500                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4501         }
4502       else if (members
4503                && LOOKING_AT (cp, "var")
4504                && *cp == '$')
4505         {
4506           name = cp;
4507           while (!notinname(*cp))
4508             cp++;
4509           make_tag (name, cp - name, FALSE,
4510                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4511         }
4512     }
4513 }
4514
4515 \f
4516 /*
4517  * Cobol tag functions
4518  * We could look for anything that could be a paragraph name.
4519  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4520  * Idea by Corny de Souza (1993)
4521  */
4522 static void
4523 Cobol_paragraphs (FILE *inf)
4524 {
4525   register char *bp, *ep;
4526
4527   LOOP_ON_INPUT_LINES (inf, lb, bp)
4528     {
4529       if (lb.len < 9)
4530         continue;
4531       bp += 8;
4532
4533       /* If eoln, compiler option or comment ignore whole line. */
4534       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4535         continue;
4536
4537       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4538         continue;
4539       if (*ep++ == '.')
4540         make_tag (bp, ep - bp, TRUE,
4541                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4542     }
4543 }
4544
4545 \f
4546 /*
4547  * Makefile support
4548  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4549  */
4550 static void
4551 Makefile_targets (FILE *inf)
4552 {
4553   register char *bp;
4554
4555   LOOP_ON_INPUT_LINES (inf, lb, bp)
4556     {
4557       if (*bp == '\t' || *bp == '#')
4558         continue;
4559       while (*bp != '\0' && *bp != '=' && *bp != ':')
4560         bp++;
4561       if (*bp == ':' || (globals && *bp == '='))
4562         {
4563           /* We should detect if there is more than one tag, but we do not.
4564              We just skip initial and final spaces. */
4565           char * namestart = skip_spaces (lb.buffer);
4566           while (--bp > namestart)
4567             if (!notinname (*bp))
4568               break;
4569           make_tag (namestart, bp - namestart + 1, TRUE,
4570                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4571         }
4572     }
4573 }
4574
4575 \f
4576 /*
4577  * Pascal parsing
4578  * Original code by Mosur K. Mohan (1989)
4579  *
4580  *  Locates tags for procedures & functions.  Doesn't do any type- or
4581  *  var-definitions.  It does look for the keyword "extern" or
4582  *  "forward" immediately following the procedure statement; if found,
4583  *  the tag is skipped.
4584  */
4585 static void
4586 Pascal_functions (FILE *inf)
4587 {
4588   linebuffer tline;             /* mostly copied from C_entries */
4589   long save_lcno;
4590   int save_lineno, namelen, taglen;
4591   char c, *name;
4592
4593   bool                          /* each of these flags is TRUE if: */
4594     incomment,                  /* point is inside a comment */
4595     inquote,                    /* point is inside '..' string */
4596     get_tagname,                /* point is after PROCEDURE/FUNCTION
4597                                    keyword, so next item = potential tag */
4598     found_tag,                  /* point is after a potential tag */
4599     inparms,                    /* point is within parameter-list */
4600     verify_tag;                 /* point has passed the parm-list, so the
4601                                    next token will determine whether this
4602                                    is a FORWARD/EXTERN to be ignored, or
4603                                    whether it is a real tag */
4604
4605   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4606   name = NULL;                  /* keep compiler quiet */
4607   dbp = lb.buffer;
4608   *dbp = '\0';
4609   linebuffer_init (&tline);
4610
4611   incomment = inquote = FALSE;
4612   found_tag = FALSE;            /* have a proc name; check if extern */
4613   get_tagname = FALSE;          /* found "procedure" keyword         */
4614   inparms = FALSE;              /* found '(' after "proc"            */
4615   verify_tag = FALSE;           /* check if "extern" is ahead        */
4616
4617
4618   while (!feof (inf))           /* long main loop to get next char */
4619     {
4620       c = *dbp++;
4621       if (c == '\0')            /* if end of line */
4622         {
4623           readline (&lb, inf);
4624           dbp = lb.buffer;
4625           if (*dbp == '\0')
4626             continue;
4627           if (!((found_tag && verify_tag)
4628                 || get_tagname))
4629             c = *dbp++;         /* only if don't need *dbp pointing
4630                                    to the beginning of the name of
4631                                    the procedure or function */
4632         }
4633       if (incomment)
4634         {
4635           if (c == '}')         /* within { } comments */
4636             incomment = FALSE;
4637           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4638             {
4639               dbp++;
4640               incomment = FALSE;
4641             }
4642           continue;
4643         }
4644       else if (inquote)
4645         {
4646           if (c == '\'')
4647             inquote = FALSE;
4648           continue;
4649         }
4650       else
4651         switch (c)
4652           {
4653           case '\'':
4654             inquote = TRUE;     /* found first quote */
4655             continue;
4656           case '{':             /* found open { comment */
4657             incomment = TRUE;
4658             continue;
4659           case '(':
4660             if (*dbp == '*')    /* found open (* comment */
4661               {
4662                 incomment = TRUE;
4663                 dbp++;
4664               }
4665             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4666               inparms = TRUE;
4667             continue;
4668           case ')':             /* end of parms list */
4669             if (inparms)
4670               inparms = FALSE;
4671             continue;
4672           case ';':
4673             if (found_tag && !inparms) /* end of proc or fn stmt */
4674               {
4675                 verify_tag = TRUE;
4676                 break;
4677               }
4678             continue;
4679           }
4680       if (found_tag && verify_tag && (*dbp != ' '))
4681         {
4682           /* Check if this is an "extern" declaration. */
4683           if (*dbp == '\0')
4684             continue;
4685           if (lowcase (*dbp == 'e'))
4686             {
4687               if (nocase_tail ("extern")) /* superfluous, really! */
4688                 {
4689                   found_tag = FALSE;
4690                   verify_tag = FALSE;
4691                 }
4692             }
4693           else if (lowcase (*dbp) == 'f')
4694             {
4695               if (nocase_tail ("forward")) /* check for forward reference */
4696                 {
4697                   found_tag = FALSE;
4698                   verify_tag = FALSE;
4699                 }
4700             }
4701           if (found_tag && verify_tag) /* not external proc, so make tag */
4702             {
4703               found_tag = FALSE;
4704               verify_tag = FALSE;
4705               make_tag (name, namelen, TRUE,
4706                         tline.buffer, taglen, save_lineno, save_lcno);
4707               continue;
4708             }
4709         }
4710       if (get_tagname)          /* grab name of proc or fn */
4711         {
4712           char *cp;
4713
4714           if (*dbp == '\0')
4715             continue;
4716
4717           /* Find block name. */
4718           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4719             continue;
4720
4721           /* Save all values for later tagging. */
4722           linebuffer_setlen (&tline, lb.len);
4723           strcpy (tline.buffer, lb.buffer);
4724           save_lineno = lineno;
4725           save_lcno = linecharno;
4726           name = tline.buffer + (dbp - lb.buffer);
4727           namelen = cp - dbp;
4728           taglen = cp - lb.buffer + 1;
4729
4730           dbp = cp;             /* set dbp to e-o-token */
4731           get_tagname = FALSE;
4732           found_tag = TRUE;
4733           continue;
4734
4735           /* And proceed to check for "extern". */
4736         }
4737       else if (!incomment && !inquote && !found_tag)
4738         {
4739           /* Check for proc/fn keywords. */
4740           switch (lowcase (c))
4741             {
4742             case 'p':
4743               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4744                 get_tagname = TRUE;
4745               continue;
4746             case 'f':
4747               if (nocase_tail ("unction"))
4748                 get_tagname = TRUE;
4749               continue;
4750             }
4751         }
4752     } /* while not eof */
4753
4754   free (tline.buffer);
4755 }
4756
4757 \f
4758 /*
4759  * Lisp tag functions
4760  *  look for (def or (DEF, quote or QUOTE
4761  */
4762
4763 static void L_getit (void);
4764
4765 static void
4766 L_getit (void)
4767 {
4768   if (*dbp == '\'')             /* Skip prefix quote */
4769     dbp++;
4770   else if (*dbp == '(')
4771   {
4772     dbp++;
4773     /* Try to skip "(quote " */
4774     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4775       /* Ok, then skip "(" before name in (defstruct (foo)) */
4776       dbp = skip_spaces (dbp);
4777   }
4778   get_tag (dbp, NULL);
4779 }
4780
4781 static void
4782 Lisp_functions (FILE *inf)
4783 {
4784   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4785     {
4786       if (dbp[0] != '(')
4787         continue;
4788
4789       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4790         {
4791           dbp = skip_non_spaces (dbp);
4792           dbp = skip_spaces (dbp);
4793           L_getit ();
4794         }
4795       else
4796         {
4797           /* Check for (foo::defmumble name-defined ... */
4798           do
4799             dbp++;
4800           while (!notinname (*dbp) && *dbp != ':');
4801           if (*dbp == ':')
4802             {
4803               do
4804                 dbp++;
4805               while (*dbp == ':');
4806
4807               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4808                 {
4809                   dbp = skip_non_spaces (dbp);
4810                   dbp = skip_spaces (dbp);
4811                   L_getit ();
4812                 }
4813             }
4814         }
4815     }
4816 }
4817
4818 \f
4819 /*
4820  * Lua script language parsing
4821  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4822  *
4823  *  "function" and "local function" are tags if they start at column 1.
4824  */
4825 static void
4826 Lua_functions (FILE *inf)
4827 {
4828   register char *bp;
4829
4830   LOOP_ON_INPUT_LINES (inf, lb, bp)
4831     {
4832       if (bp[0] != 'f' && bp[0] != 'l')
4833         continue;
4834
4835       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4836
4837       if (LOOKING_AT (bp, "function"))
4838         get_tag (bp, NULL);
4839     }
4840 }
4841
4842 \f
4843 /*
4844  * Postscript tags
4845  * Just look for lines where the first character is '/'
4846  * Also look at "defineps" for PSWrap
4847  * Ideas by:
4848  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4849  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4850  */
4851 static void
4852 PS_functions (FILE *inf)
4853 {
4854   register char *bp, *ep;
4855
4856   LOOP_ON_INPUT_LINES (inf, lb, bp)
4857     {
4858       if (bp[0] == '/')
4859         {
4860           for (ep = bp+1;
4861                *ep != '\0' && *ep != ' ' && *ep != '{';
4862                ep++)
4863             continue;
4864           make_tag (bp, ep - bp, TRUE,
4865                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4866         }
4867       else if (LOOKING_AT (bp, "defineps"))
4868         get_tag (bp, NULL);
4869     }
4870 }
4871
4872 \f
4873 /*
4874  * Forth tags
4875  * Ignore anything after \ followed by space or in ( )
4876  * Look for words defined by :
4877  * Look for constant, code, create, defer, value, and variable
4878  * OBP extensions:  Look for buffer:, field,
4879  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4880  */
4881 static void
4882 Forth_words (FILE *inf)
4883 {
4884   register char *bp;
4885
4886   LOOP_ON_INPUT_LINES (inf, lb, bp)
4887     while ((bp = skip_spaces (bp))[0] != '\0')
4888       if (bp[0] == '\\' && iswhite(bp[1]))
4889         break;                  /* read next line */
4890       else if (bp[0] == '(' && iswhite(bp[1]))
4891         do                      /* skip to ) or eol */
4892           bp++;
4893         while (*bp != ')' && *bp != '\0');
4894       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4895                || LOOKING_AT_NOCASE (bp, "constant")
4896                || LOOKING_AT_NOCASE (bp, "code")
4897                || LOOKING_AT_NOCASE (bp, "create")
4898                || LOOKING_AT_NOCASE (bp, "defer")
4899                || LOOKING_AT_NOCASE (bp, "value")
4900                || LOOKING_AT_NOCASE (bp, "variable")
4901                || LOOKING_AT_NOCASE (bp, "buffer:")
4902                || LOOKING_AT_NOCASE (bp, "field"))
4903         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4904       else
4905         bp = skip_non_spaces (bp);
4906 }
4907
4908 \f
4909 /*
4910  * Scheme tag functions
4911  * look for (def... xyzzy
4912  *          (def... (xyzzy
4913  *          (def ... ((...(xyzzy ....
4914  *          (set! xyzzy
4915  * Original code by Ken Haase (1985?)
4916  */
4917 static void
4918 Scheme_functions (FILE *inf)
4919 {
4920   register char *bp;
4921
4922   LOOP_ON_INPUT_LINES (inf, lb, bp)
4923     {
4924       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4925         {
4926           bp = skip_non_spaces (bp+4);
4927           /* Skip over open parens and white space.  Don't continue past
4928              '\0'. */
4929           while (*bp && notinname (*bp))
4930             bp++;
4931           get_tag (bp, NULL);
4932         }
4933       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4934         get_tag (bp, NULL);
4935     }
4936 }
4937
4938 \f
4939 /* Find tags in TeX and LaTeX input files.  */
4940
4941 /* TEX_toktab is a table of TeX control sequences that define tags.
4942  * Each entry records one such control sequence.
4943  *
4944  * Original code from who knows whom.
4945  * Ideas by:
4946  *   Stefan Monnier (2002)
4947  */
4948
/* Table with tag tokens.  It stays NULL until TeX_commands has
   TEX_decode_env build it; the entry whose buffer is NULL marks the
   end of the table.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Each name is introduced by a ':'.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape character and group delimiters used while scanning.
   TEX_mode sets all three each time it is called.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4964
4965 /*
4966  * TeX/LaTeX scanning loop.
4967  */
4968 static void
4969 TeX_commands (FILE *inf)
4970 {
4971   char *cp;
4972   linebuffer *key;
4973
4974   /* Select either \ or ! as escape character.  */
4975   TEX_mode (inf);
4976
4977   /* Initialize token table once from environment. */
4978   if (TEX_toktab == NULL)
4979     TEX_decode_env ("TEXTAGS", TEX_defenv);
4980
4981   LOOP_ON_INPUT_LINES (inf, lb, cp)
4982     {
4983       /* Look at each TEX keyword in line. */
4984       for (;;)
4985         {
4986           /* Look for a TEX escape. */
4987           while (*cp++ != TEX_esc)
4988             if (cp[-1] == '\0' || cp[-1] == '%')
4989               goto tex_next_line;
4990
4991           for (key = TEX_toktab; key->buffer != NULL; key++)
4992             if (strneq (cp, key->buffer, key->len))
4993               {
4994                 register char *p;
4995                 int namelen, linelen;
4996                 bool opgrp = FALSE;
4997
4998                 cp = skip_spaces (cp + key->len);
4999                 if (*cp == TEX_opgrp)
5000                   {
5001                     opgrp = TRUE;
5002                     cp++;
5003                   }
5004                 for (p = cp;
5005                      (!iswhite (*p) && *p != '#' &&
5006                       *p != TEX_opgrp && *p != TEX_clgrp);
5007                      p++)
5008                   continue;
5009                 namelen = p - cp;
5010                 linelen = lb.len;
5011                 if (!opgrp || *p == TEX_clgrp)
5012                   {
5013                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5014                       p++;
5015                     linelen = p - lb.buffer + 1;
5016                   }
5017                 make_tag (cp, namelen, TRUE,
5018                           lb.buffer, linelen, lineno, linecharno);
5019                 goto tex_next_line; /* We only tag a line once */
5020               }
5021         }
5022     tex_next_line:
5023       ;
5024     }
5025 }
5026
5027 #define TEX_LESC '\\'
5028 #define TEX_SESC '!'
5029
5030 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5031    chars accordingly. */
5032 static void
5033 TEX_mode (FILE *inf)
5034 {
5035   int c;
5036
5037   while ((c = getc (inf)) != EOF)
5038     {
5039       /* Skip to next line if we hit the TeX comment char. */
5040       if (c == '%')
5041         while (c != '\n' && c != EOF)
5042           c = getc (inf);
5043       else if (c == TEX_LESC || c == TEX_SESC )
5044         break;
5045     }
5046
5047   if (c == TEX_LESC)
5048     {
5049       TEX_esc = TEX_LESC;
5050       TEX_opgrp = '{';
5051       TEX_clgrp = '}';
5052     }
5053   else
5054     {
5055       TEX_esc = TEX_SESC;
5056       TEX_opgrp = '<';
5057       TEX_clgrp = '>';
5058     }
5059   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5060      No attempt is made to correct the situation. */
5061   rewind (inf);
5062 }
5063
5064 /* Read environment and prepend it to the default string.
5065    Build token table. */
5066 static void
5067 TEX_decode_env (const char *evarname, const char *defenv)
5068 {
5069   register const char *env, *p;
5070   int i, len;
5071
5072   /* Append default string to environment. */
5073   env = getenv (evarname);
5074   if (!env)
5075     env = defenv;
5076   else
5077     env = concat (env, defenv, "");
5078
5079   /* Allocate a token table */
5080   for (len = 1, p = env; p;)
5081     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5082       len++;
5083   TEX_toktab = xnew (len, linebuffer);
5084
5085   /* Unpack environment string into token table. Be careful about */
5086   /* zero-length strings (leading ':', "::" and trailing ':') */
5087   for (i = 0; *env != '\0';)
5088     {
5089       p = etags_strchr (env, ':');
5090       if (!p)                   /* End of environment string. */
5091         p = env + strlen (env);
5092       if (p - env > 0)
5093         {                       /* Only non-zero strings. */
5094           TEX_toktab[i].buffer = savenstr (env, p - env);
5095           TEX_toktab[i].len = p - env;
5096           i++;
5097         }
5098       if (*p)
5099         env = p + 1;
5100       else
5101         {
5102           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5103           TEX_toktab[i].len = 0;
5104           break;
5105         }
5106     }
5107 }
5108
5109 \f
5110 /* Texinfo support.  Dave Love, Mar. 2000.  */
5111 static void
5112 Texinfo_nodes (FILE *inf)
5113 {
5114   char *cp, *start;
5115   LOOP_ON_INPUT_LINES (inf, lb, cp)
5116     if (LOOKING_AT (cp, "@node"))
5117       {
5118         start = cp;
5119         while (*cp != '\0' && *cp != ',')
5120           cp++;
5121         make_tag (start, cp - start, TRUE,
5122                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5123       }
5124 }
5125
5126 \f
5127 /*
5128  * HTML support.
5129  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5130  * Contents of <a name=xxx> are tags with name xxx.
5131  *
5132  * Francesco Potortì, 2002.
5133  */
5134 static void
5135 HTML_labels (FILE *inf)
5136 {
5137   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5138   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5139   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5140   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5141   char *end;
5142
5143
5144   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5145
5146   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5147     for (;;)                    /* loop on the same line */
5148       {
5149         if (skiptag)            /* skip HTML tag */
5150           {
5151             while (*dbp != '\0' && *dbp != '>')
5152               dbp++;
5153             if (*dbp == '>')
5154               {
5155                 dbp += 1;
5156                 skiptag = FALSE;
5157                 continue;       /* look on the same line */
5158               }
5159             break;              /* go to next line */
5160           }
5161
5162         else if (intag) /* look for "name=" or "id=" */
5163           {
5164             while (*dbp != '\0' && *dbp != '>'
5165                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5166               dbp++;
5167             if (*dbp == '\0')
5168               break;            /* go to next line */
5169             if (*dbp == '>')
5170               {
5171                 dbp += 1;
5172                 intag = FALSE;
5173                 continue;       /* look on the same line */
5174               }
5175             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5176                 || LOOKING_AT_NOCASE (dbp, "id="))
5177               {
5178                 bool quoted = (dbp[0] == '"');
5179
5180                 if (quoted)
5181                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5182                     continue;
5183                 else
5184                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5185                     continue;
5186                 linebuffer_setlen (&token_name, end - dbp);
5187                 strncpy (token_name.buffer, dbp, end - dbp);
5188                 token_name.buffer[end - dbp] = '\0';
5189
5190                 dbp = end;
5191                 intag = FALSE;  /* we found what we looked for */
5192                 skiptag = TRUE; /* skip to the end of the tag */
5193                 getnext = TRUE; /* then grab the text */
5194                 continue;       /* look on the same line */
5195               }
5196             dbp += 1;
5197           }
5198
5199         else if (getnext)       /* grab next tokens and tag them */
5200           {
5201             dbp = skip_spaces (dbp);
5202             if (*dbp == '\0')
5203               break;            /* go to next line */
5204             if (*dbp == '<')
5205               {
5206                 intag = TRUE;
5207                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5208                 continue;       /* look on the same line */
5209               }
5210
5211             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5212               continue;
5213             make_tag (token_name.buffer, token_name.len, TRUE,
5214                       dbp, end - dbp, lineno, linecharno);
5215             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5216             getnext = FALSE;
5217             break;              /* go to next line */
5218           }
5219
5220         else                    /* look for an interesting HTML tag */
5221           {
5222             while (*dbp != '\0' && *dbp != '<')
5223               dbp++;
5224             if (*dbp == '\0')
5225               break;            /* go to next line */
5226             intag = TRUE;
5227             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5228               {
5229                 inanchor = TRUE;
5230                 continue;       /* look on the same line */
5231               }
5232             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5233                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5234                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5235                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5236               {
5237                 intag = FALSE;
5238                 getnext = TRUE;
5239                 continue;       /* look on the same line */
5240               }
5241             dbp += 1;
5242           }
5243       }
5244 }
5245
5246 \f
5247 /*
5248  * Prolog support
5249  *
5250  * Assumes that the predicate or rule starts at column 0.
5251  * Only the first clause of a predicate or rule is added.
5252  * Original code by Sunichirou Sugou (1989)
5253  * Rewritten by Anders Lindgren (1996)
5254  */
5255 static size_t prolog_pr (char *, char *);
5256 static void prolog_skip_comment (linebuffer *, FILE *);
5257 static size_t prolog_atom (char *, size_t);
5258
5259 static void
5260 Prolog_functions (FILE *inf)
5261 {
5262   char *cp, *last;
5263   size_t len;
5264   size_t allocated;
5265
5266   allocated = 0;
5267   len = 0;
5268   last = NULL;
5269
5270   LOOP_ON_INPUT_LINES (inf, lb, cp)
5271     {
5272       if (cp[0] == '\0')        /* Empty line */
5273         continue;
5274       else if (iswhite (cp[0])) /* Not a predicate */
5275         continue;
5276       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5277         prolog_skip_comment (&lb, inf);
5278       else if ((len = prolog_pr (cp, last)) > 0)
5279         {
5280           /* Predicate or rule.  Store the function name so that we
5281              only generate a tag for the first clause.  */
5282           if (last == NULL)
5283             last = xnew(len + 1, char);
5284           else if (len + 1 > allocated)
5285             xrnew (last, len + 1, char);
5286           allocated = len + 1;
5287           strncpy (last, cp, len);
5288           last[len] = '\0';
5289         }
5290     }
5291   free (last);
5292 }
5293
5294
5295 static void
5296 prolog_skip_comment (linebuffer *plb, FILE *inf)
5297 {
5298   char *cp;
5299
5300   do
5301     {
5302       for (cp = plb->buffer; *cp != '\0'; cp++)
5303         if (cp[0] == '*' && cp[1] == '/')
5304           return;
5305       readline (plb, inf);
5306     }
5307   while (!feof(inf));
5308 }
5309
5310 /*
5311  * A predicate or rule definition is added if it matches:
5312  *     <beginning of line><Prolog Atom><whitespace>(
5313  * or  <beginning of line><Prolog Atom><whitespace>:-
5314  *
5315  * It is added to the tags database if it doesn't match the
5316  * name of the previous clause header.
5317  *
5318  * Return the size of the name of the predicate or rule, or 0 if no
5319  * header was found.
5320  */
5321 static size_t
5322 prolog_pr (char *s, char *last)
5323
5324                                 /* Name of last clause. */
5325 {
5326   size_t pos;
5327   size_t len;
5328
5329   pos = prolog_atom (s, 0);
5330   if (! pos)
5331     return 0;
5332
5333   len = pos;
5334   pos = skip_spaces (s + pos) - s;
5335
5336   if ((s[pos] == '.'
5337        || (s[pos] == '(' && (pos += 1))
5338        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5339       && (last == NULL          /* save only the first clause */
5340           || len != strlen (last)
5341           || !strneq (s, last, len)))
5342         {
5343           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5344           return len;
5345         }
5346   else
5347     return 0;
5348 }
5349
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *const start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: walk up to and including the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;                 /* backslash and the character it quotes */
        break;

      case '\'':
        if (p[1] != '\'')
          return p + 1 - start; /* closing quote */
        p += 2;                 /* a doubled quote stands for one quote */
        break;

      default:
        p++;
        break;
      }
}
5406
5407 \f
5408 /*
5409  * Support for Erlang
5410  *
5411  * Generates tags for functions, defines, and records.
5412  * Assumes that Erlang functions start at column 0.
5413  * Original code by Anders Lindgren (1996)
5414  */
5415 static int erlang_func (char *, char *);
5416 static void erlang_attribute (char *);
5417 static int erlang_atom (char *);
5418
5419 static void
5420 Erlang_functions (FILE *inf)
5421 {
5422   char *cp, *last;
5423   int len;
5424   int allocated;
5425
5426   allocated = 0;
5427   len = 0;
5428   last = NULL;
5429
5430   LOOP_ON_INPUT_LINES (inf, lb, cp)
5431     {
5432       if (cp[0] == '\0')        /* Empty line */
5433         continue;
5434       else if (iswhite (cp[0])) /* Not function nor attribute */
5435         continue;
5436       else if (cp[0] == '%')    /* comment */
5437         continue;
5438       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5439         continue;
5440       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5441         {
5442           erlang_attribute (cp);
5443           if (last != NULL)
5444             {
5445               free (last);
5446               last = NULL;
5447             }
5448         }
5449       else if ((len = erlang_func (cp, last)) > 0)
5450         {
5451           /*
5452            * Function.  Store the function name so that we only
5453            * generates a tag for the first clause.
5454            */
5455           if (last == NULL)
5456             last = xnew (len + 1, char);
5457           else if (len + 1 > allocated)
5458             xrnew (last, len + 1, char);
5459           allocated = len + 1;
5460           strncpy (last, cp, len);
5461           last[len] = '\0';
5462         }
5463     }
5464   free (last);
5465 }
5466
5467
5468 /*
5469  * A function definition is added if it matches:
5470  *     <beginning of line><Erlang Atom><whitespace>(
5471  *
5472  * It is added to the tags database if it doesn't match the
5473  * name of the previous clause header.
5474  *
5475  * Return the size of the name of the function, or 0 if no function
5476  * was found.
5477  */
5478 static int
5479 erlang_func (char *s, char *last)
5480
5481                                 /* Name of last clause. */
5482 {
5483   int pos;
5484   int len;
5485
5486   pos = erlang_atom (s);
5487   if (pos < 1)
5488     return 0;
5489
5490   len = pos;
5491   pos = skip_spaces (s + pos) - s;
5492
5493   /* Save only the first clause. */
5494   if (s[pos++] == '('
5495       && (last == NULL
5496           || len != (int)strlen (last)
5497           || !strneq (s, last, len)))
5498         {
5499           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5500           return len;
5501         }
5502
5503   return 0;
5504 }
5505
5506
5507 /*
5508  * Handle attributes.  Currently, tags are generated for defines
5509  * and records.
5510  *
5511  * They are on the form:
5512  * -define(foo, bar).
5513  * -define(Foo(M, N), M+N).
5514  * -record(graph, {vtab = notable, cyclic = true}).
5515  */
5516 static void
5517 erlang_attribute (char *s)
5518 {
5519   char *cp = s;
5520
5521   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5522       && *cp++ == '(')
5523     {
5524       int len = erlang_atom (skip_spaces (cp));
5525       if (len > 0)
5526         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5527     }
5528   return;
5529 }
5530
5531
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or a quoted atom is not closed on this line.
 */
static int
erlang_atom (char *s)
{
  const char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - s);
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  for (p++; *p != '\''; p++)
    {
      if (*p == '\\')
        p++;                    /* a backslash quotes the next character */
      if (*p == '\0')
        return 0;               /* multiline quoted atoms are ignored */
    }
  return (int) (p + 1 - s);     /* include the closing quote */
}
5559
5560 \f
5561 static char *scan_separators (char *);
5562 static void add_regex (char *, language *);
5563 static char *substitute (char *, char *, struct re_registers *);
5564
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place: the result is written
 * starting at NAME[0], i.e. over the leading separator, and is always
 * null terminated.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty string is unterminated by
 * definition and is left untouched.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan; stepping over the separator would run past the
     end of the string.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Quoted character.  A backslash ending the string is
             dropped, and the regexp is then unterminated.  */
          ++name;
          if (*name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string.  COPYTO always trails NAME, so this never
     clobbers the separator NAME may be pointing at.  */
  *copyto = '\0';

  /* Signal unterminated regexp with NULL. */
  return *name == sep ? name : NULL;
}
5623
5624 /* Look at the argument of --regex or --no-regex and do the right
5625    thing.  Same for each line of a regexp file. */
5626 static void
5627 analyse_regex (char *regex_arg)
5628 {
5629   if (regex_arg == NULL)
5630     {
5631       free_regexps ();          /* --no-regex: remove existing regexps */
5632       return;
5633     }
5634
5635   /* A real --regexp option or a line in a regexp file. */
5636   switch (regex_arg[0])
5637     {
5638       /* Comments in regexp file or null arg to --regex. */
5639     case '\0':
5640     case ' ':
5641     case '\t':
5642       break;
5643
5644       /* Read a regex file.  This is recursive and may result in a
5645          loop, which will stop when the file descriptors are exhausted. */
5646     case '@':
5647       {
5648         FILE *regexfp;
5649         linebuffer regexbuf;
5650         char *regexfile = regex_arg + 1;
5651
5652         /* regexfile is a file containing regexps, one per line. */
5653         regexfp = fopen (regexfile, "r");
5654         if (regexfp == NULL)
5655           {
5656             pfatal (regexfile);
5657             return;
5658           }
5659         linebuffer_init (&regexbuf);
5660         while (readline_internal (&regexbuf, regexfp) > 0)
5661           analyse_regex (regexbuf.buffer);
5662         free (regexbuf.buffer);
5663         fclose (regexfp);
5664       }
5665       break;
5666
5667       /* Regexp to be used for a specific language only. */
5668     case '{':
5669       {
5670         language *lang;
5671         char *lang_name = regex_arg + 1;
5672         char *cp;
5673
5674         for (cp = lang_name; *cp != '}'; cp++)
5675           if (*cp == '\0')
5676             {
5677               error ("unterminated language name in regex: %s", regex_arg);
5678               return;
5679             }
5680         *cp++ = '\0';
5681         lang = get_language_from_langname (lang_name);
5682         if (lang == NULL)
5683           return;
5684         add_regex (cp, lang);
5685       }
5686       break;
5687
5688       /* Regexp to be used for any language. */
5689     default:
5690       add_regex (regex_arg, NULL);
5691       break;
5692     }
5693 }
5694
5695 /* Separate the regexp pattern, compile it,
5696    and care for optional name and modifiers. */
5697 static void
5698 add_regex (char *regexp_pattern, language *lang)
5699 {
5700   static struct re_pattern_buffer zeropattern;
5701   char sep, *pat, *name, *modifiers;
5702   char empty[] = "";
5703   const char *err;
5704   struct re_pattern_buffer *patbuf;
5705   regexp *rp;
5706   bool
5707     force_explicit_name = TRUE, /* do not use implicit tag names */
5708     ignore_case = FALSE,        /* case is significant */
5709     multi_line = FALSE,         /* matches are done one line at a time */
5710     single_line = FALSE;        /* dot does not match newline */
5711
5712
5713   if (strlen(regexp_pattern) < 3)
5714     {
5715       error ("null regexp", (char *)NULL);
5716       return;
5717     }
5718   sep = regexp_pattern[0];
5719   name = scan_separators (regexp_pattern);
5720   if (name == NULL)
5721     {
5722       error ("%s: unterminated regexp", regexp_pattern);
5723       return;
5724     }
5725   if (name[1] == sep)
5726     {
5727       error ("null name for regexp \"%s\"", regexp_pattern);
5728       return;
5729     }
5730   modifiers = scan_separators (name);
5731   if (modifiers == NULL)        /* no terminating separator --> no name */
5732     {
5733       modifiers = name;
5734       name = empty;
5735     }
5736   else
5737     modifiers += 1;             /* skip separator */
5738
5739   /* Parse regex modifiers. */
5740   for (; modifiers[0] != '\0'; modifiers++)
5741     switch (modifiers[0])
5742       {
5743       case 'N':
5744         if (modifiers == name)
5745           error ("forcing explicit tag name but no name, ignoring", NULL);
5746         force_explicit_name = TRUE;
5747         break;
5748       case 'i':
5749         ignore_case = TRUE;
5750         break;
5751       case 's':
5752         single_line = TRUE;
5753         /* FALLTHRU */
5754       case 'm':
5755         multi_line = TRUE;
5756         need_filebuf = TRUE;
5757         break;
5758       default:
5759         {
5760           char wrongmod [2];
5761           wrongmod[0] = modifiers[0];
5762           wrongmod[1] = '\0';
5763           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5764         }
5765         break;
5766       }
5767
5768   patbuf = xnew (1, struct re_pattern_buffer);
5769   *patbuf = zeropattern;
5770   if (ignore_case)
5771     {
5772       static char lc_trans[CHARS];
5773       int i;
5774       for (i = 0; i < CHARS; i++)
5775         lc_trans[i] = lowcase (i);
5776       patbuf->translate = lc_trans;     /* translation table to fold case  */
5777     }
5778
5779   if (multi_line)
5780     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5781   else
5782     pat = regexp_pattern;
5783
5784   if (single_line)
5785     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5786   else
5787     re_set_syntax (RE_SYNTAX_EMACS);
5788
5789   err = re_compile_pattern (pat, strlen (pat), patbuf);
5790   if (multi_line)
5791     free (pat);
5792   if (err != NULL)
5793     {
5794       error ("%s while compiling pattern", err);
5795       return;
5796     }
5797
5798   rp = p_head;
5799   p_head = xnew (1, regexp);
5800   p_head->pattern = savestr (regexp_pattern);
5801   p_head->p_next = rp;
5802   p_head->lang = lang;
5803   p_head->pat = patbuf;
5804   p_head->name = savestr (name);
5805   p_head->error_signaled = FALSE;
5806   p_head->force_explicit_name = force_explicit_name;
5807   p_head->ignore_case = ignore_case;
5808   p_head->multi_line = multi_line;
5809 }
5810
5811 /*
5812  * Do the substitutions indicated by the regular expression and
5813  * arguments.
5814  */
5815 static char *
5816 substitute (char *in, char *out, struct re_registers *regs)
5817 {
5818   char *result, *t;
5819   int size, dig, diglen;
5820
5821   result = NULL;
5822   size = strlen (out);
5823
5824   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5825   if (out[size - 1] == '\\')
5826     fatal ("pattern error in \"%s\"", out);
5827   for (t = etags_strchr (out, '\\');
5828        t != NULL;
5829        t = etags_strchr (t + 2, '\\'))
5830     if (ISDIGIT (t[1]))
5831       {
5832         dig = t[1] - '0';
5833         diglen = regs->end[dig] - regs->start[dig];
5834         size += diglen - 2;
5835       }
5836     else
5837       size -= 1;
5838
5839   /* Allocate space and do the substitutions. */
5840   assert (size >= 0);
5841   result = xnew (size + 1, char);
5842
5843   for (t = result; *out != '\0'; out++)
5844     if (*out == '\\' && ISDIGIT (*++out))
5845       {
5846         dig = *out - '0';
5847         diglen = regs->end[dig] - regs->start[dig];
5848         strncpy (t, in + regs->start[dig], diglen);
5849         t += diglen;
5850       }
5851     else
5852       *t++ = *out;
5853   *t = '\0';
5854
5855   assert (t <= result + size);
5856   assert (t - result == (int)strlen (result));
5857
5858   return result;
5859 }
5860
5861 /* Deallocate all regexps. */
5862 static void
5863 free_regexps (void)
5864 {
5865   regexp *rp;
5866   while (p_head != NULL)
5867     {
5868       rp = p_head->p_next;
5869       free (p_head->pattern);
5870       free (p_head->name);
5871       free (p_head);
5872       p_head = rp;
5873     }
5874   return;
5875 }
5876
5877 /*
5878  * Reads the whole file as a single string from `filebuf' and looks for
5879  * multi-line regular expressions, creating tags on matches.
5880  * readline already dealt with normal regexps.
5881  *
5882  * Idea by Ben Wing <ben@666.com> (2002).
5883  */
5884 static void
5885 regex_tag_multiline (void)
5886 {
5887   char *buffer = filebuf.buffer;
5888   regexp *rp;
5889   char *name;
5890
5891   for (rp = p_head; rp != NULL; rp = rp->p_next)
5892     {
5893       int match = 0;
5894
5895       if (!rp->multi_line)
5896         continue;               /* skip normal regexps */
5897
5898       /* Generic initialisations before parsing file from memory. */
5899       lineno = 1;               /* reset global line number */
5900       charno = 0;               /* reset global char number */
5901       linecharno = 0;           /* reset global char number of line start */
5902
5903       /* Only use generic regexps or those for the current language. */
5904       if (rp->lang != NULL && rp->lang != curfdp->lang)
5905         continue;
5906
5907       while (match >= 0 && match < filebuf.len)
5908         {
5909           match = re_search (rp->pat, buffer, filebuf.len, charno,
5910                              filebuf.len - match, &rp->regs);
5911           switch (match)
5912             {
5913             case -2:
5914               /* Some error. */
5915               if (!rp->error_signaled)
5916                 {
5917                   error ("regexp stack overflow while matching \"%s\"",
5918                          rp->pattern);
5919                   rp->error_signaled = TRUE;
5920                 }
5921               break;
5922             case -1:
5923               /* No match. */
5924               break;
5925             default:
5926               if (match == rp->regs.end[0])
5927                 {
5928                   if (!rp->error_signaled)
5929                     {
5930                       error ("regexp matches the empty string: \"%s\"",
5931                              rp->pattern);
5932                       rp->error_signaled = TRUE;
5933                     }
5934                   match = -3;   /* exit from while loop */
5935                   break;
5936                 }
5937
5938               /* Match occurred.  Construct a tag. */
5939               while (charno < rp->regs.end[0])
5940                 if (buffer[charno++] == '\n')
5941                   lineno++, linecharno = charno;
5942               name = rp->name;
5943               if (name[0] == '\0')
5944                 name = NULL;
5945               else /* make a named tag */
5946                 name = substitute (buffer, rp->name, &rp->regs);
5947               if (rp->force_explicit_name)
5948                 /* Force explicit tag name, if a name is there. */
5949                 pfnote (name, TRUE, buffer + linecharno,
5950                         charno - linecharno + 1, lineno, linecharno);
5951               else
5952                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5953                           charno - linecharno + 1, lineno, linecharno);
5954               break;
5955             }
5956         }
5957     }
5958 }
5959
5960 \f
5961 static bool
5962 nocase_tail (const char *cp)
5963 {
5964   register int len = 0;
5965
5966   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5967     cp++, len++;
5968   if (*cp == '\0' && !intoken (dbp[len]))
5969     {
5970       dbp += len;
5971       return TRUE;
5972     }
5973   return FALSE;
5974 }
5975
5976 static void
5977 get_tag (register char *bp, char **namepp)
5978 {
5979   register char *cp = bp;
5980
5981   if (*bp != '\0')
5982     {
5983       /* Go till you get to white space or a syntactic break */
5984       for (cp = bp + 1; !notinname (*cp); cp++)
5985         continue;
5986       make_tag (bp, cp - bp, TRUE,
5987                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5988     }
5989
5990   if (namepp != NULL)
5991     *namepp = savenstr (bp, cp - bp);
5992 }
5993
5994 /*
5995  * Read a line of text from `stream' into `lbp', excluding the
5996  * newline or CR-NL, if any.  Return the number of characters read from
5997  * `stream', which is the length of the line including the newline.
5998  *
5999  * On DOS or Windows we do not count the CR character, if any before the
6000  * NL, in the returned length; this mirrors the behavior of Emacs on those
6001  * platforms (for text files, it translates CR-NL to NL as it reads in the
6002  * file).
6003  *
6004  * If multi-line regular expressions are requested, each line read is
6005  * appended to `filebuf'.
6006  */
6007 static long
6008 readline_internal (linebuffer *lbp, register FILE *stream)
6009 {
6010   char *buffer = lbp->buffer;
6011   register char *p = lbp->buffer;
6012   register char *pend;
6013   int chars_deleted;
6014
6015   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6016
6017   for (;;)
6018     {
6019       register int c = getc (stream);
6020       if (p == pend)
6021         {
6022           /* We're at the end of linebuffer: expand it. */
6023           lbp->size *= 2;
6024           xrnew (buffer, lbp->size, char);
6025           p += buffer - lbp->buffer;
6026           pend = buffer + lbp->size;
6027           lbp->buffer = buffer;
6028         }
6029       if (c == EOF)
6030         {
6031           *p = '\0';
6032           chars_deleted = 0;
6033           break;
6034         }
6035       if (c == '\n')
6036         {
6037           if (p > buffer && p[-1] == '\r')
6038             {
6039               p -= 1;
6040 #ifdef DOS_NT
6041              /* Assume CRLF->LF translation will be performed by Emacs
6042                 when loading this file, so CRs won't appear in the buffer.
6043                 It would be cleaner to compensate within Emacs;
6044                 however, Emacs does not know how many CRs were deleted
6045                 before any given point in the file.  */
6046               chars_deleted = 1;
6047 #else
6048               chars_deleted = 2;
6049 #endif
6050             }
6051           else
6052             {
6053               chars_deleted = 1;
6054             }
6055           *p = '\0';
6056           break;
6057         }
6058       *p++ = c;
6059     }
6060   lbp->len = p - buffer;
6061
6062   if (need_filebuf              /* we need filebuf for multi-line regexps */
6063       && chars_deleted > 0)     /* not at EOF */
6064     {
6065       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6066         {
6067           /* Expand filebuf. */
6068           filebuf.size *= 2;
6069           xrnew (filebuf.buffer, filebuf.size, char);
6070         }
6071       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6072       filebuf.len += lbp->len;
6073       filebuf.buffer[filebuf.len++] = '\n';
6074       filebuf.buffer[filebuf.len] = '\0';
6075     }
6076
6077   return lbp->len + chars_deleted;
6078 }
6079
6080 /*
6081  * Like readline_internal, above, but in addition try to match the
6082  * input line against relevant regular expressions and manage #line
6083  * directives.
6084  */
6085 static void
6086 readline (linebuffer *lbp, FILE *stream)
6087 {
6088   long result;
6089
6090   linecharno = charno;          /* update global char number of line start */
6091   result = readline_internal (lbp, stream); /* read line */
6092   lineno += 1;                  /* increment global line number */
6093   charno += result;             /* increment global char number */
6094
6095   /* Honour #line directives. */
6096   if (!no_line_directive)
6097     {
6098       static bool discard_until_line_directive;
6099
6100       /* Check whether this is a #line directive. */
6101       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6102         {
6103           unsigned int lno;
6104           int start = 0;
6105
6106           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6107               && start > 0)     /* double quote character found */
6108             {
6109               char *endp = lbp->buffer + start;
6110
6111               while ((endp = etags_strchr (endp, '"')) != NULL
6112                      && endp[-1] == '\\')
6113                 endp++;
6114               if (endp != NULL)
6115                 /* Ok, this is a real #line directive.  Let's deal with it. */
6116                 {
6117                   char *taggedabsname;  /* absolute name of original file */
6118                   char *taggedfname;    /* name of original file as given */
6119                   char *name;           /* temp var */
6120
6121                   discard_until_line_directive = FALSE; /* found it */
6122                   name = lbp->buffer + start;
6123                   *endp = '\0';
6124                   canonicalize_filename (name);
6125                   taggedabsname = absolute_filename (name, tagfiledir);
6126                   if (filename_is_absolute (name)
6127                       || filename_is_absolute (curfdp->infname))
6128                     taggedfname = savestr (taggedabsname);
6129                   else
6130                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6131
6132                   if (streq (curfdp->taggedfname, taggedfname))
6133                     /* The #line directive is only a line number change.  We
6134                        deal with this afterwards. */
6135                     free (taggedfname);
6136                   else
6137                     /* The tags following this #line directive should be
6138                        attributed to taggedfname.  In order to do this, set
6139                        curfdp accordingly. */
6140                     {
6141                       fdesc *fdp; /* file description pointer */
6142
6143                       /* Go look for a file description already set up for the
6144                          file indicated in the #line directive.  If there is
6145                          one, use it from now until the next #line
6146                          directive. */
6147                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6148                         if (streq (fdp->infname, curfdp->infname)
6149                             && streq (fdp->taggedfname, taggedfname))
6150                           /* If we remove the second test above (after the &&)
6151                              then all entries pertaining to the same file are
6152                              coalesced in the tags file.  If we use it, then
6153                              entries pertaining to the same file but generated
6154                              from different files (via #line directives) will
6155                              go into separate sections in the tags file.  These
6156                              alternatives look equivalent.  The first one
6157                              destroys some apparently useless information. */
6158                           {
6159                             curfdp = fdp;
6160                             free (taggedfname);
6161                             break;
6162                           }
6163                       /* Else, if we already tagged the real file, skip all
6164                          input lines until the next #line directive. */
6165                       if (fdp == NULL) /* not found */
6166                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6167                           if (streq (fdp->infabsname, taggedabsname))
6168                             {
6169                               discard_until_line_directive = TRUE;
6170                               free (taggedfname);
6171                               break;
6172                             }
6173                       /* Else create a new file description and use that from
6174                          now on, until the next #line directive. */
6175                       if (fdp == NULL) /* not found */
6176                         {
6177                           fdp = fdhead;
6178                           fdhead = xnew (1, fdesc);
6179                           *fdhead = *curfdp; /* copy curr. file description */
6180                           fdhead->next = fdp;
6181                           fdhead->infname = savestr (curfdp->infname);
6182                           fdhead->infabsname = savestr (curfdp->infabsname);
6183                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6184                           fdhead->taggedfname = taggedfname;
6185                           fdhead->usecharno = FALSE;
6186                           fdhead->prop = NULL;
6187                           fdhead->written = FALSE;
6188                           curfdp = fdhead;
6189                         }
6190                     }
6191                   free (taggedabsname);
6192                   lineno = lno - 1;
6193                   readline (lbp, stream);
6194                   return;
6195                 } /* if a real #line directive */
6196             } /* if #line is followed by a number */
6197         } /* if line begins with "#line " */
6198
6199       /* If we are here, no #line directive was found. */
6200       if (discard_until_line_directive)
6201         {
6202           if (result > 0)
6203             {
6204               /* Do a tail recursion on ourselves, thus discarding the contents
6205                  of the line buffer. */
6206               readline (lbp, stream);
6207               return;
6208             }
6209           /* End of file. */
6210           discard_until_line_directive = FALSE;
6211           return;
6212         }
6213     } /* if #line directives should be considered */
6214
6215   {
6216     int match;
6217     regexp *rp;
6218     char *name;
6219
6220     /* Match against relevant regexps. */
6221     if (lbp->len > 0)
6222       for (rp = p_head; rp != NULL; rp = rp->p_next)
6223         {
6224           /* Only use generic regexps or those for the current language.
6225              Also do not use multiline regexps, which is the job of
6226              regex_tag_multiline. */
6227           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6228               || rp->multi_line)
6229             continue;
6230
6231           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6232           switch (match)
6233             {
6234             case -2:
6235               /* Some error. */
6236               if (!rp->error_signaled)
6237                 {
6238                   error ("regexp stack overflow while matching \"%s\"",
6239                          rp->pattern);
6240                   rp->error_signaled = TRUE;
6241                 }
6242               break;
6243             case -1:
6244               /* No match. */
6245               break;
6246             case 0:
6247               /* Empty string matched. */
6248               if (!rp->error_signaled)
6249                 {
6250                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6251                   rp->error_signaled = TRUE;
6252                 }
6253               break;
6254             default:
6255               /* Match occurred.  Construct a tag. */
6256               name = rp->name;
6257               if (name[0] == '\0')
6258                 name = NULL;
6259               else /* make a named tag */
6260                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6261               if (rp->force_explicit_name)
6262                 /* Force explicit tag name, if a name is there. */
6263                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6264               else
6265                 make_tag (name, strlen (name), TRUE,
6266                           lbp->buffer, match, lineno, linecharno);
6267               break;
6268             }
6269         }
6270   }
6271 }
6272
6273 \f
/*
 * Duplicate the NUL-terminated string CP.  The work is delegated to
 * savenstr with a length of strlen (CP), so the whole string is copied
 * into newly allocated storage, which is returned.
 */
static char *
savestr (const char *cp)
{
  const int whole = strlen (cp);

  return savenstr (cp, whole);
}
6283
6284 /*
6285  * Return a pointer to a space of size LEN+1 allocated with xnew where
6286  * the string CP has been copied for at most the first LEN characters.
6287  */
6288 static char *
6289 savenstr (const char *cp, int len)
6290 {
6291   register char *dp;
6292
6293   dp = xnew (len + 1, char);
6294   strncpy (dp, cp, len);
6295   dp[len] = '\0';
6296   return dp;
6297 }
6298
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Behaves like POSIX strrchr, included for portability.  As in strrchr,
 * C is converted to char before comparing, so a byte above 127 is found
 * whether the caller passes it as a plain char or as an unsigned value.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  register const char *r = NULL;
  const char target = (char) c;

  do
    {
      if (*sp == target)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6318
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Behaves like POSIX strchr, included for portability.  As in strchr,
 * C is converted to char before comparing, so a byte above 127 is found
 * whether the caller passes it as a plain char or as an unsigned value.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char target = (char) c;

  for (;; sp++)
    {
      if (*sp == target)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
6335
/*
 * Compare S1 and S2, treating two alphabetic characters as equal when
 * their lowcase forms are equal; any other pair must match exactly.
 * The scan stops at the end of S1 or at the first mismatch, and the
 * difference of the characters at that position is returned (taken on
 * the lowcase forms when both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6354
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after at most N characters.
 *
 * Returns 0 when the first N characters agree (in particular whenever
 * N <= 0); otherwise returns the difference of the characters at the
 * first position where the strings disagree, taken on the lowcase forms
 * when both characters are alphabetic.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  /* N counts the characters still allowed to be examined.  It is only
     decremented after a character pair has been found equal, so when
     the loop ends N > 0 means "stopped on a mismatch or at the end of
     S1" and N <= 0 means "N characters compared equal".  */
  for (; n > 0 && *s1 != '\0'; s1++, s2++, n--)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  /* The whole prefix matched: whatever follows must not be looked at,
     even if S1 happens to end exactly here.  */
  if (n <= 0)
    return 0;

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
6377
/* Advance CP while iswhite holds for the character it points at and
   return the resulting pointer (the end of string is not a space, so
   the scan stops there at the latest). */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6386
/* Advance CP up to the first character for which iswhite holds, or to
   the terminating NUL if there is none, and return the resulting
   pointer. */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6395
/* Report a fatal problem and terminate the program: S1 and S2 are
   handed unchanged to error, then the process exits with status
   EXIT_FAILURE.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6403
/* Like fatal, but the message is produced by perror: S1 is passed to
   perror, then the process exits with status EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6410
6411 static void
6412 suggest_asking_for_help (void)
6413 {
6414   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6415            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6416   exit (EXIT_FAILURE);
6417 }
6418
6419 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6420 static void
6421 error (const char *s1, const char *s2)
6422 {
6423   fprintf (stderr, "%s: ", progname);
6424   fprintf (stderr, s1, s2);
6425   fprintf (stderr, "\n");
6426 }
6427
6428 /* Return a newly-allocated string whose contents
6429    concatenate those of s1, s2, s3.  */
6430 static char *
6431 concat (const char *s1, const char *s2, const char *s3)
6432 {
6433   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6434   char *result = xnew (len1 + len2 + len3 + 1, char);
6435
6436   strcpy (result, s1);
6437   strcpy (result + len1, s2);
6438   strcpy (result + len1 + len2, s3);
6439   result[len1 + len2 + len3] = '\0';
6440
6441   return result;
6442 }
6443
6444 \f
6445 /* Does the same work as the system V getcwd, but does not need to
6446    guess the buffer size in advance. */
6447 static char *
6448 etags_getcwd (void)
6449 {
6450 #ifdef HAVE_GETCWD
6451   int bufsize = 200;
6452   char *path = xnew (bufsize, char);
6453
6454   while (getcwd (path, bufsize) == NULL)
6455     {
6456       if (errno != ERANGE)
6457         pfatal ("getcwd");
6458       bufsize *= 2;
6459       free (path);
6460       path = xnew (bufsize, char);
6461     }
6462
6463   canonicalize_filename (path);
6464   return path;
6465
6466 #else /* not HAVE_GETCWD */
6467 #if MSDOS
6468
6469   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6470
6471   getwd (path);
6472
6473   for (p = path; *p != '\0'; p++)
6474     if (*p == '\\')
6475       *p = '/';
6476     else
6477       *p = lowcase (*p);
6478
6479   return strdup (path);
6480 #else /* not MSDOS */
6481   linebuffer path;
6482   FILE *pipe;
6483
6484   linebuffer_init (&path);
6485   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6486   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6487     pfatal ("pwd");
6488   pclose (pipe);
6489
6490   return path.buffer;
6491 #endif /* not MSDOS */
6492 #endif /* not HAVE_GETCWD */
6493 }
6494
6495 /* Return a newly allocated string containing the file name of FILE
6496    relative to the absolute directory DIR (which should end with a slash). */
6497 static char *
6498 relative_filename (char *file, char *dir)
6499 {
6500   char *fp, *dp, *afn, *res;
6501   int i;
6502
6503   /* Find the common root of file and dir (with a trailing slash). */
6504   afn = absolute_filename (file, cwd);
6505   fp = afn;
6506   dp = dir;
6507   while (*fp++ == *dp++)
6508     continue;
6509   fp--, dp--;                   /* back to the first differing char */
6510 #ifdef DOS_NT
6511   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6512     return afn;
6513 #endif
6514   do                            /* look at the equal chars until '/' */
6515     fp--, dp--;
6516   while (*fp != '/');
6517
6518   /* Build a sequence of "../" strings for the resulting relative file name. */
6519   i = 0;
6520   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6521     i += 1;
6522   res = xnew (3*i + strlen (fp + 1) + 1, char);
6523   res[0] = '\0';
6524   while (i-- > 0)
6525     strcat (res, "../");
6526
6527   /* Add the file name relative to the common root of file and dir. */
6528   strcat (res, fp + 1);
6529   free (afn);
6530
6531   return res;
6532 }
6533
/* Copy the NUL-terminated string at SRC to DST, where DST precedes SRC
   in the same buffer.  Copying front to back keeps this overlapping
   move well defined, which strcpy does not guarantee. */
static void
shift_string_down (char *dst, const char *src)
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is; otherwise FILE is appended to DIR.
   The "/dirname/.." and "/." substrings are then deleted in place.  A
   ".." that has no preceding component to cancel is simply dropped.
   If nothing is left, "/" is returned.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back from SLASHP to the nearest earlier position
                 that starts an absolute name, i.e. the start of the
                 component to cancel.  CP never moves before RES. */
              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    break;
                }
              if (!filename_is_absolute (cp))
                cp = slashp;    /* nothing to cancel: just drop "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Close the gap, keeping what follows the "/..". */
              shift_string_down (cp, slashp + 3);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop the "/." and look at the same position again. */
              shift_string_down (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6605
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  When FILE contains no slash the result is a copy of DIR.
   Otherwise FILE is temporarily cut right after its last slash while
   absolute_filename is called on it, and restored afterwards. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *absdir;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  saved = last_slash[1];
  last_slash[1] = '\0';         /* keep only the directory part */
  absdir = absolute_filename (file, dir);
  last_slash[1] = saved;        /* put FILE back as it was */

  return absdir;
}
6625
/* Tell whether FN is an absolute file name: it starts with a slash or,
   under DOS_NT, with a letter, a colon and a slash.  FN must have been
   canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6637
/* Rewrite FN in place so that every run of separator characters becomes
   a single slash.  The separator is '/' by default; under DOS_NT it is
   the backslash, and an upper-case drive letter in front of a colon is
   lowcased first. */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR(c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads ahead while FN marks where the next output character
     goes; the output never gets ahead of the input. */
  src = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *fn = *src;
      else
        {
          *fn = '/';
          /* Swallow the rest of this run of separators. */
          while (src[1] == sep)
            src++;
        }
      src++;
      fn++;
    }
  *fn = '\0';
}
6667
6668 \f
6669 /* Initialize a linebuffer for use. */
6670 static void
6671 linebuffer_init (linebuffer *lbp)
6672 {
6673   lbp->size = (DEBUG) ? 3 : 200;
6674   lbp->buffer = xnew (lbp->size, char);
6675   lbp->buffer[0] = '\0';
6676   lbp->len = 0;
6677 }
6678
6679 /* Set the minimum size of a string contained in a linebuffer. */
6680 static void
6681 linebuffer_setlen (linebuffer *lbp, int toksize)
6682 {
6683   while (lbp->size <= toksize)
6684     {
6685       lbp->size *= 2;
6686       xrnew (lbp->buffer, lbp->size, char);
6687     }
6688   lbp->len = toksize;
6689 }
6690
6691 /* Like malloc but get fatal error if memory is exhausted. */
6692 static PTR
6693 xmalloc (unsigned int size)
6694 {
6695   PTR result = (PTR) malloc (size);
6696   if (result == NULL)
6697     fatal ("virtual memory exhausted", (char *)NULL);
6698   return result;
6699 }
6700
6701 static PTR
6702 xrealloc (char *ptr, unsigned int size)
6703 {
6704   PTR result = (PTR) realloc (ptr, size);
6705   if (result == NULL)
6706     fatal ("virtual memory exhausted", (char *)NULL);
6707   return result;
6708 }
6709
6710 /*
6711  * Local Variables:
6712  * indent-tabs-mode: t
6713  * tab-width: 8
6714  * fill-column: 79
6715  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6716  * c-file-style: "gnu"
6717  * End:
6718  */
6719
6720 /* etags.c ends here */