lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
     /* Revision string compiled into the program.  It is not declared
        static, so it has external linkage.  The "@(#)" prefix is presumably
        there for what(1)-style identification tools -- TODO confirm.  */
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
     /* Boolean constants.  This file does not use <stdbool.h>; `bool' is
        defined as int further down.  */
  83 #define TRUE    1
  84 #define FALSE   0
  85
     /* Normalize DEBUG to TRUE or FALSE so that it can be tested with `#if'.
        When debugging is off, NDEBUG is defined as well, which turns the
        assertions into no-ops.  */
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* disable assert */
  92 #endif
  93
  94 #ifdef HAVE_CONFIG_H            /* a config.h is available */
  95 # include <config.h>
  96   /* This is probably not necessary any more.  On some systems, config.h
  97      used to define static as nothing for the sake of unexec.  We don't
  98      want that here since we don't use unexec.  None of these systems
  99      are supported any more, but the idea is still mentioned in
 100      etc/PROBLEMS.  */
 101 # undef static
 102 # ifndef PTR                    /* for XEmacs */
 103 #   define PTR void *
 104 # endif
 105 #else  /* no config.h */
     /* Without config.h, choose PTR (the generic pointer type returned by
        xmalloc and xrealloc) according to whether the compiler claims to
        be standard C.  */
 106 # if defined (__STDC__) && (__STDC__ || defined (__SUNPRO_C))
 107 #   define PTR void *           /* for generic pointers */
 108 # else /* not standard C */
 109 #   define const                /* remove const for old compilers' sake */
 110 #   define PTR long *           /* don't use void* */
 111 # endif
 112 #endif /* !HAVE_CONFIG_H */
 113
 114 #ifndef _GNU_SOURCE
 115 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 116 #endif
 117
 118 /* WIN32_NATIVE is for XEmacs.
 119    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 120 #ifdef WIN32_NATIVE
 121 # undef MSDOS
 122 # undef  WINDOWSNT
 123 # define WINDOWSNT
 124 #endif /* WIN32_NATIVE */
 125
 126 #ifdef MSDOS
 127 # undef MSDOS
 128 # define MSDOS TRUE
 129 # include <fcntl.h>
 130 # include <sys/param.h>
 131 # include <io.h>
 132 # ifndef HAVE_CONFIG_H
 133 #   define DOS_NT
 134 #   include <sys/config.h>
 135 # endif
 136 #else
 137 # define MSDOS FALSE
 138 #endif /* MSDOS */
 139
 140 #ifdef WINDOWSNT
 141 # include <fcntl.h>
 142 # include <direct.h>
 143 # include <io.h>
 144 # define MAXPATHLEN _MAX_PATH
 145 # undef HAVE_NTGUI
 146 # undef  DOS_NT
 147 # define DOS_NT
 148 # ifndef HAVE_GETCWD
 149 #   define HAVE_GETCWD
 150 # endif /* undef HAVE_GETCWD */
 151 #else /* not WINDOWSNT */
 152 #endif /* !WINDOWSNT */
 153
 154 #include <unistd.h>
 155 #ifndef HAVE_UNISTD_H
 156 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 157     extern char *getcwd (char *buf, size_t size);
 158 # endif
 159 #endif /* HAVE_UNISTD_H */
 160
 161 #include <stdlib.h>
 162 #include <string.h>
 163 #include <stdio.h>
 164 #include <ctype.h>
 165 #include <errno.h>
 166 #include <sys/types.h>
 167 #include <sys/stat.h>
 168
 169 #include <assert.h>
 170 #ifdef NDEBUG
 171 # undef  assert                 /* some systems have a buggy assert.h */
 172 # define assert(x) ((void) 0)
 173 #endif
 174
 175 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 176 # define NO_LONG_OPTIONS TRUE
 177 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 178   extern char *optarg;
 179   extern int optind, opterr;
 180 #else
 181 # define NO_LONG_OPTIONS FALSE
 182 # include <getopt.h>
 183 #endif /* NO_LONG_OPTIONS */
 184
 185 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 186 # ifdef __CYGWIN__              /* compiling on Cygwin */
 187                              !!! NOTICE !!!
 188  the regex.h distributed with Cygwin is not compatible with etags, alas!
 189 If you want regular expression support, you should delete this notice and
 190               arrange to use the GNU regex.h and regex.c.
 191 # endif
 192 #endif
 193 #include <regex.h>
 194
 195 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 196  Leave it undefined to make the program "etags", which makes emacs-style
 197  tag tables and tags typedefs, #defines and struct/union/enum by default.
      Either way CTAGS ends up defined as TRUE or FALSE, so the rest of the
      file can test it with `#if CTAGS'.  */
 198 #ifdef CTAGS
 199 # undef  CTAGS
 200 # define CTAGS TRUE
 201 #else
 202 # define CTAGS FALSE
 203 #endif
 204
     /* String equality predicates.  Each one evaluates to nonzero when the
        comparison function it wraps returns zero: streq and strneq wrap
        strcmp and strncmp, strcaseeq and strncaseeq wrap etags_strcasecmp
        and etags_strncasecmp.  The `n' variants pass the length N through.
        S and T may be evaluated more than once, so they must not have
        side effects.
        Both arguments must be non-null: handing a null pointer to strcmp
        or strncmp is undefined behavior, so every assertion (active only
        when NDEBUG is not defined) requires S and T to both be valid.  */
 205 #define streq(s,t)      (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 206 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 207 #define strneq(s,t,n)   (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 208 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 209
 210 #define CHARS 256               /* 2^CHAR_BIT, assuming 8-bit chars */
     /* Reduce X to an index in 0..CHARS-1, so that even a negative char
        value can safely index the tables below and be handed to the
        <ctype.h> functions.  */
 211 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 212 #define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
 213 #define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
 214 #define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
 215 #define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
 216 #define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
 217
     /* <ctype.h> classification with the argument first passed through
        CHAR.  */
 218 #define ISALNUM(c)      isalnum (CHAR (c))
 219 #define ISALPHA(c)      isalpha (CHAR (c))
 220 #define ISDIGIT(c)      isdigit (CHAR (c))
 221 #define ISLOWER(c)      islower (CHAR (c))
 222
 223 #define lowcase(c)      tolower (CHAR (c))
 224
 225
 226 /*
 227  *      xnew, xrnew -- allocate, reallocate storage
 228  *
 229  * SYNOPSIS:    Type *xnew (int n, Type);
 230  *              void xrnew (OldPointer, int n, Type);
      *
      * xnew yields a pointer to room for N objects of type Type.  xrnew
      * resizes the storage OP points to so that it holds N objects and
      * assigns the result back to OP, so OP must be an lvalue.
      * With DEBUG on, both go through trace_malloc and trace_realloc
      * (chkmalloc.h is included for them) and record __FILE__ and
      * __LINE__; otherwise they go through xmalloc and xrealloc.
      * NOTE(review): N * sizeof (Type) is not checked for overflow.
 231  */
 232 #if DEBUG
 233 # include "chkmalloc.h"
 234 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 235                                                   (n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #else
 239 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 240 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 241                                         (char *) (op), (n) * sizeof (Type)))
 242 #endif
 243
 244 #define bool int                /* boolean type; see TRUE and FALSE */
     /* The long option table stores the addresses of several bool flags in
        its `flag' member, which getopt_long treats as int *, so bool has to
        remain int.  */
 245
 246 typedef void Lang_function (FILE *);    /* type of a language parse function */
 247
 248 typedef struct                  /* one entry of the compressors table */
 249 {
 250   const char *suffix;           /* file name suffix for this compressor */
 251   const char *command;          /* takes one arg and decompresses to stdout */
 252 } compressor;
 253
 254 typedef struct                  /* description of one supported language */
 255 {
 256   const char *name;             /* language name */
 257   const char *help;             /* detailed help for the language */
 258   Lang_function *function;      /* parse function */
 259   const char **suffixes;        /* name suffixes of this language's files */
 260   const char **filenames;       /* names of this language's files */
 261   const char **interpreters;    /* interpreters for this language */
 262   bool metasource;              /* source used to generate other sources */
 263 } language;
 264
 265 typedef struct fdesc            /* description of one input file */
 266 {
 267   struct fdesc *next;           /* for the linked list */
 268   char *infname;                /* uncompressed input file name */
 269   char *infabsname;             /* absolute uncompressed input file name */
 270   char *infabsdir;              /* absolute dir of input file */
 271   char *taggedfname;            /* file name to write in tagfile */
 272   language *lang;               /* language of file */
 273   char *prop;                   /* file properties to write in tagfile */
 274   bool usecharno;               /* etags tags shall contain char number */
 275   bool written;                 /* entry written in the tags file */
 276 } fdesc;
 277
 278 typedef struct node_st          /* one tag, kept in a binary tree */
 279 {                               /* sorting structure */
 280   struct node_st *left, *right; /* left and right sons */
 281   fdesc *fdp;                   /* description of file to whom tag belongs */
 282   char *name;                   /* tag name */
 283   char *regex;                  /* search regexp */
 284   bool valid;                   /* write this tag on the tag file */
 285   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 286   bool been_warned;             /* warning already given for duplicated tag */
 287   int lno;                      /* line number tag is on */
 288   long cno;                     /* character number line starts on */
 289 } node;
 290
 291 /*
 292  * A `linebuffer' is a structure which holds a line of text.
 293  * `readline_internal' reads a line from a stream into a linebuffer
 294  * and works regardless of the length of the line.
 295  * SIZE is the size of BUFFER, LEN is the length of the string in
 296  * BUFFER after readline reads it.
      * NOTE(review): SIZE is a long while LEN is an int.
 297  */
 298 typedef struct
 299 {
 300   long size;
 301   int len;
 302   char *buffer;
 303 } linebuffer;
 304
 305 /* Used to support mixing of --lang and file names.  Each command-line
        item is recorded with its kind, so that language and regexp
        specifications can be interleaved with the files they apply to.  */
 306 typedef struct
 307 {
 308   enum {
 309     at_language,                /* a language specification */
 310     at_regexp,                  /* a regular expression */
 311     at_filename,                /* a file name */
 312     at_stdin,                   /* read from stdin here */
 313     at_end                      /* stop parsing the list */
 314   } arg_type;                   /* argument type */
 315   language *lang;               /* language associated with the argument */
 316   char *what;                   /* the argument itself */
 317 } argument;
 318
 319 /* Structure defining a regular expression.  All of them are chained
        through P_NEXT into a single list.  */
 320 typedef struct regexp
 321 {
 322   struct regexp *p_next;        /* pointer to next in list */
 323   language *lang;               /* if set, use only for this language */
 324   char *pattern;                /* the regexp pattern */
 325   char *name;                   /* tag name */
 326   struct re_pattern_buffer *pat; /* the compiled pattern */
 327   struct re_registers regs;     /* re registers */
 328   bool error_signaled;          /* already signaled for this regexp */
 329   bool force_explicit_name;     /* do not allow implicit tag name */
 330   bool ignore_case;             /* ignore case when matching */
 331   bool multi_line;              /* do a multi-line match on the whole file */
 332 } regexp;
 333
 334
 335 /* Forward declarations of the per-language parse functions.  Apart
        from C_entries, which takes an extra int argument, they all have the
        Lang_function signature.  Many compilers barf on this:
 336         Lang_function Ada_funcs;
 337    so let's write it this way */
 338 static void Ada_funcs (FILE *);
 339 static void Asm_labels (FILE *);
 340 static void C_entries (int c_ext, FILE *);
 341 static void default_C_entries (FILE *);
 342 static void plain_C_entries (FILE *);
 343 static void Cjava_entries (FILE *);
 344 static void Cobol_paragraphs (FILE *);
 345 static void Cplusplus_entries (FILE *);
 346 static void Cstar_entries (FILE *);
 347 static void Erlang_functions (FILE *);
 348 static void Forth_words (FILE *);
 349 static void Fortran_functions (FILE *);
 350 static void HTML_labels (FILE *);
 351 static void Lisp_functions (FILE *);
 352 static void Lua_functions (FILE *);
 353 static void Makefile_targets (FILE *);
 354 static void Pascal_functions (FILE *);
 355 static void Perl_functions (FILE *);
 356 static void PHP_functions (FILE *);
 357 static void PS_functions (FILE *);
 358 static void Prolog_functions (FILE *);
 359 static void Python_functions (FILE *);
 360 static void Scheme_functions (FILE *);
 361 static void TeX_commands (FILE *);
 362 static void Texinfo_nodes (FILE *);
 363 static void Yacc_entries (FILE *);
 364 static void just_read_file (FILE *);
 365
     /* Program entry point and command-line help.  */
 366 static void print_language_names (void);
 367 static void print_version (void);
 368 static void print_help (argument *);
 369 int main (int, char **);
 370
     /* Compressor and language lookup, line reading.  */
 371 static compressor *get_compressor_from_suffix (char *, char **);
 372 static language *get_language_from_langname (const char *);
 373 static language *get_language_from_interpreter (char *);
 374 static language *get_language_from_filename (char *, bool);
 375 static void readline (linebuffer *, FILE *);
 376 static long readline_internal (linebuffer *, FILE *);
 377 static bool nocase_tail (const char *);
 378 static void get_tag (char *, char **);
 379
     /* Regexp handling and error reporting.
        NOTE(review): NO_RETURN is not defined in the visible part of this
        file; presumably config.h supplies it -- confirm that builds without
        HAVE_CONFIG_H still get a definition.  */
 380 static void analyse_regex (char *);
 381 static void free_regexps (void);
 382 static void regex_tag_multiline (void);
 383 static void error (const char *, const char *);
 384 static void suggest_asking_for_help (void) NO_RETURN;
 385 void fatal (const char *, const char *) NO_RETURN;
 386 static void pfatal (const char *) NO_RETURN;
 387 static void add_node (node *, node **);
 388
     /* File processing and tag output.  */
 389 static void init (void);
 390 static void process_file_name (char *, language *);
 391 static void process_file (FILE *, char *, language *);
 392 static void find_entries (FILE *);
 393 static void free_tree (node *);
 394 static void free_fdesc (fdesc *);
 395 static void pfnote (char *, bool, char *, int, int, long);
 396 static void make_tag (const char *, int, bool, char *, int, int, long);
 397 static void invalidate_nodes (fdesc *, node **);
 398 static void put_entries (node *);
 399
     /* String, file name, line buffer and memory helpers.  */
 400 static char *concat (const char *, const char *, const char *);
 401 static char *skip_spaces (char *);
 402 static char *skip_non_spaces (char *);
 403 static char *savenstr (const char *, int);
 404 static char *savestr (const char *);
 405 static char *etags_strchr (const char *, int);
 406 static char *etags_strrchr (const char *, int);
 407 static int etags_strcasecmp (const char *, const char *);
 408 static int etags_strncasecmp (const char *, const char *, int);
 409 static char *etags_getcwd (void);
 410 static char *relative_filename (char *, char *);
 411 static char *absolute_filename (char *, char *);
 412 static char *absolute_dirname (char *, char *);
 413 static bool filename_is_absolute (char *f);
 414 static void canonicalize_filename (char *);
 415 static void linebuffer_init (linebuffer *);
 416 static void linebuffer_setlen (linebuffer *, int);
 417 static PTR xmalloc (size_t);
 418 static PTR xrealloc (char *, size_t);
 419
 420 \f
 421 static char searchar = '/';     /* use /.../ searches */
 422
 423 static char *tagfile;           /* output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr for tags file */
     /* NOTE(review): ptrdiff_t comes from <stddef.h>, which is not among the
        headers this file includes directly -- confirm that one of them
        provides it on every supported platform.  */
 428 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 429
 430 static fdesc *fdhead;           /* head of file description list */
 431 static fdesc *curfdp;           /* current file description */
 432 static int lineno;              /* line number of current line */
 433 static long charno;             /* current character number */
 434 static long linecharno;         /* charno of start of current line */
 435 static char *dbp;               /* pointer to start of current tag */
 436
     /* Presumably the value stored where no character number is
        available -- TODO confirm against the users of this constant.  */
 437 static const int invalidcharno = -1;
 438
 439 static node *nodehead;          /* the head of the binary tree of tags */
 440 static node *last_node;         /* the last node created */
 441
 442 static linebuffer lb;           /* the current line */
 443 static linebuffer filebuf;      /* a buffer containing the whole file */
 444 static linebuffer token_name;   /* a buffer containing a tag name */
 445
 446 /* boolean "functions" (see init).  These tables are indexed by CHAR (c)
        and are read through the iswhite, notinname, intoken, begtoken and
        endtoken macros.  Presumably init fills them in from the character
        strings that follow -- TODO confirm.  */
 447 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 448 static const char
 449   /* white chars */
 450   *white = " \f\t\n\r\v",
 451   /* not in a name */
 452   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 453   /* token ending chars */
 454   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 455   /* token starting chars */
 456   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 457   /* valid in-token chars */
 458   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 459
     /* Flags controlled by command-line options.  The long option table
        refers to packages_only, declarations, no_line_directive,
        no_duplicates, members and globals by address, so that getopt_long
        can set them directly.  */
 460 static bool append_to_tagfile;  /* -a: append to tags */
 461 /* The next five default to TRUE in C and derived languages.  */
 462 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 463 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 464                                 /* 0 struct/enum/union decls, and C++ */
 465                                 /* member functions. */
 466 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 467                                 /* constants and variables. */
 468                                 /* -D: opposite of -d.  Default under ctags. */
 469 static bool globals;            /* create tags for global variables */
 470 static bool members;            /* create tags for C member variables */
 471 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 472 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 473 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 474 static bool update;             /* -u: update tags */
 475 static bool vgrind_style;       /* -v: create vgrind style index output */
 476 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 477 static bool cxref_style;        /* -x: create cxref style output */
 478 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 479 static bool ignoreindent;       /* -I: ignore indentation in C */
 480 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 481
 482 /* STDIN is defined in LynxOS system headers */
 483 #ifdef STDIN
 484 # undef STDIN
 485 #endif
 486
     /* The value is larger than any character code, so it cannot collide
        with a single-letter option.  */
 487 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 488 static bool parsing_stdin;      /* --parse-stdin used */
 489
 490 static regexp *p_head;          /* list of all regexps */
 491 static bool need_filebuf;       /* some regexes are multi-line */
 492
 493 static struct option longopts[] =
     /* Long option table for getopt_long.  For an entry whose third member
        is NULL, getopt_long returns the fourth member (an option letter, or
        STDIN).  For an entry whose third member points to a flag,
        getopt_long stores the fourth member in that flag instead.
        The CTAGS build and the etags build each add their own options.
        NOTE(review): "help" is listed twice; a lookup by name can only
        ever select one of the two entries, so the other looks
        unreachable -- confirm which one is intended.  */
 494 {
 495   { "append",             no_argument,       NULL,               'a'   },
 496   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 497   { "c++",                no_argument,       NULL,               'C'   },
 498   { "declarations",       no_argument,       &declarations,      TRUE  },
 499   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 500   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 501   { "help",               no_argument,       NULL,               'h'   },
 502   { "help",               no_argument,       NULL,               'H'   },
 503   { "ignore-indentation", no_argument,       NULL,               'I'   },
 504   { "language",           required_argument, NULL,               'l'   },
 505   { "members",            no_argument,       &members,           TRUE  },
 506   { "no-members",         no_argument,       &members,           FALSE },
 507   { "output",             required_argument, NULL,               'o'   },
 508   { "regex",              required_argument, NULL,               'r'   },
 509   { "no-regex",           no_argument,       NULL,               'R'   },
 510   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 511   { "parse-stdin",        required_argument, NULL,               STDIN },
 512   { "version",            no_argument,       NULL,               'V'   },
 513
 514 #if CTAGS /* Ctags options */
 515   { "backward-search",    no_argument,       NULL,               'B'   },
 516   { "cxref",              no_argument,       NULL,               'x'   },
 517   { "defines",            no_argument,       NULL,               'd'   },
 518   { "globals",            no_argument,       &globals,           TRUE  },
 519   { "typedefs",           no_argument,       NULL,               't'   },
 520   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 521   { "update",             no_argument,       NULL,               'u'   },
 522   { "vgrind",             no_argument,       NULL,               'v'   },
 523   { "no-warn",            no_argument,       NULL,               'w'   },
 524
 525 #else /* Etags options */
 526   { "no-defines",         no_argument,       NULL,               'D'   },
 527   { "no-globals",         no_argument,       &globals,           FALSE },
 528   { "include",            required_argument, NULL,               'i'   },
 529 #endif
 530   { NULL }
 531 };
 532
 533 static compressor compressors[] =
     /* Known compressed-file suffixes, each with the command that
        decompresses such a file to standard output.  The entry with a
        NULL suffix ends the table.  */
 534 {
 535   { "z", "gzip -d -c"},
 536   { "Z", "gzip -d -c"},
 537   { "gz", "gzip -d -c"},
 538   { "GZ", "gzip -d -c"},
 539   { "bz2", "bzip2 -d -c" },
 540   { "xz", "xz -d -c" },
 541   { NULL }
 542 };
 543
 544 /*
 545  * Language stuff.
      *
      * For each language there is a NULL-terminated list of the file name
      * suffixes (or whole file names, or interpreter names) associated
      * with it, in most cases followed by its detailed help text.
 546  */
 547
 548 /* Ada code */
 549 static const char *Ada_suffixes [] =
 550   { "ads", "adb", "ada", NULL };
 551 static const char Ada_help [] =
 552 "In Ada code, functions, procedures, packages, tasks and types are\n\
 553 tags.  Use the `--packages-only' option to create tags for\n\
 554 packages only.\n\
 555 Ada tag names have suffixes indicating the type of entity:\n\
 556         Entity type:    Qualifier:\n\
 557         ------------    ----------\n\
 558         function        /f\n\
 559         procedure       /p\n\
 560         package spec    /s\n\
 561         package body    /b\n\
 562         type            /t\n\
 563         task            /k\n\
 564 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 565 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 566 will just search for any tag `bidule'.";
 567
 568 /* Assembly code */
 569 static const char *Asm_suffixes [] =
 570   { "a",        /* Unix assembler */
 571     "asm", /* Microcontroller assembly */
 572     "def", /* BSO/Tasking definition includes  */
 573     "inc", /* Microcontroller include files */
 574     "ins", /* Microcontroller include files */
 575     "s", "sa", /* Unix assembler */
 576     "S",   /* cpp-processed Unix assembler */
 577     "src", /* BSO/Tasking C compiler output */
 578     NULL
 579   };
 580 static const char Asm_help [] =
 581 "In assembler code, labels appearing at the beginning of a line,\n\
 582 followed by a colon, are tags.";
 583
 584
 585 /* Note that .c and .h can be considered C++, if the --c++ flag was
 586    given, or if the `class' or `template' keywords are met inside the file.
 587    That is why default_C_entries is called for these.
        The help text differs between the CTAGS build and the etags build
        because the two programs tag different things by default.  */
 588 static const char *default_C_suffixes [] =
 589   { "c", "h", NULL };
 590 #if CTAGS                               /* C help for Ctags */
 591 static const char default_C_help [] =
 592 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 593 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 594 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 595 Use --globals to tag global variables.\n\
 596 You can tag function declarations and external variables by\n\
 597 using `--declarations', and struct members by using `--members'.";
 598 #else                                   /* C help for Etags */
 599 static const char default_C_help [] =
 600 "In C code, any C function or typedef is a tag, and so are\n\
 601 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 602 definitions and `enum' constants are tags unless you specify\n\
 603 `--no-defines'.  Global variables are tags unless you specify\n\
 604 `--no-globals' and so are struct members unless you specify\n\
 605 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 606 `--no-members' can make the tags table file much smaller.\n\
 607 You can tag function declarations and external variables by\n\
 608 using `--declarations'.";
 609 #endif  /* C help for Ctags and Etags */
 610
     /* C++ code */
 611 static const char *Cplusplus_suffixes [] =
 612   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 613     "M",                        /* Objective C++ */
 614     "pdb",                      /* PostScript with C syntax */
 615     NULL };
 616 static const char Cplusplus_help [] =
 617 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 618 --help --lang=c --lang=c++ for full help.)\n\
 619 In addition to C tags, member functions are also recognized.  Member\n\
 620 variables are recognized unless you use the `--no-members' option.\n\
 621 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 622 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 623 `operator+'.";
 624
     /* Java code.  The help text is read-only data, so it is declared
        const like the help strings of the other languages.  */
 625 static const char *Cjava_suffixes [] =
 626   { "java", NULL };
 627 static const char Cjava_help [] =
 628 "In Java code, all the tags constructs of C and C++ code are\n\
 629 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 630
 631
     /* Cobol code.  The help text is read-only data, so it is declared
        const like the help strings of the other languages.  */
 632 static const char *Cobol_suffixes [] =
 633   { "COB", "cob", NULL };
 634 static const char Cobol_help [] =
 635 "In Cobol code, tags are paragraph names; that is, any word\n\
 636 starting in column 8 and followed by a period.";
 637
     /* C* code; there is no help text for it.  */
 638 static const char *Cstar_suffixes [] =
 639   { "cs", "hs", NULL };
 640
     /* Erlang code */
 641 static const char *Erlang_suffixes [] =
 642   { "erl", "hrl", NULL };
 643 static const char Erlang_help [] =
 644 "In Erlang code, the tags are the functions, records and macros\n\
 645 defined in the file.";
 646
     /* Forth code.  The suffix list is private to this file, so it is
        static like every other suffix list.  */
 647 static const char *Forth_suffixes [] =
 648   { "fth", "tok", NULL };
 649 static const char Forth_help [] =
 650 "In Forth code, tags are words defined by `:',\n\
 651 constant, code, create, defer, value, variable, buffer:, field.";
 652
     /* Fortran code */
 653 static const char *Fortran_suffixes [] =
 654   { "F", "f", "f90", "for", NULL };
 655 static const char Fortran_help [] =
 656 "In Fortran code, functions, subroutines and block data are tags.";
 657
     /* HTML files */
 658 static const char *HTML_suffixes [] =
 659   { "htm", "html", "shtml", NULL };
 660 static const char HTML_help [] =
 661 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 662 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 663 occurrences of `id='.";
 664
     /* Lisp code */
 665 static const char *Lisp_suffixes [] =
 666   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 667 static const char Lisp_help [] =
 668 "In Lisp code, any function defined with `defun', any variable\n\
 669 defined with `defvar' or `defconst', and in general the first\n\
 670 argument of any expression that starts with `(def' in column zero\n\
 671 is a tag.";
 672
     /* Lua scripts */
 673 static const char *Lua_suffixes [] =
 674   { "lua", "LUA", NULL };
 675 static const char Lua_help [] =
 676 "In Lua scripts, all functions are tags.";
 677
     /* Makefiles; these are whole file names, not suffixes.  */
 678 static const char *Makefile_filenames [] =
 679   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 680 static const char Makefile_help [] =
 681 "In makefiles, targets are tags; additionally, variables are tags\n\
 682 unless you specify `--no-globals'.";
 683
     /* Objective C code */
 684 static const char *Objc_suffixes [] =
 685   { "lm",                       /* Objective lex file */
 686     "m",                        /* Objective C file */
 687      NULL };
 688 static const char Objc_help [] =
 689 "In Objective C code, tags include Objective C definitions for classes,\n\
 690 class categories, methods and protocols.  Tags for variables and\n\
 691 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 692 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 693
     /* Pascal code */
 694 static const char *Pascal_suffixes [] =
 695   { "p", "pas", NULL };
 696 static const char Pascal_help [] =
 697 "In Pascal code, the tags are the functions and procedures defined\n\
 698 in the file.";
 699 /* " // this is for working around an Emacs highlighting bug... */
 700
     /* Perl code.  NOTE(review): "@PERL@" looks like a build-time
        substitution placeholder -- confirm how it is meant to be filled
        in.  */
 701 static const char *Perl_suffixes [] =
 702   { "pl", "pm", NULL };
 703 static const char *Perl_interpreters [] =
 704   { "perl", "@PERL@", NULL };
 705 static const char Perl_help [] =
 706 "In Perl code, the tags are the packages, subroutines and variables\n\
 707 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 708 `--globals' if you want to tag global variables.  Tags for\n\
 709 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 710 defined in the default package is `main::SUB'.";
 711
     /* PHP code */
 712 static const char *PHP_suffixes [] =
 713   { "php", "php3", "php4", NULL };
 714 static const char PHP_help [] =
 715 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 716 the `--no-members' option, vars are tags too.";
 717
     /* Plain C; there is no help text for it.  */
 718 static const char *plain_C_suffixes [] =
 719   { "pc",                       /* Pro*C file */
 720      NULL };
 721
     /* PostScript code */
 722 static const char *PS_suffixes [] =
 723   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 724 static const char PS_help [] =
 725 "In PostScript code, the tags are the functions.";
 726
     /* Prolog code */
 727 static const char *Prolog_suffixes [] =
 728   { "prolog", NULL };
 729 static const char Prolog_help [] =
 730 "In Prolog code, tags are predicates and rules at the beginning of\n\
 731 line.";
 732
     /* Python code */
 733 static const char *Python_suffixes [] =
 734   { "py", NULL };
 735 static const char Python_help [] =
 736 "In Python code, `def' or `class' at the beginning of a line\n\
 737 generate a tag.";
 738
 739 /* Can't do the `SCM' or `scm' prefix with a version number. */
 740 static const char *Scheme_suffixes [] =
 741   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 742 static const char Scheme_help [] =
 743 "In Scheme code, tags include anything defined with `def' or with a\n\
 744 construct whose name starts with `def'.  They also include\n\
 745 variables set with `set!' at top level in the file.";
 746
     /* TeX and LaTeX files */
 747 static const char *TeX_suffixes [] =
 748   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 749 static const char TeX_help [] =
 750 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 751 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 752 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 753 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 754 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 755 \n\
 756 Other commands can be specified by setting the environment variable\n\
 757 `TEXTAGS' to a colon-separated list like, for example,\n\
 758      TEXTAGS=\"mycommand:myothercommand\".";
 759
 760
 761 static const char *Texinfo_suffixes [] =
 762   { "texi", "texinfo", "txi", NULL };
 763 static const char Texinfo_help [] =
 764 "for texinfo files, lines starting with @node are tagged.";
 765
 766 static const char *Yacc_suffixes [] =
 767   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 768 static const char Yacc_help [] =
 769 "In Bison or Yacc input files, each rule defines as a tag the\n\
 770 nonterminal it constructs.  The portions of the file that contain\n\
 771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 772 for full help).";
 773
 774 static const char auto_help [] =
 775 "`auto' is not a real language, it indicates to use\n\
 776 a default language for files base on file name suffix and file contents.";
 777
 778 static const char none_help [] =
 779 "`none' is not a real language, it indicates to only do\n\
 780 regexp processing on files.";
 781
 782 static const char no_lang_help [] =
 783 "No detailed help available for this language.";
 784
 785
 786 /*
 787  * Table of languages.
 788  *
 789  * It is ok for a given function to be listed under more than one
 790  * name.  I just didn't.
 791  */
 792
 793 static language lang_names [] =
 794 {
 795   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 796   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 797   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 798   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 799   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 800   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 801   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 802   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 803   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 804   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 805   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 806   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 807   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 808   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 809   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 810   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 811   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 812   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 813   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 814   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 815   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 816   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 817   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 818   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 819   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 820   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 821   { "auto",      auto_help },                      /* default guessing scheme */
 822   { "none",      none_help,      just_read_file }, /* regexp matching only */
 823   { NULL }                /* end of list */
 824 };
 825
 826 \f
 827 static void
 828 print_language_names (void)
 829 {
 830   language *lang;
 831   const char **name, **ext;
 832
 833   puts ("\nThese are the currently supported languages, along with the\n\
 834 default file names and dot suffixes:");
 835   for (lang = lang_names; lang->name != NULL; lang++)
 836     {
 837       printf ("  %-*s", 10, lang->name);
 838       if (lang->filenames != NULL)
 839         for (name = lang->filenames; *name != NULL; name++)
 840           printf (" %s", *name);
 841       if (lang->suffixes != NULL)
 842         for (ext = lang->suffixes; *ext != NULL; ext++)
 843           printf (" .%s", *ext);
 844       puts ("");
 845     }
 846   puts ("where `auto' means use default language for files based on file\n\
 847 name suffix, and `none' means only do regexp processing on files.\n\
 848 If no language is specified and no matching suffix is found,\n\
 849 the first line of the file is read for a sharp-bang (#!) sequence\n\
 850 followed by the name of an interpreter.  If no such sequence is found,\n\
 851 Fortran is tried first; if no tags are found, C is tried next.\n\
 852 When parsing any C file, a \"class\" or \"template\" keyword\n\
 853 switches to C++.");
 854   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 855 \n\
 856 For detailed help on a given language use, for example,\n\
 857 etags --help --lang=ada.");
 858 }
 859
 860 #ifndef EMACS_NAME
 861 # define EMACS_NAME "standalone"
 862 #endif
 863 #ifndef VERSION
 864 # define VERSION "17.38.1.4"
 865 #endif
 866 static void
 867 print_version (void)
 868 {
 869   /* Makes it easier to update automatically. */
 870   char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
 871
 872   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 873   puts (emacs_copyright);
 874   puts ("This program is distributed under the terms in ETAGS.README");
 875
 876   exit (EXIT_SUCCESS);
 877 }
 878
 879 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 880 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 881 #endif
 882
 883 static void
 884 print_help (argument *argbuffer)
 885 {
 886   bool help_for_lang = FALSE;
 887
 888   for (; argbuffer->arg_type != at_end; argbuffer++)
 889     if (argbuffer->arg_type == at_language)
 890       {
 891         if (help_for_lang)
 892           puts ("");
 893         puts (argbuffer->lang->help);
 894         help_for_lang = TRUE;
 895       }
 896
 897   if (help_for_lang)
 898     exit (EXIT_SUCCESS);
 899
 900   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 901 \n\
 902 These are the options accepted by %s.\n", progname, progname);
 903   if (NO_LONG_OPTIONS)
 904     puts ("WARNING: long option names do not work with this executable,\n\
 905 as it is not linked with GNU getopt.");
 906   else
 907     puts ("You may use unambiguous abbreviations for the long option names.");
 908   puts ("  A - as file name means read names from stdin (one per line).\n\
 909 Absolute names are stored in the output file as they are.\n\
 910 Relative ones are stored relative to the output file's directory.\n");
 911
 912   puts ("-a, --append\n\
 913         Append tag entries to existing tags file.");
 914
 915   puts ("--packages-only\n\
 916         For Ada files, only generate tags for packages.");
 917
 918   if (CTAGS)
 919     puts ("-B, --backward-search\n\
 920         Write the search commands for the tag entries using '?', the\n\
 921         backward-search command instead of '/', the forward-search command.");
 922
 923   /* This option is mostly obsolete, because etags can now automatically
 924      detect C++.  Retained for backward compatibility and for debugging and
 925      experimentation.  In principle, we could want to tag as C++ even
 926      before any "class" or "template" keyword.
 927   puts ("-C, --c++\n\
 928         Treat files whose name suffix defaults to C language as C++ files.");
 929   */
 930
 931   puts ("--declarations\n\
 932         In C and derived languages, create tags for function declarations,");
 933   if (CTAGS)
 934     puts ("\tand create tags for extern variables if --globals is used.");
 935   else
 936     puts
 937       ("\tand create tags for extern variables unless --no-globals is used.");
 938
 939   if (CTAGS)
 940     puts ("-d, --defines\n\
 941         Create tag entries for C #define constants and enum constants, too.");
 942   else
 943     puts ("-D, --no-defines\n\
 944         Don't create tag entries for C #define constants and enum constants.\n\
 945         This makes the tags file smaller.");
 946
 947   if (!CTAGS)
 948     puts ("-i FILE, --include=FILE\n\
 949         Include a note in tag file indicating that, when searching for\n\
 950         a tag, one should also consult the tags file FILE after\n\
 951         checking the current file.");
 952
 953   puts ("-l LANG, --language=LANG\n\
 954         Force the following files to be considered as written in the\n\
 955         named language up to the next --language=LANG option.");
 956
 957   if (CTAGS)
 958     puts ("--globals\n\
 959         Create tag entries for global variables in some languages.");
 960   else
 961     puts ("--no-globals\n\
 962         Do not create tag entries for global variables in some\n\
 963         languages.  This makes the tags file smaller.");
 964
 965   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 966     puts ("--no-line-directive\n\
 967         Ignore #line preprocessor directives in C and derived languages.");
 968
 969   if (CTAGS)
 970     puts ("--members\n\
 971         Create tag entries for members of structures in some languages.");
 972   else
 973     puts ("--no-members\n\
 974         Do not create tag entries for members of structures\n\
 975         in some languages.");
 976
 977   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 978         Make a tag for each line matching a regular expression pattern\n\
 979         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 980         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 981         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 982         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 983   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 984         For example Tcl named tags can be created with:\n\
 985           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 986         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 987         `m' means to allow multi-line matches, `s' implies `m' and\n\
 988         causes dot to match any character, including newline.");
 989
 990   puts ("-R, --no-regex\n\
 991         Don't create tags from regexps for the following files.");
 992
 993   puts ("-I, --ignore-indentation\n\
 994         In C and C++ do not assume that a closing brace in the first\n\
 995         column is the final brace of a function or structure definition.");
 996
 997   puts ("-o FILE, --output=FILE\n\
 998         Write the tags to FILE.");
 999
1000   puts ("--parse-stdin=NAME\n\
1001         Read from standard input and record tags as belonging to file NAME.");
1002
1003   if (CTAGS)
1004     {
1005       puts ("-t, --typedefs\n\
1006         Generate tag entries for C and Ada typedefs.");
1007       puts ("-T, --typedefs-and-c++\n\
1008         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1009         and C++ member functions.");
1010     }
1011
1012   if (CTAGS)
1013     puts ("-u, --update\n\
1014         Update the tag entries for the given files, leaving tag\n\
1015         entries for other files in place.  Currently, this is\n\
1016         implemented by deleting the existing entries for the given\n\
1017         files and then rewriting the new entries at the end of the\n\
1018         tags file.  It is often faster to simply rebuild the entire\n\
1019         tag file than to use this.");
1020
1021   if (CTAGS)
1022     {
1023       puts ("-v, --vgrind\n\
1024         Print on the standard output an index of items intended for\n\
1025         human consumption, similar to the output of vgrind.  The index\n\
1026         is sorted, and gives the page number of each item.");
1027
1028       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1029         puts ("-w, --no-duplicates\n\
1030         Do not create duplicate tag entries, for compatibility with\n\
1031         traditional ctags.");
1032
1033       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1034         puts ("-w, --no-warn\n\
1035         Suppress warning messages about duplicate tag entries.");
1036
1037       puts ("-x, --cxref\n\
1038         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1039         The output uses line numbers instead of page numbers, but\n\
1040         beyond that the differences are cosmetic; try both to see\n\
1041         which you like.");
1042     }
1043
1044   puts ("-V, --version\n\
1045         Print the version of the program.\n\
1046 -h, --help\n\
1047         Print this help message.\n\
1048         Followed by one or more `--language' options prints detailed\n\
1049         help about tag generation for the specified languages.");
1050
1051   print_language_names ();
1052
1053   puts ("");
1054   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1055
1056   exit (EXIT_SUCCESS);
1057 }
1058
1059 \f
1060 int
1061 main (int argc, char **argv)
1062 {
1063   int i;
1064   unsigned int nincluded_files;
1065   char **included_files;
1066   argument *argbuffer;
1067   int current_arg, file_count;
1068   linebuffer filename_lb;
1069   bool help_asked = FALSE;
1070   ptrdiff_t len;
1071  char *optstring;
1072  int opt;
1073
1074
1075 #ifdef DOS_NT
1076   _fmode = O_BINARY;   /* all of files are treated as binary files */
1077 #endif /* DOS_NT */
1078
1079   progname = argv[0];
1080   nincluded_files = 0;
1081   included_files = xnew (argc, char *);
1082   current_arg = 0;
1083   file_count = 0;
1084
1085   /* Allocate enough no matter what happens.  Overkill, but each one
1086      is small. */
1087   argbuffer = xnew (argc, argument);
1088
1089   /*
1090    * Always find typedefs and structure tags.
1091    * Also default to find macro constants, enum constants, struct
1092    * members and global variables.  Do it for both etags and ctags.
1093    */
1094   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1095   globals = members = TRUE;
1096
1097   /* When the optstring begins with a '-' getopt_long does not rearrange the
1098      non-options arguments to be at the end, but leaves them alone. */
1099   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1100                       "ac:Cf:Il:o:r:RSVhH",
1101                       (CTAGS) ? "BxdtTuvw" : "Di:");
1102
1103   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1104     switch (opt)
1105       {
1106       case 0:
1107         /* If getopt returns 0, then it has already processed a
1108            long-named option.  We should do nothing.  */
1109         break;
1110
1111       case 1:
1112         /* This means that a file name has been seen.  Record it. */
1113         argbuffer[current_arg].arg_type = at_filename;
1114         argbuffer[current_arg].what     = optarg;
1115         len = strlen (optarg);
1116         if (whatlen_max < len)
1117           whatlen_max = len;
1118         ++current_arg;
1119         ++file_count;
1120         break;
1121
1122       case STDIN:
1123         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1124         argbuffer[current_arg].arg_type = at_stdin;
1125         argbuffer[current_arg].what     = optarg;
1126         len = strlen (optarg);
1127         if (whatlen_max < len)
1128           whatlen_max = len;
1129         ++current_arg;
1130         ++file_count;
1131         if (parsing_stdin)
1132           fatal ("cannot parse standard input more than once", (char *)NULL);
1133         parsing_stdin = TRUE;
1134         break;
1135
1136         /* Common options. */
1137       case 'a': append_to_tagfile = TRUE;       break;
1138       case 'C': cplusplus = TRUE;               break;
1139       case 'f':         /* for compatibility with old makefiles */
1140       case 'o':
1141         if (tagfile)
1142           {
1143             error ("-o option may only be given once.", (char *)NULL);
1144             suggest_asking_for_help ();
1145             /* NOTREACHED */
1146           }
1147         tagfile = optarg;
1148         break;
1149       case 'I':
1150       case 'S':         /* for backward compatibility */
1151         ignoreindent = TRUE;
1152         break;
1153       case 'l':
1154         {
1155           language *lang = get_language_from_langname (optarg);
1156           if (lang != NULL)
1157             {
1158               argbuffer[current_arg].lang = lang;
1159               argbuffer[current_arg].arg_type = at_language;
1160               ++current_arg;
1161             }
1162         }
1163         break;
1164       case 'c':
1165         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1166         optarg = concat (optarg, "i", ""); /* memory leak here */
1167         /* FALLTHRU */
1168       case 'r':
1169         argbuffer[current_arg].arg_type = at_regexp;
1170         argbuffer[current_arg].what = optarg;
1171         len = strlen (optarg);
1172         if (whatlen_max < len)
1173           whatlen_max = len;
1174         ++current_arg;
1175         break;
1176       case 'R':
1177         argbuffer[current_arg].arg_type = at_regexp;
1178         argbuffer[current_arg].what = NULL;
1179         ++current_arg;
1180         break;
1181       case 'V':
1182         print_version ();
1183         break;
1184       case 'h':
1185       case 'H':
1186         help_asked = TRUE;
1187         break;
1188
1189         /* Etags options */
1190       case 'D': constantypedefs = FALSE;                        break;
1191       case 'i': included_files[nincluded_files++] = optarg;     break;
1192
1193         /* Ctags options. */
1194       case 'B': searchar = '?';                                 break;
1195       case 'd': constantypedefs = TRUE;                         break;
1196       case 't': typedefs = TRUE;                                break;
1197       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1198       case 'u': update = TRUE;                                  break;
1199       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1200       case 'x': cxref_style = TRUE;                             break;
1201       case 'w': no_warnings = TRUE;                             break;
1202       default:
1203         suggest_asking_for_help ();
1204         /* NOTREACHED */
1205       }
1206
1207   /* No more options.  Store the rest of arguments. */
1208   for (; optind < argc; optind++)
1209     {
1210       argbuffer[current_arg].arg_type = at_filename;
1211       argbuffer[current_arg].what = argv[optind];
1212       len = strlen (argv[optind]);
1213       if (whatlen_max < len)
1214         whatlen_max = len;
1215       ++current_arg;
1216       ++file_count;
1217     }
1218
1219   argbuffer[current_arg].arg_type = at_end;
1220
1221   if (help_asked)
1222     print_help (argbuffer);
1223     /* NOTREACHED */
1224
1225   if (nincluded_files == 0 && file_count == 0)
1226     {
1227       error ("no input files specified.", (char *)NULL);
1228       suggest_asking_for_help ();
1229       /* NOTREACHED */
1230     }
1231
1232   if (tagfile == NULL)
1233     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1234   cwd = etags_getcwd ();        /* the current working directory */
1235   if (cwd[strlen (cwd) - 1] != '/')
1236     {
1237       char *oldcwd = cwd;
1238       cwd = concat (oldcwd, "/", "");
1239       free (oldcwd);
1240     }
1241
1242   /* Compute base directory for relative file names. */
1243   if (streq (tagfile, "-")
1244       || strneq (tagfile, "/dev/", 5))
1245     tagfiledir = cwd;            /* relative file names are relative to cwd */
1246   else
1247     {
1248       canonicalize_filename (tagfile);
1249       tagfiledir = absolute_dirname (tagfile, cwd);
1250     }
1251
1252   init ();                      /* set up boolean "functions" */
1253
1254   linebuffer_init (&lb);
1255   linebuffer_init (&filename_lb);
1256   linebuffer_init (&filebuf);
1257   linebuffer_init (&token_name);
1258
1259   if (!CTAGS)
1260     {
1261       if (streq (tagfile, "-"))
1262         {
1263           tagf = stdout;
1264 #ifdef DOS_NT
1265           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1266              doesn't take effect until after `stdout' is already open). */
1267           if (!isatty (fileno (stdout)))
1268             setmode (fileno (stdout), O_BINARY);
1269 #endif /* DOS_NT */
1270         }
1271       else
1272         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1273       if (tagf == NULL)
1274         pfatal (tagfile);
1275     }
1276
1277   /*
1278    * Loop through files finding functions.
1279    */
1280   for (i = 0; i < current_arg; i++)
1281     {
1282       static language *lang;    /* non-NULL if language is forced */
1283       char *this_file;
1284
1285       switch (argbuffer[i].arg_type)
1286         {
1287         case at_language:
1288           lang = argbuffer[i].lang;
1289           break;
1290         case at_regexp:
1291           analyse_regex (argbuffer[i].what);
1292           break;
1293         case at_filename:
1294               this_file = argbuffer[i].what;
1295               /* Input file named "-" means read file names from stdin
1296                  (one per line) and use them. */
1297               if (streq (this_file, "-"))
1298                 {
1299                   if (parsing_stdin)
1300                     fatal ("cannot parse standard input AND read file names from it",
1301                            (char *)NULL);
1302                   while (readline_internal (&filename_lb, stdin) > 0)
1303                     process_file_name (filename_lb.buffer, lang);
1304                 }
1305               else
1306                 process_file_name (this_file, lang);
1307           break;
1308         case at_stdin:
1309           this_file = argbuffer[i].what;
1310           process_file (stdin, this_file, lang);
1311           break;
1312         }
1313     }
1314
1315   free_regexps ();
1316   free (lb.buffer);
1317   free (filebuf.buffer);
1318   free (token_name.buffer);
1319
1320   if (!CTAGS || cxref_style)
1321     {
1322       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1323       put_entries (nodehead);
1324       free_tree (nodehead);
1325       nodehead = NULL;
1326       if (!CTAGS)
1327         {
1328           fdesc *fdp;
1329
1330           /* Output file entries that have no tags. */
1331           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1332             if (!fdp->written)
1333               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1334
1335           while (nincluded_files-- > 0)
1336             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1337
1338           if (fclose (tagf) == EOF)
1339             pfatal (tagfile);
1340         }
1341
1342       exit (EXIT_SUCCESS);
1343     }
1344
1345   /* From here on, we are in (CTAGS && !cxref_style) */
1346   if (update)
1347     {
1348       char *cmd =
1349         xmalloc (strlen (tagfile) + whatlen_max +
1350                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1351       for (i = 0; i < current_arg; ++i)
1352         {
1353           switch (argbuffer[i].arg_type)
1354             {
1355             case at_filename:
1356             case at_stdin:
1357               break;
1358             default:
1359               continue;         /* the for loop */
1360             }
1361           strcpy (cmd, "mv ");
1362           strcat (cmd, tagfile);
1363           strcat (cmd, " OTAGS;fgrep -v '\t");
1364           strcat (cmd, argbuffer[i].what);
1365           strcat (cmd, "\t' OTAGS >");
1366           strcat (cmd, tagfile);
1367           strcat (cmd, ";rm OTAGS");
1368           if (system (cmd) != EXIT_SUCCESS)
1369             fatal ("failed to execute shell command", (char *)NULL);
1370         }
1371       free (cmd);
1372       append_to_tagfile = TRUE;
1373     }
1374
1375   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1376   if (tagf == NULL)
1377     pfatal (tagfile);
1378   put_entries (nodehead);       /* write all the tags (CTAGS) */
1379   free_tree (nodehead);
1380   nodehead = NULL;
1381   if (fclose (tagf) == EOF)
1382     pfatal (tagfile);
1383
1384   if (CTAGS)
1385     if (append_to_tagfile || update)
1386       {
1387         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1388         /* Maybe these should be used:
1389            setenv ("LC_COLLATE", "C", 1);
1390            setenv ("LC_ALL", "C", 1); */
1391         strcpy (cmd, "sort -u -o ");
1392         strcat (cmd, tagfile);
1393         strcat (cmd, " ");
1394         strcat (cmd, tagfile);
1395         exit (system (cmd));
1396       }
1397   return EXIT_SUCCESS;
1398 }
1399
1400
1401 /*
1402  * Return a compressor given the file name.  If EXTPTR is non-zero,
1403  * return a pointer into FILE where the compressor-specific
1404  * extension begins.  If no compressor is found, NULL is returned
1405  * and EXTPTR is not significant.
1406  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1407  */
1408 static compressor *
1409 get_compressor_from_suffix (char *file, char **extptr)
1410 {
1411   compressor *compr;
1412   char *slash, *suffix;
1413
1414   /* File has been processed by canonicalize_filename,
1415      so we don't need to consider backslashes on DOS_NT.  */
1416   slash = etags_strrchr (file, '/');
1417   suffix = etags_strrchr (file, '.');
1418   if (suffix == NULL || suffix < slash)
1419     return NULL;
1420   if (extptr != NULL)
1421     *extptr = suffix;
1422   suffix += 1;
1423   /* Let those poor souls who live with DOS 8+3 file name limits get
1424      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1425      Only the first do loop is run if not MSDOS */
1426   do
1427     {
1428       for (compr = compressors; compr->suffix != NULL; compr++)
1429         if (streq (compr->suffix, suffix))
1430           return compr;
1431       if (!MSDOS)
1432         break;                  /* do it only once: not really a loop */
1433       if (extptr != NULL)
1434         *extptr = ++suffix;
1435     } while (*suffix != '\0');
1436   return NULL;
1437 }
1438
1439
1440
1441 /*
1442  * Return a language given the name.
1443  */
1444 static language *
1445 get_language_from_langname (const char *name)
1446 {
1447   language *lang;
1448
1449   if (name == NULL)
1450     error ("empty language name", (char *)NULL);
1451   else
1452     {
1453       for (lang = lang_names; lang->name != NULL; lang++)
1454         if (streq (name, lang->name))
1455           return lang;
1456       error ("unknown language \"%s\"", name);
1457     }
1458
1459   return NULL;
1460 }
1461
1462
1463 /*
1464  * Return a language given the interpreter name.
1465  */
1466 static language *
1467 get_language_from_interpreter (char *interpreter)
1468 {
1469   language *lang;
1470   const char **iname;
1471
1472   if (interpreter == NULL)
1473     return NULL;
1474   for (lang = lang_names; lang->name != NULL; lang++)
1475     if (lang->interpreters != NULL)
1476       for (iname = lang->interpreters; *iname != NULL; iname++)
1477         if (streq (*iname, interpreter))
1478             return lang;
1479
1480   return NULL;
1481 }
1482
1483
1484
1485 /*
1486  * Return a language given the file name.
1487  */
1488 static language *
1489 get_language_from_filename (char *file, int case_sensitive)
1490 {
1491   language *lang;
1492   const char **name, **ext, *suffix;
1493
1494   /* Try whole file name first. */
1495   for (lang = lang_names; lang->name != NULL; lang++)
1496     if (lang->filenames != NULL)
1497       for (name = lang->filenames; *name != NULL; name++)
1498         if ((case_sensitive)
1499             ? streq (*name, file)
1500             : strcaseeq (*name, file))
1501           return lang;
1502
1503   /* If not found, try suffix after last dot. */
1504   suffix = etags_strrchr (file, '.');
1505   if (suffix == NULL)
1506     return NULL;
1507   suffix += 1;
1508   for (lang = lang_names; lang->name != NULL; lang++)
1509     if (lang->suffixes != NULL)
1510       for (ext = lang->suffixes; *ext != NULL; ext++)
1511         if ((case_sensitive)
1512             ? streq (*ext, suffix)
1513             : strcaseeq (*ext, suffix))
1514           return lang;
1515   return NULL;
1516 }
1517
1518 \f
1519 /*
1520  * This routine is called on each file argument.
1521  */
1522 static void
1523 process_file_name (char *file, language *lang)
1524 {
1525   struct stat stat_buf;
1526   FILE *inf;
1527   fdesc *fdp;
1528   compressor *compr;
1529   char *compressed_name, *uncompressed_name;
1530   char *ext, *real_name;
1531   int retval;
1532
1533   canonicalize_filename (file);
1534   if (streq (file, tagfile) && !streq (tagfile, "-"))
1535     {
1536       error ("skipping inclusion of %s in self.", file);
1537       return;
1538     }
1539   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1540     {
1541       compressed_name = NULL;
1542       real_name = uncompressed_name = savestr (file);
1543     }
1544   else
1545     {
1546       real_name = compressed_name = savestr (file);
1547       uncompressed_name = savenstr (file, ext - file);
1548     }
1549
1550   /* If the canonicalized uncompressed name
1551      has already been dealt with, skip it silently. */
1552   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1553     {
1554       assert (fdp->infname != NULL);
1555       if (streq (uncompressed_name, fdp->infname))
1556         goto cleanup;
1557     }
1558
1559   if (stat (real_name, &stat_buf) != 0)
1560     {
1561       /* Reset real_name and try with a different name. */
1562       real_name = NULL;
1563       if (compressed_name != NULL) /* try with the given suffix */
1564         {
1565           if (stat (uncompressed_name, &stat_buf) == 0)
1566             real_name = uncompressed_name;
1567         }
1568       else                      /* try all possible suffixes */
1569         {
1570           for (compr = compressors; compr->suffix != NULL; compr++)
1571             {
1572               compressed_name = concat (file, ".", compr->suffix);
1573               if (stat (compressed_name, &stat_buf) != 0)
1574                 {
1575                   if (MSDOS)
1576                     {
1577                       char *suf = compressed_name + strlen (file);
1578                       size_t suflen = strlen (compr->suffix) + 1;
1579                       for ( ; suf[1]; suf++, suflen--)
1580                         {
1581                           memmove (suf, suf + 1, suflen);
1582                           if (stat (compressed_name, &stat_buf) == 0)
1583                             {
1584                               real_name = compressed_name;
1585                               break;
1586                             }
1587                         }
1588                       if (real_name != NULL)
1589                         break;
1590                     } /* MSDOS */
1591                   free (compressed_name);
1592                   compressed_name = NULL;
1593                 }
1594               else
1595                 {
1596                   real_name = compressed_name;
1597                   break;
1598                 }
1599             }
1600         }
1601       if (real_name == NULL)
1602         {
1603           perror (file);
1604           goto cleanup;
1605         }
1606     } /* try with a different name */
1607
1608   if (!S_ISREG (stat_buf.st_mode))
1609     {
1610       error ("skipping %s: it is not a regular file.", real_name);
1611       goto cleanup;
1612     }
1613   if (real_name == compressed_name)
1614     {
1615       char *cmd = concat (compr->command, " ", real_name);
1616       inf = (FILE *) popen (cmd, "r");
1617       free (cmd);
1618     }
1619   else
1620     inf = fopen (real_name, "r");
1621   if (inf == NULL)
1622     {
1623       perror (real_name);
1624       goto cleanup;
1625     }
1626
1627   process_file (inf, uncompressed_name, lang);
1628
1629   if (real_name == compressed_name)
1630     retval = pclose (inf);
1631   else
1632     retval = fclose (inf);
1633   if (retval < 0)
1634     pfatal (file);
1635
1636  cleanup:
1637   free (compressed_name);
1638   free (uncompressed_name);
1639   last_node = NULL;
1640   curfdp = NULL;
1641   return;
1642 }
1643
1644 static void
1645 process_file (FILE *fh, char *fn, language *lang)
1646 {
1647   static const fdesc emptyfdesc;
1648   fdesc *fdp;
1649
1650   /* Create a new input file description entry. */
1651   fdp = xnew (1, fdesc);
1652   *fdp = emptyfdesc;
1653   fdp->next = fdhead;
1654   fdp->infname = savestr (fn);
1655   fdp->lang = lang;
1656   fdp->infabsname = absolute_filename (fn, cwd);
1657   fdp->infabsdir = absolute_dirname (fn, cwd);
1658   if (filename_is_absolute (fn))
1659     {
1660       /* An absolute file name.  Canonicalize it. */
1661       fdp->taggedfname = absolute_filename (fn, NULL);
1662     }
1663   else
1664     {
1665       /* A file name relative to cwd.  Make it relative
1666          to the directory of the tags file. */
1667       fdp->taggedfname = relative_filename (fn, tagfiledir);
1668     }
1669   fdp->usecharno = TRUE;        /* use char position when making tags */
1670   fdp->prop = NULL;
1671   fdp->written = FALSE;         /* not written on tags file yet */
1672
1673   fdhead = fdp;
1674   curfdp = fdhead;              /* the current file description */
1675
1676   find_entries (fh);
1677
1678   /* If not Ctags, and if this is not metasource and if it contained no #line
1679      directives, we can write the tags and free all nodes pointing to
1680      curfdp. */
1681   if (!CTAGS
1682       && curfdp->usecharno      /* no #line directives in this file */
1683       && !curfdp->lang->metasource)
1684     {
1685       node *np, *prev;
1686
1687       /* Look for the head of the sublist relative to this file.  See add_node
1688          for the structure of the node tree. */
1689       prev = NULL;
1690       for (np = nodehead; np != NULL; prev = np, np = np->left)
1691         if (np->fdp == curfdp)
1692           break;
1693
1694       /* If we generated tags for this file, write and delete them. */
1695       if (np != NULL)
1696         {
1697           /* This is the head of the last sublist, if any.  The following
1698              instructions depend on this being true. */
1699           assert (np->left == NULL);
1700
1701           assert (fdhead == curfdp);
1702           assert (last_node->fdp == curfdp);
1703           put_entries (np);     /* write tags for file curfdp->taggedfname */
1704           free_tree (np);       /* remove the written nodes */
1705           if (prev == NULL)
1706             nodehead = NULL;    /* no nodes left */
1707           else
1708             prev->left = NULL;  /* delete the pointer to the sublist */
1709         }
1710     }
1711 }
1712
1713 /*
1714  * This routine sets up the boolean pseudo-functions which work
1715  * by setting boolean flags dependent upon the corresponding character.
1716  * Every char which is NOT in that string is not a white char.  Therefore,
1717  * all of the array "_wht" is set to FALSE, and then the elements
1718  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1719  * of a char is TRUE if it is the string "white", else FALSE.
1720  */
1721 static void
1722 init (void)
1723 {
1724   register const char *sp;
1725   register int i;
1726
1727   for (i = 0; i < CHARS; i++)
1728     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1729   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1730   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1731   notinname ('\0') = notinname ('\n');
1732   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1733   begtoken ('\0') = begtoken ('\n');
1734   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1735   intoken ('\0') = intoken ('\n');
1736   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1737   endtoken ('\0') = endtoken ('\n');
1738 }
1739
1740 /*
1741  * This routine opens the specified file and calls the function
1742  * which finds the function and type definitions.
1743  */
1744 static void
1745 find_entries (FILE *inf)
1746 {
1747   char *cp;
1748   language *lang = curfdp->lang;
1749   Lang_function *parser = NULL;
1750
1751   /* If user specified a language, use it. */
1752   if (lang != NULL && lang->function != NULL)
1753     {
1754       parser = lang->function;
1755     }
1756
1757   /* Else try to guess the language given the file name. */
1758   if (parser == NULL)
1759     {
1760       lang = get_language_from_filename (curfdp->infname, TRUE);
1761       if (lang != NULL && lang->function != NULL)
1762         {
1763           curfdp->lang = lang;
1764           parser = lang->function;
1765         }
1766     }
1767
1768   /* Else look for sharp-bang as the first two characters. */
1769   if (parser == NULL
1770       && readline_internal (&lb, inf) > 0
1771       && lb.len >= 2
1772       && lb.buffer[0] == '#'
1773       && lb.buffer[1] == '!')
1774     {
1775       char *lp;
1776
1777       /* Set lp to point at the first char after the last slash in the
1778          line or, if no slashes, at the first nonblank.  Then set cp to
1779          the first successive blank and terminate the string. */
1780       lp = etags_strrchr (lb.buffer+2, '/');
1781       if (lp != NULL)
1782         lp += 1;
1783       else
1784         lp = skip_spaces (lb.buffer + 2);
1785       cp = skip_non_spaces (lp);
1786       *cp = '\0';
1787
1788       if (strlen (lp) > 0)
1789         {
1790           lang = get_language_from_interpreter (lp);
1791           if (lang != NULL && lang->function != NULL)
1792             {
1793               curfdp->lang = lang;
1794               parser = lang->function;
1795             }
1796         }
1797     }
1798
1799   /* We rewind here, even if inf may be a pipe.  We fail if the
1800      length of the first line is longer than the pipe block size,
1801      which is unlikely. */
1802   rewind (inf);
1803
1804   /* Else try to guess the language given the case insensitive file name. */
1805   if (parser == NULL)
1806     {
1807       lang = get_language_from_filename (curfdp->infname, FALSE);
1808       if (lang != NULL && lang->function != NULL)
1809         {
1810           curfdp->lang = lang;
1811           parser = lang->function;
1812         }
1813     }
1814
1815   /* Else try Fortran or C. */
1816   if (parser == NULL)
1817     {
1818       node *old_last_node = last_node;
1819
1820       curfdp->lang = get_language_from_langname ("fortran");
1821       find_entries (inf);
1822
1823       if (old_last_node == last_node)
1824         /* No Fortran entries found.  Try C. */
1825         {
1826           /* We do not tag if rewind fails.
1827              Only the file name will be recorded in the tags file. */
1828           rewind (inf);
1829           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1830           find_entries (inf);
1831         }
1832       return;
1833     }
1834
1835   if (!no_line_directive
1836       && curfdp->lang != NULL && curfdp->lang->metasource)
1837     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1838        file, or anyway we parsed a file that is automatically generated from
1839        this one.  If this is the case, the bingo.c file contained #line
1840        directives that generated tags pointing to this file.  Let's delete
1841        them all before parsing this file, which is the real source. */
1842     {
1843       fdesc **fdpp = &fdhead;
1844       while (*fdpp != NULL)
1845         if (*fdpp != curfdp
1846             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1847           /* We found one of those!  We must delete both the file description
1848              and all tags referring to it. */
1849           {
1850             fdesc *badfdp = *fdpp;
1851
1852             /* Delete the tags referring to badfdp->taggedfname
1853                that were obtained from badfdp->infname. */
1854             invalidate_nodes (badfdp, &nodehead);
1855
1856             *fdpp = badfdp->next; /* remove the bad description from the list */
1857             free_fdesc (badfdp);
1858           }
1859         else
1860           fdpp = &(*fdpp)->next; /* advance the list pointer */
1861     }
1862
1863   assert (parser != NULL);
1864
1865   /* Generic initialisations before reading from file. */
1866   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1867
1868   /* Generic initialisations before parsing file with readline. */
1869   lineno = 0;                  /* reset global line number */
1870   charno = 0;                  /* reset global char number */
1871   linecharno = 0;              /* reset global char number of line start */
1872
1873   parser (inf);
1874
1875   regex_tag_multiline ();
1876 }
1877
1878 \f
1879 /*
1880  * Check whether an implicitly named tag should be created,
1881  * then call `pfnote'.
1882  * NAME is a string that is internally copied by this function.
1883  *
1884  * TAGS format specification
1885  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1886  * The following is explained in some more detail in etc/ETAGS.EBNF.
1887  *
1888  * make_tag creates tags with "implicit tag names" (unnamed tags)
1889  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1890  *  1. NAME does not contain any of the characters in NONAM;
1891  *  2. LINESTART contains name as either a rightmost, or rightmost but
1892  *     one character, substring;
1893  *  3. the character, if any, immediately before NAME in LINESTART must
1894  *     be a character in NONAM;
1895  *  4. the character, if any, immediately after NAME in LINESTART must
1896  *     also be a character in NONAM.
1897  *
1898  * The implementation uses the notinname() macro, which recognises the
1899  * characters stored in the string `nonam'.
1900  * etags.el needs to use the same characters that are in NONAM.
1901  */
1902 static void
1903 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1904           int namelen,          /* tag length */
1905           int is_func,          /* tag is a function */
1906           char *linestart,      /* start of the line where tag is */
1907           int linelen,          /* length of the line where tag is */
1908           int lno,              /* line number */
1909           long int cno)         /* character number */
1910 {
1911   bool named = (name != NULL && namelen > 0);
1912   char *nname = NULL;
1913
1914   if (!CTAGS && named)          /* maybe set named to false */
1915     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1916        such that etags.el can guess a name from it. */
1917     {
1918       int i;
1919       register const char *cp = name;
1920
1921       for (i = 0; i < namelen; i++)
1922         if (notinname (*cp++))
1923           break;
1924       if (i == namelen)                         /* rule #1 */
1925         {
1926           cp = linestart + linelen - namelen;
1927           if (notinname (linestart[linelen-1]))
1928             cp -= 1;                            /* rule #4 */
1929           if (cp >= linestart                   /* rule #2 */
1930               && (cp == linestart
1931                   || notinname (cp[-1]))        /* rule #3 */
1932               && strneq (name, cp, namelen))    /* rule #2 */
1933             named = FALSE;      /* use implicit tag name */
1934         }
1935     }
1936
1937   if (named)
1938     nname = savenstr (name, namelen);
1939
1940   pfnote (nname, is_func, linestart, linelen, lno, cno);
1941 }
1942
1943 /* Record a tag. */
1944 static void
1945 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1946                                 /* tag name, or NULL if unnamed */
1947                                 /* tag is a function */
1948                                 /* start of the line where tag is */
1949                                 /* length of the line where tag is */
1950                                 /* line number */
1951                                 /* character number */
1952 {
1953   register node *np;
1954
1955   assert (name == NULL || name[0] != '\0');
1956   if (CTAGS && name == NULL)
1957     return;
1958
1959   np = xnew (1, node);
1960
1961   /* If ctags mode, change name "main" to M<thisfilename>. */
1962   if (CTAGS && !cxref_style && streq (name, "main"))
1963     {
1964       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1965       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1966       fp = etags_strrchr (np->name, '.');
1967       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1968         fp[0] = '\0';
1969     }
1970   else
1971     np->name = name;
1972   np->valid = TRUE;
1973   np->been_warned = FALSE;
1974   np->fdp = curfdp;
1975   np->is_func = is_func;
1976   np->lno = lno;
1977   if (np->fdp->usecharno)
1978     /* Our char numbers are 0-base, because of C language tradition?
1979        ctags compatibility?  old versions compatibility?   I don't know.
1980        Anyway, since emacs's are 1-base we expect etags.el to take care
1981        of the difference.  If we wanted to have 1-based numbers, we would
1982        uncomment the +1 below. */
1983     np->cno = cno /* + 1 */ ;
1984   else
1985     np->cno = invalidcharno;
1986   np->left = np->right = NULL;
1987   if (CTAGS && !cxref_style)
1988     {
1989       if (strlen (linestart) < 50)
1990         np->regex = concat (linestart, "$", "");
1991       else
1992         np->regex = savenstr (linestart, 50);
1993     }
1994   else
1995     np->regex = savenstr (linestart, linelen);
1996
1997   add_node (np, &nodehead);
1998 }
1999
2000 /*
2001  * free_tree ()
2002  *      recurse on left children, iterate on right children.
2003  */
2004 static void
2005 free_tree (register node *np)
2006 {
2007   while (np)
2008     {
2009       register node *node_right = np->right;
2010       free_tree (np->left);
2011       free (np->name);
2012       free (np->regex);
2013       free (np);
2014       np = node_right;
2015     }
2016 }
2017
2018 /*
2019  * free_fdesc ()
2020  *      delete a file description
2021  */
2022 static void
2023 free_fdesc (register fdesc *fdp)
2024 {
2025   free (fdp->infname);
2026   free (fdp->infabsname);
2027   free (fdp->infabsdir);
2028   free (fdp->taggedfname);
2029   free (fdp->prop);
2030   free (fdp);
2031 }
2032
2033 /*
2034  * add_node ()
2035  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2036  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2037  *      balancing.
2038  *
2039  *      add_node is the only function allowed to add nodes, so it can
2040  *      maintain state.
2041  */
2042 static void
2043 add_node (node *np, node **cur_node_p)
2044 {
2045   register int dif;
2046   register node *cur_node = *cur_node_p;
2047
2048   if (cur_node == NULL)
2049     {
2050       *cur_node_p = np;
2051       last_node = np;
2052       return;
2053     }
2054
2055   if (!CTAGS)
2056     /* Etags Mode */
2057     {
2058       /* For each file name, tags are in a linked sublist on the right
2059          pointer.  The first tags of different files are a linked list
2060          on the left pointer.  last_node points to the end of the last
2061          used sublist. */
2062       if (last_node != NULL && last_node->fdp == np->fdp)
2063         {
2064           /* Let's use the same sublist as the last added node. */
2065           assert (last_node->right == NULL);
2066           last_node->right = np;
2067           last_node = np;
2068         }
2069       else if (cur_node->fdp == np->fdp)
2070         {
2071           /* Scanning the list we found the head of a sublist which is
2072              good for us.  Let's scan this sublist. */
2073           add_node (np, &cur_node->right);
2074         }
2075       else
2076         /* The head of this sublist is not good for us.  Let's try the
2077            next one. */
2078         add_node (np, &cur_node->left);
2079     } /* if ETAGS mode */
2080
2081   else
2082     {
2083       /* Ctags Mode */
2084       dif = strcmp (np->name, cur_node->name);
2085
2086       /*
2087        * If this tag name matches an existing one, then
2088        * do not add the node, but maybe print a warning.
2089        */
2090       if (no_duplicates && !dif)
2091         {
2092           if (np->fdp == cur_node->fdp)
2093             {
2094               if (!no_warnings)
2095                 {
2096                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2097                            np->fdp->infname, lineno, np->name);
2098                   fprintf (stderr, "Second entry ignored\n");
2099                 }
2100             }
2101           else if (!cur_node->been_warned && !no_warnings)
2102             {
2103               fprintf
2104                 (stderr,
2105                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2106                  np->fdp->infname, cur_node->fdp->infname, np->name);
2107               cur_node->been_warned = TRUE;
2108             }
2109           return;
2110         }
2111
2112       /* Actually add the node */
2113       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2114     } /* if CTAGS mode */
2115 }
2116
2117 /*
2118  * invalidate_nodes ()
2119  *      Scan the node tree and invalidate all nodes pointing to the
2120  *      given file description (CTAGS case) or free them (ETAGS case).
2121  */
2122 static void
2123 invalidate_nodes (fdesc *badfdp, node **npp)
2124 {
2125   node *np = *npp;
2126
2127   if (np == NULL)
2128     return;
2129
2130   if (CTAGS)
2131     {
2132       if (np->left != NULL)
2133         invalidate_nodes (badfdp, &np->left);
2134       if (np->fdp == badfdp)
2135         np->valid = FALSE;
2136       if (np->right != NULL)
2137         invalidate_nodes (badfdp, &np->right);
2138     }
2139   else
2140     {
2141       assert (np->fdp != NULL);
2142       if (np->fdp == badfdp)
2143         {
2144           *npp = np->left;      /* detach the sublist from the list */
2145           np->left = NULL;      /* isolate it */
2146           free_tree (np);       /* free it */
2147           invalidate_nodes (badfdp, npp);
2148         }
2149       else
2150         invalidate_nodes (badfdp, &np->left);
2151     }
2152 }
2153
2154 \f
2155 static int total_size_of_entries (node *);
2156 static int number_len (long);
2157
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is at least 1. */
static int
number_len (long int num)
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2167
2168 /*
2169  * Return total number of characters that put_entries will output for
2170  * the nodes in the linked list at the right of the specified node.
2171  * This count is irrelevant with etags.el since emacs 19.34 at least,
2172  * but is still supplied for backward compatibility.
2173  */
2174 static int
2175 total_size_of_entries (register node *np)
2176 {
2177   register int total = 0;
2178
2179   for (; np != NULL; np = np->right)
2180     if (np->valid)
2181       {
2182         total += strlen (np->regex) + 1;                /* pat\177 */
2183         if (np->name != NULL)
2184           total += strlen (np->name) + 1;               /* name\001 */
2185         total += number_len ((long) np->lno) + 1;       /* lno, */
2186         if (np->cno != invalidcharno)                   /* cno */
2187           total += number_len (np->cno);
2188         total += 1;                                     /* newline */
2189       }
2190
2191   return total;
2192 }
2193
2194 static void
2195 put_entries (register node *np)
2196 {
2197   register char *sp;
2198   static fdesc *fdp = NULL;
2199
2200   if (np == NULL)
2201     return;
2202
2203   /* Output subentries that precede this one */
2204   if (CTAGS)
2205     put_entries (np->left);
2206
2207   /* Output this entry */
2208   if (np->valid)
2209     {
2210       if (!CTAGS)
2211         {
2212           /* Etags mode */
2213           if (fdp != np->fdp)
2214             {
2215               fdp = np->fdp;
2216               fprintf (tagf, "\f\n%s,%d\n",
2217                        fdp->taggedfname, total_size_of_entries (np));
2218               fdp->written = TRUE;
2219             }
2220           fputs (np->regex, tagf);
2221           fputc ('\177', tagf);
2222           if (np->name != NULL)
2223             {
2224               fputs (np->name, tagf);
2225               fputc ('\001', tagf);
2226             }
2227           fprintf (tagf, "%d,", np->lno);
2228           if (np->cno != invalidcharno)
2229             fprintf (tagf, "%ld", np->cno);
2230           fputs ("\n", tagf);
2231         }
2232       else
2233         {
2234           /* Ctags mode */
2235           if (np->name == NULL)
2236             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2237
2238           if (cxref_style)
2239             {
2240               if (vgrind_style)
2241                 fprintf (stdout, "%s %s %d\n",
2242                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2243               else
2244                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2245                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2246             }
2247           else
2248             {
2249               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2250
2251               if (np->is_func)
2252                 {               /* function or #define macro with args */
2253                   putc (searchar, tagf);
2254                   putc ('^', tagf);
2255
2256                   for (sp = np->regex; *sp; sp++)
2257                     {
2258                       if (*sp == '\\' || *sp == searchar)
2259                         putc ('\\', tagf);
2260                       putc (*sp, tagf);
2261                     }
2262                   putc (searchar, tagf);
2263                 }
2264               else
2265                 {               /* anything else; text pattern inadequate */
2266                   fprintf (tagf, "%d", np->lno);
2267                 }
2268               putc ('\n', tagf);
2269             }
2270         }
2271     } /* if this node contains a valid tag */
2272
2273   /* Output subentries that follow this one */
2274   put_entries (np->right);
2275   if (!CTAGS)
2276     put_entries (np->left);
2277 }
2278
2279 \f
/* C extensions.
   C_EXT is the mask of the bits that select a C dialect.  C_STAR and
   C_JAVA both contain the C_PLPL bit, which is why the keyword table
   uses (C_JAVA & ~C_PLPL) for keywords that belong to Java alone. */
#define C_EXT   0xfff           /* mask of the C extension bits */
#define C_PLAIN 0x0             /* plain C */
#define C_PLPL  0x1             /* C++ */
#define C_STAR  0x3             /* C* */
#define C_JAVA  0x5             /* Java */
#define C_AUTO  0x1000          /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2288
/*
 * The C symbol tables.
 *
 * Kinds of keyword recognised by the C-family parsers.  The comments
 * name the keywords that the table in in_word_set maps to each kind.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return,
                                   import, package, friend */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, domain,
                                   interface */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2303
2304 static unsigned int hash (const char *, unsigned int);
2305 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2306 static enum sym_type C_symtype (char *, int, int);
2307
2308 /* Feed stuff between (but not including) %[ and %] lines to:
2309      gperf -m 5
2310 %[
2311 %compare-strncmp
2312 %enum
2313 %struct-type
2314 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2315 %%
2316 if,             0,                      st_C_ignore
2317 for,            0,                      st_C_ignore
2318 while,          0,                      st_C_ignore
2319 switch,         0,                      st_C_ignore
2320 return,         0,                      st_C_ignore
2321 __attribute__,  0,                      st_C_attribute
2322 GTY,            0,                      st_C_attribute
2323 @interface,     0,                      st_C_objprot
2324 @protocol,      0,                      st_C_objprot
2325 @implementation,0,                      st_C_objimpl
2326 @end,           0,                      st_C_objend
2327 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2328 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2329 friend,         C_PLPL,                 st_C_ignore
2330 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2331 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2332 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2333 class,          0,                      st_C_class
2334 namespace,      C_PLPL,                 st_C_struct
2335 domain,         C_STAR,                 st_C_struct
2336 union,          0,                      st_C_struct
2337 struct,         0,                      st_C_struct
2338 extern,         0,                      st_C_extern
2339 enum,           0,                      st_C_enum
2340 typedef,        0,                      st_C_typedef
2341 define,         0,                      st_C_define
2342 undef,          0,                      st_C_define
2343 operator,       C_PLPL,                 st_C_operator
2344 template,       0,                      st_C_template
2345 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2346 DEFUN,          0,                      st_C_gnumacro
2347 SYSCALL,        0,                      st_C_gnumacro
2348 ENTRY,          0,                      st_C_gnumacro
2349 PSEUDO,         0,                      st_C_gnumacro
2350 # These are defined inside C functions, so currently they are not met.
2351 # EXFUN used in glibc, DEFVAR_* in emacs.
2352 #EXFUN,         0,                      st_C_gnumacro
2353 #DEFVAR_,       0,                      st_C_gnumacro
2354 %]
2355 and replace lines between %< and %> with its output, then:
2356  - remove the #if characterset check
2357  - make in_word_set static and not inline. */
2358 /*%<*/
2359 /* C code produced by gperf version 3.0.1 */
2360 /* Command-line: gperf -m 5  */
2361 /* Computed positions: -k'2-3' */
2362
2363 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2364 /* maximum key range = 33, duplicates = 0 */
2365
/* Hash function generated by gperf for the keyword table: the word
   length plus the table values of the second character and, unless
   the word has exactly two characters, of the third one as well.
   STR must hold at least two characters. */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static const unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  unsigned int hval = len;

  /* The third character takes part for every length but two. */
  if (len != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];
  return hval;
}
2411
2412 static struct C_stab_entry *
2413 in_word_set (register const char *str, register unsigned int len)
2414 {
2415   enum
2416     {
2417       TOTAL_KEYWORDS = 33,
2418       MIN_WORD_LENGTH = 2,
2419       MAX_WORD_LENGTH = 15,
2420       MIN_HASH_VALUE = 2,
2421       MAX_HASH_VALUE = 34
2422     };
2423
2424   static struct C_stab_entry wordlist[] =
2425     {
2426       {""}, {""},
2427       {"if",            0,                      st_C_ignore},
2428       {"GTY",           0,                      st_C_attribute},
2429       {"@end",          0,                      st_C_objend},
2430       {"union",         0,                      st_C_struct},
2431       {"define",                0,                      st_C_define},
2432       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2433       {"template",      0,                      st_C_template},
2434       {"operator",      C_PLPL,                 st_C_operator},
2435       {"@interface",    0,                      st_C_objprot},
2436       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2437       {"friend",                C_PLPL,                 st_C_ignore},
2438       {"typedef",       0,                      st_C_typedef},
2439       {"return",                0,                      st_C_ignore},
2440       {"@implementation",0,                     st_C_objimpl},
2441       {"@protocol",     0,                      st_C_objprot},
2442       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2443       {"extern",                0,                      st_C_extern},
2444       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2445       {"struct",                0,                      st_C_struct},
2446       {"domain",                C_STAR,                 st_C_struct},
2447       {"switch",                0,                      st_C_ignore},
2448       {"enum",          0,                      st_C_enum},
2449       {"for",           0,                      st_C_ignore},
2450       {"namespace",     C_PLPL,                 st_C_struct},
2451       {"class",         0,                      st_C_class},
2452       {"while",         0,                      st_C_ignore},
2453       {"undef",         0,                      st_C_define},
2454       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2455       {"__attribute__", 0,                      st_C_attribute},
2456       {"SYSCALL",       0,                      st_C_gnumacro},
2457       {"ENTRY",         0,                      st_C_gnumacro},
2458       {"PSEUDO",                0,                      st_C_gnumacro},
2459       {"DEFUN",         0,                      st_C_gnumacro}
2460     };
2461
2462   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2463     {
2464       register int key = hash (str, len);
2465
2466       if (key <= MAX_HASH_VALUE && key >= 0)
2467         {
2468           register const char *s = wordlist[key].name;
2469
2470           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2471             return &wordlist[key];
2472         }
2473     }
2474   return 0;
2475 }
2476 /*%>*/
2477
2478 static enum sym_type
2479 C_symtype (char *str, int len, int c_ext)
2480 {
2481   register struct C_stab_entry *se = in_word_set (str, len);
2482
2483   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2484     return st_none;
2485   return se->type;
2486 }
2487
2488 \f
/*
 * Ignoring __attribute__ ((list)):
 * set while looking at an __attribute__ construct.
 */
static bool inattribute;

/*
 * State variable of the simple finite automaton that recognizes
 * C functions and variables.
 */
static enum
{
  /* nothing seen */
  fvnone,
  /* Emacs DEFUN keyword seen */
  fdefunkey,
  /* Emacs DEFUN name seen */
  fdefunname,
  /* func: operator keyword seen (cplpl) */
  foperator,
  /* function or variable name seen */
  fvnameseen,
  /* func: just after open parenthesis */
  fstartlist,
  /* func: in parameter list */
  finlist,
  /* func: after parameter list */
  flistseen,
  /* func: before open brace */
  fignore,
  /* var-like: ignore until ';' */
  vignore
} fvdef;

/* func or var: extern keyword seen. */
static bool fvextern;
2513
/*
 * State variable of the simple finite automaton that recognizes
 * typedefs.
 */
static enum
{
  /* nothing seen */
  tnone,
  /* typedef keyword seen */
  tkeyseen,
  /* defined type seen */
  ttypeseen,
  /* inside typedef body */
  tinbody,
  /* just before typedef tag */
  tend,
  /* junk after typedef tag */
  tignore
} typdef;
2527
/*
 * State variable of the simple finite automaton that recognizes
 * struct-like structures (enum, struct and union).
 */
static enum
{
  /* nothing seen yet, or in struct body if bracelev > 0 */
  snone,
  /* struct-like keyword seen */
  skeyseen,
  /* struct-like tag seen */
  stagseen,
  /* colon seen after struct-like tag */
  scolonseen
} structdef;
2541
/*
 * Name of the current Objective C class; meaningful only while objdef
 * is different from onone.  The initial value is a placeholder.
 */
static const char *objtag = "<uninited>";
2546
/*
 * State variable of the little state machine that deals with
 * preprocessor lines.
 */
static enum
{
  /* nothing seen */
  dnone,
  /* '#' seen as first char on line */
  dsharpseen,
  /* '#' and 'define' seen */
  ddefineseen,
  /* ignore rest of line */
  dignorerest
} definedef;
2557
/*
 * State variable of the state machine for Objective C protocols and
 * implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  /* nothing seen */
  onone,
  /* @interface or @protocol seen */
  oprotocol,
  /* @implementation seen */
  oimplementation,
  /* class name seen */
  otagseen,
  /* parenthesis before category seen */
  oparenseen,
  /* category name seen */
  ocatseen,
  /* in @implementation body */
  oinbody,
  /* in @implementation body, after +/- */
  omethodsign,
  /* after method name */
  omethodtag,
  /* after method colon */
  omethodcolon,
  /* after method parameter */
  omethodparm,
  /* wait for @end */
  oignore
} objdef;
2577
2578
/*
 * Information about the token read, and how it should be tagged.
 * Used by the make_C_tag function to build a tag.
 *
 * The first three members can be used to pass strings around for
 * generic purposes.  The remaining ones specifically refer to creating
 * tags: in that case the token described here is the pattern that will
 * be used to create a tag.
 */
static struct tok
{
  /* string containing the token */
  char *line;
  /* where the token starts in LINE */
  int offset;
  /* token length */
  int length;

  /* A tag is created only while this is set; the token should be
     invalidated whenever a state machine is reset prematurely. */
  bool valid;
  /* create a named tag */
  bool named;
  /* source line number of tag */
  int lineno;
  /* source char number of tag */
  long linepos;
} token;                        /* latest token read */
2601
2602 /*
2603  * Variables and functions for dealing with nested structures.
2604  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2605  */
2606 static void pushclass_above (int, char *, int);
2607 static void popclass_above (int);
2608 static void write_classname (linebuffer *, const char *qualifier);
2609
2610 static struct {
2611   char **cname;                 /* nested class names */
2612   int *bracelev;                /* nested class brace level */
2613   int nl;                       /* class nesting level (elements used) */
2614   int size;                     /* length of the array */
2615 } cstack;                       /* stack for nested declaration tags */
2616 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2617 #define nestlev         (cstack.nl)
2618 /* After struct keyword or in struct body, not inside a nested function. */
2619 #define instruct        (structdef == snone && nestlev > 0                      \
2620                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2621
2622 static void
2623 pushclass_above (int bracelev, char *str, int len)
2624 {
2625   int nl;
2626
2627   popclass_above (bracelev);
2628   nl = cstack.nl;
2629   if (nl >= cstack.size)
2630     {
2631       int size = cstack.size *= 2;
2632       xrnew (cstack.cname, size, char *);
2633       xrnew (cstack.bracelev, size, int);
2634     }
2635   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2636   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2637   cstack.bracelev[nl] = bracelev;
2638   cstack.nl = nl + 1;
2639 }
2640
2641 static void
2642 popclass_above (int bracelev)
2643 {
2644   int nl;
2645
2646   for (nl = cstack.nl - 1;
2647        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2648        nl--)
2649     {
2650       free (cstack.cname[nl]);
2651       cstack.nl = nl;
2652     }
2653 }
2654
2655 static void
2656 write_classname (linebuffer *cn, const char *qualifier)
2657 {
2658   int i, len;
2659   int qlen = strlen (qualifier);
2660
2661   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2662     {
2663       len = 0;
2664       cn->len = 0;
2665       cn->buffer[0] = '\0';
2666     }
2667   else
2668     {
2669       len = strlen (cstack.cname[0]);
2670       linebuffer_setlen (cn, len);
2671       strcpy (cn->buffer, cstack.cname[0]);
2672     }
2673   for (i = 1; i < cstack.nl; i++)
2674     {
2675       char *s;
2676       int slen;
2677
2678       s = cstack.cname[i];
2679       if (s == NULL)
2680         continue;
2681       slen = strlen (s);
2682       len += slen + qlen;
2683       linebuffer_setlen (cn, len);
2684       strncat (cn->buffer, qualifier, qlen);
2685       strncat (cn->buffer, s, slen);
2686     }
2687 }
2688
2689 \f
2690 static bool consider_token (char *, int, int, int *, int, int, bool *);
2691 static void make_C_tag (bool);
2692
2693 /*
2694  * consider_token ()
2695  *      checks to see if the current token is at the start of a
2696  *      function or variable, or corresponds to a typedef, or
2697  *      is a struct/union/enum tag, or #define, or an enum constant.
2698  *
2699  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2700  *      with args.  C_EXTP points to which language we are looking at.
2701  *
2702  * Globals
2703  *      fvdef                   IN OUT
2704  *      structdef               IN OUT
2705  *      definedef               IN OUT
2706  *      typdef                  IN OUT
2707  *      objdef                  IN OUT
2708  */
2709
2710 static bool
2711 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2712                                 /* IN: token pointer */
2713                                 /* IN: token length */
2714                                 /* IN: first char after the token */
2715                                 /* IN, OUT: C extensions mask */
2716                                 /* IN: brace level */
2717                                 /* IN: parenthesis level */
2718                                 /* OUT: function or variable found */
2719 {
2720   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2721      structtype is the type of the preceding struct-like keyword, and
2722      structbracelev is the brace level where it has been seen. */
2723   static enum sym_type structtype;
2724   static int structbracelev;
2725   static enum sym_type toktype;
2726
2727
2728   toktype = C_symtype (str, len, *c_extp);
2729
2730   /*
2731    * Skip __attribute__
2732    */
2733   if (toktype == st_C_attribute)
2734     {
2735       inattribute = TRUE;
2736       return FALSE;
2737      }
2738
2739    /*
2740     * Advance the definedef state machine.
2741     */
2742    switch (definedef)
2743      {
2744      case dnone:
2745        /* We're not on a preprocessor line. */
2746        if (toktype == st_C_gnumacro)
2747          {
2748            fvdef = fdefunkey;
2749            return FALSE;
2750          }
2751        break;
2752      case dsharpseen:
2753        if (toktype == st_C_define)
2754          {
2755            definedef = ddefineseen;
2756          }
2757        else
2758          {
2759            definedef = dignorerest;
2760          }
2761        return FALSE;
2762      case ddefineseen:
2763        /*
2764         * Make a tag for any macro, unless it is a constant
2765         * and constantypedefs is FALSE.
2766         */
2767        definedef = dignorerest;
2768        *is_func_or_var = (c == '(');
2769        if (!*is_func_or_var && !constantypedefs)
2770          return FALSE;
2771        else
2772          return TRUE;
2773      case dignorerest:
2774        return FALSE;
2775      default:
2776        error ("internal error: definedef value.", (char *)NULL);
2777      }
2778
2779    /*
2780     * Now typedefs
2781     */
2782    switch (typdef)
2783      {
2784      case tnone:
2785        if (toktype == st_C_typedef)
2786          {
2787            if (typedefs)
2788              typdef = tkeyseen;
2789            fvextern = FALSE;
2790            fvdef = fvnone;
2791            return FALSE;
2792          }
2793        break;
2794      case tkeyseen:
2795        switch (toktype)
2796          {
2797          case st_none:
2798          case st_C_class:
2799          case st_C_struct:
2800          case st_C_enum:
2801            typdef = ttypeseen;
2802          }
2803        break;
2804      case ttypeseen:
2805        if (structdef == snone && fvdef == fvnone)
2806          {
2807            fvdef = fvnameseen;
2808            return TRUE;
2809          }
2810        break;
2811      case tend:
2812        switch (toktype)
2813          {
2814          case st_C_class:
2815          case st_C_struct:
2816          case st_C_enum:
2817            return FALSE;
2818          }
2819        return TRUE;
2820      }
2821
2822    switch (toktype)
2823      {
2824      case st_C_javastruct:
2825        if (structdef == stagseen)
2826          structdef = scolonseen;
2827        return FALSE;
2828      case st_C_template:
2829      case st_C_class:
2830        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2831            && bracelev == 0
2832            && definedef == dnone && structdef == snone
2833            && typdef == tnone && fvdef == fvnone)
2834          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2835        if (toktype == st_C_template)
2836          break;
2837        /* FALLTHRU */
2838      case st_C_struct:
2839      case st_C_enum:
2840        if (parlev == 0
2841            && fvdef != vignore
2842            && (typdef == tkeyseen
2843                || (typedefs_or_cplusplus && structdef == snone)))
2844          {
2845            structdef = skeyseen;
2846            structtype = toktype;
2847            structbracelev = bracelev;
2848            if (fvdef == fvnameseen)
2849              fvdef = fvnone;
2850          }
2851        return FALSE;
2852      }
2853
2854    if (structdef == skeyseen)
2855      {
2856        structdef = stagseen;
2857        return TRUE;
2858      }
2859
2860    if (typdef != tnone)
2861      definedef = dnone;
2862
2863    /* Detect Objective C constructs. */
2864    switch (objdef)
2865      {
2866      case onone:
2867        switch (toktype)
2868          {
2869          case st_C_objprot:
2870            objdef = oprotocol;
2871            return FALSE;
2872          case st_C_objimpl:
2873            objdef = oimplementation;
2874            return FALSE;
2875          }
2876        break;
2877      case oimplementation:
2878        /* Save the class tag for functions or variables defined inside. */
2879        objtag = savenstr (str, len);
2880        objdef = oinbody;
2881        return FALSE;
2882      case oprotocol:
2883        /* Save the class tag for categories. */
2884        objtag = savenstr (str, len);
2885        objdef = otagseen;
2886        *is_func_or_var = TRUE;
2887        return TRUE;
2888      case oparenseen:
2889        objdef = ocatseen;
2890        *is_func_or_var = TRUE;
2891        return TRUE;
2892      case oinbody:
2893        break;
2894      case omethodsign:
2895        if (parlev == 0)
2896          {
2897            fvdef = fvnone;
2898            objdef = omethodtag;
2899            linebuffer_setlen (&token_name, len);
2900            strncpy (token_name.buffer, str, len);
2901            token_name.buffer[len] = '\0';
2902            return TRUE;
2903          }
2904        return FALSE;
2905      case omethodcolon:
2906        if (parlev == 0)
2907          objdef = omethodparm;
2908        return FALSE;
2909      case omethodparm:
2910        if (parlev == 0)
2911          {
2912            fvdef = fvnone;
2913            objdef = omethodtag;
2914            linebuffer_setlen (&token_name, token_name.len + len);
2915            strncat (token_name.buffer, str, len);
2916            return TRUE;
2917          }
2918        return FALSE;
2919      case oignore:
2920        if (toktype == st_C_objend)
2921          {
2922            /* Memory leakage here: the string pointed by objtag is
2923               never released, because many tests would be needed to
2924               avoid breaking on incorrect input code.  The amount of
2925               memory leaked here is the sum of the lengths of the
2926               class tags.
2927            free (objtag); */
2928            objdef = onone;
2929          }
2930        return FALSE;
2931      }
2932
2933    /* A function, variable or enum constant? */
2934    switch (toktype)
2935      {
2936      case st_C_extern:
2937        fvextern = TRUE;
2938        switch  (fvdef)
2939          {
2940          case finlist:
2941          case flistseen:
2942          case fignore:
2943          case vignore:
2944            break;
2945          default:
2946            fvdef = fvnone;
2947          }
2948        return FALSE;
2949      case st_C_ignore:
2950        fvextern = FALSE;
2951        fvdef = vignore;
2952        return FALSE;
2953      case st_C_operator:
2954        fvdef = foperator;
2955        *is_func_or_var = TRUE;
2956        return TRUE;
2957      case st_none:
2958        if (constantypedefs
2959            && structdef == snone
2960            && structtype == st_C_enum && bracelev > structbracelev)
2961          return TRUE;           /* enum constant */
2962        switch (fvdef)
2963          {
2964          case fdefunkey:
2965            if (bracelev > 0)
2966              break;
2967            fvdef = fdefunname;  /* GNU macro */
2968            *is_func_or_var = TRUE;
2969            return TRUE;
2970          case fvnone:
2971            switch (typdef)
2972              {
2973              case ttypeseen:
2974                return FALSE;
2975              case tnone:
2976                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2977                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2978                  {
2979                    fvdef = vignore;
2980                    return FALSE;
2981                  }
2982                break;
2983              }
2984           /* FALLTHRU */
2985           case fvnameseen:
2986           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2987             {
2988               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2989                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2990               fvdef = foperator;
2991               *is_func_or_var = TRUE;
2992               return TRUE;
2993             }
2994           if (bracelev > 0 && !instruct)
2995             break;
2996           fvdef = fvnameseen;   /* function or variable */
2997           *is_func_or_var = TRUE;
2998           return TRUE;
2999         }
3000       break;
3001     }
3002
3003   return FALSE;
3004 }
3005
3006 \f
3007 /*
3008  * C_entries often keeps pointers to tokens or lines which are older than
3009  * the line currently read.  By keeping two line buffers, and switching
3010  * them at end of line, it is possible to use those pointers.
3011  */
3012 static struct
3013 {
3014   long linepos;
3015   linebuffer lb;
3016 } lbs[2];
3017
3018 #define current_lb_is_new (newndx == curndx)
3019 #define switch_line_buffers() (curndx = 1 - curndx)
3020
3021 #define curlb (lbs[curndx].lb)
3022 #define newlb (lbs[newndx].lb)
3023 #define curlinepos (lbs[curndx].linepos)
3024 #define newlinepos (lbs[newndx].linepos)
3025
3026 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3027 #define cplpl (c_ext & C_PLPL)
3028 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3029
3030 #define CNL_SAVE_DEFINEDEF()                                            \
3031 do {                                                                    \
3032   curlinepos = charno;                                                  \
3033   readline (&curlb, inf);                                               \
3034   lp = curlb.buffer;                                                    \
3035   quotednl = FALSE;                                                     \
3036   newndx = curndx;                                                      \
3037 } while (0)
3038
3039 #define CNL()                                                           \
3040 do {                                                                    \
3041   CNL_SAVE_DEFINEDEF();                                                 \
3042   if (savetoken.valid)                                                  \
3043     {                                                                   \
3044       token = savetoken;                                                \
3045       savetoken.valid = FALSE;                                          \
3046     }                                                                   \
3047   definedef = dnone;                                                    \
3048 } while (0)
3049
3050
3051 static void
3052 make_C_tag (int isfun)
3053 {
3054   /* This function is never called when token.valid is FALSE, but
3055      we must protect against invalid input or internal errors. */
3056   if (token.valid)
3057     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3058               token.offset+token.length+1, token.lineno, token.linepos);
3059   else if (DEBUG)
3060     {                             /* this branch is optimised away if !DEBUG */
3061       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3062                 token_name.len + 17, isfun, token.line,
3063                 token.offset+token.length+1, token.lineno, token.linepos);
3064       error ("INVALID TOKEN", NULL);
3065     }
3066
3067   token.valid = FALSE;
3068 }
3069
3070
3071 /*
3072  * C_entries ()
3073  *      This routine finds functions, variables, typedefs,
3074  *      #define's, enum constants and struct/union/enum definitions in
3075  *      C syntax and adds them to the list.
3076  */
3077 static void
3078 C_entries (int c_ext, FILE *inf)
3079                                 /* extension of C */
3080                                 /* input file */
3081 {
3082   register char c;              /* latest char read; '\0' for end of line */
3083   register char *lp;            /* pointer one beyond the character `c' */
3084   int curndx, newndx;           /* indices for current and new lb */
3085   register int tokoff;          /* offset in line of start of current token */
3086   register int toklen;          /* length of current token */
3087   const char *qualifier;        /* string used to qualify names */
3088   int qlen;                     /* length of qualifier */
3089   int bracelev;                 /* current brace level */
3090   int bracketlev;               /* current bracket level */
3091   int parlev;                   /* current parenthesis level */
3092   int attrparlev;               /* __attribute__ parenthesis level */
3093   int templatelev;              /* current template level */
3094   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3095   bool incomm, inquote, inchar, quotednl, midtoken;
3096   bool yacc_rules;              /* in the rules part of a yacc file */
3097   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3098
3099
3100   linebuffer_init (&lbs[0].lb);
3101   linebuffer_init (&lbs[1].lb);
3102   if (cstack.size == 0)
3103     {
3104       cstack.size = (DEBUG) ? 1 : 4;
3105       cstack.nl = 0;
3106       cstack.cname = xnew (cstack.size, char *);
3107       cstack.bracelev = xnew (cstack.size, int);
3108     }
3109
3110   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3111   curndx = newndx = 0;
3112   lp = curlb.buffer;
3113   *lp = 0;
3114
3115   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3116   structdef = snone; definedef = dnone; objdef = onone;
3117   yacc_rules = FALSE;
3118   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3119   token.valid = savetoken.valid = FALSE;
3120   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3121   if (cjava)
3122     { qualifier = "."; qlen = 1; }
3123   else
3124     { qualifier = "::"; qlen = 2; }
3125
3126
3127   while (!feof (inf))
3128     {
3129       c = *lp++;
3130       if (c == '\\')
3131         {
3132           /* If we are at the end of the line, the next character is a
3133              '\0'; do not skip it, because it is what tells us
3134              to read the next line.  */
3135           if (*lp == '\0')
3136             {
3137               quotednl = TRUE;
3138               continue;
3139             }
3140           lp++;
3141           c = ' ';
3142         }
3143       else if (incomm)
3144         {
3145           switch (c)
3146             {
3147             case '*':
3148               if (*lp == '/')
3149                 {
3150                   c = *lp++;
3151                   incomm = FALSE;
3152                 }
3153               break;
3154             case '\0':
3155               /* Newlines inside comments do not end macro definitions in
3156                  traditional cpp. */
3157               CNL_SAVE_DEFINEDEF ();
3158               break;
3159             }
3160           continue;
3161         }
3162       else if (inquote)
3163         {
3164           switch (c)
3165             {
3166             case '"':
3167               inquote = FALSE;
3168               break;
3169             case '\0':
3170               /* Newlines inside strings do not end macro definitions
3171                  in traditional cpp, even though compilers don't
3172                  usually accept them. */
3173               CNL_SAVE_DEFINEDEF ();
3174               break;
3175             }
3176           continue;
3177         }
3178       else if (inchar)
3179         {
3180           switch (c)
3181             {
3182             case '\0':
3183               /* Hmmm, something went wrong. */
3184               CNL ();
3185               /* FALLTHRU */
3186             case '\'':
3187               inchar = FALSE;
3188               break;
3189             }
3190           continue;
3191         }
3192       else if (bracketlev > 0)
3193         {
3194           switch (c)
3195             {
3196             case ']':
3197               if (--bracketlev > 0)
3198                 continue;
3199               break;
3200             case '\0':
3201               CNL_SAVE_DEFINEDEF ();
3202               break;
3203             }
3204           continue;
3205         }
3206       else switch (c)
3207         {
3208         case '"':
3209           inquote = TRUE;
3210           if (inattribute)
3211             break;
3212           switch (fvdef)
3213             {
3214             case fdefunkey:
3215             case fstartlist:
3216             case finlist:
3217             case fignore:
3218             case vignore:
3219               break;
3220             default:
3221               fvextern = FALSE;
3222               fvdef = fvnone;
3223             }
3224           continue;
3225         case '\'':
3226           inchar = TRUE;
3227           if (inattribute)
3228             break;
3229           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3230             {
3231               fvextern = FALSE;
3232               fvdef = fvnone;
3233             }
3234           continue;
3235         case '/':
3236           if (*lp == '*')
3237             {
3238               incomm = TRUE;
3239               lp++;
3240               c = ' ';
3241             }
3242           else if (/* cplpl && */ *lp == '/')
3243             {
3244               c = '\0';
3245             }
3246           break;
3247         case '%':
3248           if ((c_ext & YACC) && *lp == '%')
3249             {
3250               /* Entering or exiting rules section in yacc file. */
3251               lp++;
3252               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3253               typdef = tnone; structdef = snone;
3254               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3255               bracelev = 0;
3256               yacc_rules = !yacc_rules;
3257               continue;
3258             }
3259           else
3260             break;
3261         case '#':
3262           if (definedef == dnone)
3263             {
3264               char *cp;
3265               bool cpptoken = TRUE;
3266
3267               /* Look back on this line.  If all blanks, or nonblanks
3268                  followed by an end of comment, this is a preprocessor
3269                  token. */
3270               for (cp = newlb.buffer; cp < lp-1; cp++)
3271                 if (!iswhite (*cp))
3272                   {
3273                     if (*cp == '*' && *(cp+1) == '/')
3274                       {
3275                         cp++;
3276                         cpptoken = TRUE;
3277                       }
3278                     else
3279                       cpptoken = FALSE;
3280                   }
3281               if (cpptoken)
3282                 definedef = dsharpseen;
3283             } /* if (definedef == dnone) */
3284           continue;
3285         case '[':
3286           bracketlev++;
3287             continue;
3288         } /* switch (c) */
3289
3290
3291       /* Consider token only if some involved conditions are satisfied. */
3292       if (typdef != tignore
3293           && definedef != dignorerest
3294           && fvdef != finlist
3295           && templatelev == 0
3296           && (definedef != dnone
3297               || structdef != scolonseen)
3298           && !inattribute)
3299         {
3300           if (midtoken)
3301             {
3302               if (endtoken (c))
3303                 {
3304                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3305                     /* This handles :: in the middle,
3306                        but not at the beginning of an identifier.
3307                        Also, space-separated :: is not recognised. */
3308                     {
3309                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3310                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3311                       lp += 2;
3312                       toklen += 2;
3313                       c = lp[-1];
3314                       goto still_in_token;
3315                     }
3316                   else
3317                     {
3318                       bool funorvar = FALSE;
3319
3320                       if (yacc_rules
3321                           || consider_token (newlb.buffer + tokoff, toklen, c,
3322                                              &c_ext, bracelev, parlev,
3323                                              &funorvar))
3324                         {
3325                           if (fvdef == foperator)
3326                             {
3327                               char *oldlp = lp;
3328                               lp = skip_spaces (lp-1);
3329                               if (*lp != '\0')
3330                                 lp += 1;
3331                               while (*lp != '\0'
3332                                      && !iswhite (*lp) && *lp != '(')
3333                                 lp += 1;
3334                               c = *lp++;
3335                               toklen += lp - oldlp;
3336                             }
3337                           token.named = FALSE;
3338                           if (!plainc
3339                               && nestlev > 0 && definedef == dnone)
3340                             /* in struct body */
3341                             {
3342                               write_classname (&token_name, qualifier);
3343                               linebuffer_setlen (&token_name,
3344                                                  token_name.len+qlen+toklen);
3345                               strcat (token_name.buffer, qualifier);
3346                               strncat (token_name.buffer,
3347                                        newlb.buffer + tokoff, toklen);
3348                               token.named = TRUE;
3349                             }
3350                           else if (objdef == ocatseen)
3351                             /* Objective C category */
3352                             {
3353                               int len = strlen (objtag) + 2 + toklen;
3354                               linebuffer_setlen (&token_name, len);
3355                               strcpy (token_name.buffer, objtag);
3356                               strcat (token_name.buffer, "(");
3357                               strncat (token_name.buffer,
3358                                        newlb.buffer + tokoff, toklen);
3359                               strcat (token_name.buffer, ")");
3360                               token.named = TRUE;
3361                             }
3362                           else if (objdef == omethodtag
3363                                    || objdef == omethodparm)
3364                             /* Objective C method */
3365                             {
3366                               token.named = TRUE;
3367                             }
3368                           else if (fvdef == fdefunname)
3369                             /* GNU DEFUN and similar macros */
3370                             {
3371                               bool defun = (newlb.buffer[tokoff] == 'F');
3372                               int off = tokoff;
3373                               int len = toklen;
3374
3375                               /* Rewrite the tag so that emacs lisp DEFUNs
3376                                  can be found by their elisp name */
3377                               if (defun)
3378                                 {
3379                                   off += 1;
3380                                   len -= 1;
3381                                 }
3382                               linebuffer_setlen (&token_name, len);
3383                               strncpy (token_name.buffer,
3384                                        newlb.buffer + off, len);
3385                               token_name.buffer[len] = '\0';
3386                               if (defun)
3387                                 while (--len >= 0)
3388                                   if (token_name.buffer[len] == '_')
3389                                     token_name.buffer[len] = '-';
3390                               token.named = defun;
3391                             }
3392                           else
3393                             {
3394                               linebuffer_setlen (&token_name, toklen);
3395                               strncpy (token_name.buffer,
3396                                        newlb.buffer + tokoff, toklen);
3397                               token_name.buffer[toklen] = '\0';
3398                               /* Name macros and members. */
3399                               token.named = (structdef == stagseen
3400                                              || typdef == ttypeseen
3401                                              || typdef == tend
3402                                              || (funorvar
3403                                                  && definedef == dignorerest)
3404                                              || (funorvar
3405                                                  && definedef == dnone
3406                                                  && structdef == snone
3407                                                  && bracelev > 0));
3408                             }
3409                           token.lineno = lineno;
3410                           token.offset = tokoff;
3411                           token.length = toklen;
3412                           token.line = newlb.buffer;
3413                           token.linepos = newlinepos;
3414                           token.valid = TRUE;
3415
3416                           if (definedef == dnone
3417                               && (fvdef == fvnameseen
3418                                   || fvdef == foperator
3419                                   || structdef == stagseen
3420                                   || typdef == tend
3421                                   || typdef == ttypeseen
3422                                   || objdef != onone))
3423                             {
3424                               if (current_lb_is_new)
3425                                 switch_line_buffers ();
3426                             }
3427                           else if (definedef != dnone
3428                                    || fvdef == fdefunname
3429                                    || instruct)
3430                             make_C_tag (funorvar);
3431                         }
3432                       else /* not yacc and consider_token failed */
3433                         {
3434                           if (inattribute && fvdef == fignore)
3435                             {
3436                               /* We have just met __attribute__ after a
3437                                  function parameter list: do not tag the
3438                                  function again. */
3439                               fvdef = fvnone;
3440                             }
3441                         }
3442                       midtoken = FALSE;
3443                     }
3444                 } /* if (endtoken (c)) */
3445               else if (intoken (c))
3446                 still_in_token:
3447                 {
3448                   toklen++;
3449                   continue;
3450                 }
3451             } /* if (midtoken) */
3452           else if (begtoken (c))
3453             {
3454               switch (definedef)
3455                 {
3456                 case dnone:
3457                   switch (fvdef)
3458                     {
3459                     case fstartlist:
3460                       /* This prevents tagging fb in
3461                          void (__attribute__((noreturn)) *fb) (void);
3462                          Fixing this is not easy and not very important. */
3463                       fvdef = finlist;
3464                       continue;
3465                     case flistseen:
3466                       if (plainc || declarations)
3467                         {
3468                           make_C_tag (TRUE); /* a function */
3469                           fvdef = fignore;
3470                         }
3471                       break;
3472                     }
3473                   if (structdef == stagseen && !cjava)
3474                     {
3475                       popclass_above (bracelev);
3476                       structdef = snone;
3477                     }
3478                   break;
3479                 case dsharpseen:
3480                   savetoken = token;
3481                   break;
3482                 }
3483               if (!yacc_rules || lp == newlb.buffer + 1)
3484                 {
3485                   tokoff = lp - 1 - newlb.buffer;
3486                   toklen = 1;
3487                   midtoken = TRUE;
3488                 }
3489               continue;
3490             } /* if (begtoken) */
3491         } /* if must look at token */
3492
3493
3494       /* Detect end of line, colon, comma, semicolon and various braces
3495          after having handled a token.*/
3496       switch (c)
3497         {
3498         case ':':
3499           if (inattribute)
3500             break;
3501           if (yacc_rules && token.offset == 0 && token.valid)
3502             {
3503               make_C_tag (FALSE); /* a yacc function */
3504               break;
3505             }
3506           if (definedef != dnone)
3507             break;
3508           switch (objdef)
3509             {
3510             case  otagseen:
3511               objdef = oignore;
3512               make_C_tag (TRUE); /* an Objective C class */
3513               break;
3514             case omethodtag:
3515             case omethodparm:
3516               objdef = omethodcolon;
3517               linebuffer_setlen (&token_name, token_name.len + 1);
3518               strcat (token_name.buffer, ":");
3519               break;
3520             }
3521           if (structdef == stagseen)
3522             {
3523               structdef = scolonseen;
3524               break;
3525             }
          /* Should be useless, but may work as a safety net. */
3527           if (cplpl && fvdef == flistseen)
3528             {
3529               make_C_tag (TRUE); /* a function */
3530               fvdef = fignore;
3531               break;
3532             }
3533           break;
3534         case ';':
3535           if (definedef != dnone || inattribute)
3536             break;
3537           switch (typdef)
3538             {
3539             case tend:
3540             case ttypeseen:
3541               make_C_tag (FALSE); /* a typedef */
3542               typdef = tnone;
3543               fvdef = fvnone;
3544               break;
3545             case tnone:
3546             case tinbody:
3547             case tignore:
3548               switch (fvdef)
3549                 {
3550                 case fignore:
3551                   if (typdef == tignore || cplpl)
3552                     fvdef = fvnone;
3553                   break;
3554                 case fvnameseen:
3555                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3556                       || (members && instruct))
3557                     make_C_tag (FALSE); /* a variable */
3558                   fvextern = FALSE;
3559                   fvdef = fvnone;
3560                   token.valid = FALSE;
3561                   break;
3562                 case flistseen:
3563                   if ((declarations
3564                        && (cplpl || !instruct)
3565                        && (typdef == tnone || (typdef != tignore && instruct)))
3566                       || (members
3567                           && plainc && instruct))
3568                     make_C_tag (TRUE);  /* a function */
3569                   /* FALLTHRU */
3570                 default:
3571                   fvextern = FALSE;
3572                   fvdef = fvnone;
3573                   if (declarations
3574                        && cplpl && structdef == stagseen)
3575                     make_C_tag (FALSE); /* forward declaration */
3576                   else
3577                     token.valid = FALSE;
3578                 } /* switch (fvdef) */
3579               /* FALLTHRU */
3580             default:
3581               if (!instruct)
3582                 typdef = tnone;
3583             }
3584           if (structdef == stagseen)
3585             structdef = snone;
3586           break;
3587         case ',':
3588           if (definedef != dnone || inattribute)
3589             break;
3590           switch (objdef)
3591             {
3592             case omethodtag:
3593             case omethodparm:
3594               make_C_tag (TRUE); /* an Objective C method */
3595               objdef = oinbody;
3596               break;
3597             }
3598           switch (fvdef)
3599             {
3600             case fdefunkey:
3601             case foperator:
3602             case fstartlist:
3603             case finlist:
3604             case fignore:
3605             case vignore:
3606               break;
3607             case fdefunname:
3608               fvdef = fignore;
3609               break;
3610             case fvnameseen:
3611               if (parlev == 0
3612                   && ((globals
3613                        && bracelev == 0
3614                        && templatelev == 0
3615                        && (!fvextern || declarations))
3616                       || (members && instruct)))
3617                   make_C_tag (FALSE); /* a variable */
3618               break;
3619             case flistseen:
3620               if ((declarations && typdef == tnone && !instruct)
3621                   || (members && typdef != tignore && instruct))
3622                 {
3623                   make_C_tag (TRUE); /* a function */
3624                   fvdef = fvnameseen;
3625                 }
3626               else if (!declarations)
3627                 fvdef = fvnone;
3628               token.valid = FALSE;
3629               break;
3630             default:
3631               fvdef = fvnone;
3632             }
3633           if (structdef == stagseen)
3634             structdef = snone;
3635           break;
3636         case ']':
3637           if (definedef != dnone || inattribute)
3638             break;
3639           if (structdef == stagseen)
3640             structdef = snone;
3641           switch (typdef)
3642             {
3643             case ttypeseen:
3644             case tend:
3645               typdef = tignore;
3646               make_C_tag (FALSE);       /* a typedef */
3647               break;
3648             case tnone:
3649             case tinbody:
3650               switch (fvdef)
3651                 {
3652                 case foperator:
3653                 case finlist:
3654                 case fignore:
3655                 case vignore:
3656                   break;
3657                 case fvnameseen:
3658                   if ((members && bracelev == 1)
3659                       || (globals && bracelev == 0
3660                           && (!fvextern || declarations)))
3661                     make_C_tag (FALSE); /* a variable */
3662                   /* FALLTHRU */
3663                 default:
3664                   fvdef = fvnone;
3665                 }
3666               break;
3667             }
3668           break;
3669         case '(':
3670           if (inattribute)
3671             {
3672               attrparlev++;
3673               break;
3674             }
3675           if (definedef != dnone)
3676             break;
3677           if (objdef == otagseen && parlev == 0)
3678             objdef = oparenseen;
3679           switch (fvdef)
3680             {
3681             case fvnameseen:
3682               if (typdef == ttypeseen
3683                   && *lp != '*'
3684                   && !instruct)
3685                 {
3686                   /* This handles constructs like:
3687                      typedef void OperatorFun (int fun); */
3688                   make_C_tag (FALSE);
3689                   typdef = tignore;
3690                   fvdef = fignore;
3691                   break;
3692                 }
3693               /* FALLTHRU */
3694             case foperator:
3695               fvdef = fstartlist;
3696               break;
3697             case flistseen:
3698               fvdef = finlist;
3699               break;
3700             }
3701           parlev++;
3702           break;
3703         case ')':
3704           if (inattribute)
3705             {
3706               if (--attrparlev == 0)
3707                 inattribute = FALSE;
3708               break;
3709             }
3710           if (definedef != dnone)
3711             break;
3712           if (objdef == ocatseen && parlev == 1)
3713             {
3714               make_C_tag (TRUE); /* an Objective C category */
3715               objdef = oignore;
3716             }
3717           if (--parlev == 0)
3718             {
3719               switch (fvdef)
3720                 {
3721                 case fstartlist:
3722                 case finlist:
3723                   fvdef = flistseen;
3724                   break;
3725                 }
3726               if (!instruct
3727                   && (typdef == tend
3728                       || typdef == ttypeseen))
3729                 {
3730                   typdef = tignore;
3731                   make_C_tag (FALSE); /* a typedef */
3732                 }
3733             }
3734           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3735             parlev = 0;
3736           break;
3737         case '{':
3738           if (definedef != dnone)
3739             break;
3740           if (typdef == ttypeseen)
3741             {
3742               /* Whenever typdef is set to tinbody (currently only
3743                  here), typdefbracelev should be set to bracelev. */
3744               typdef = tinbody;
3745               typdefbracelev = bracelev;
3746             }
3747           switch (fvdef)
3748             {
3749             case flistseen:
3750               make_C_tag (TRUE);    /* a function */
3751               /* FALLTHRU */
3752             case fignore:
3753               fvdef = fvnone;
3754               break;
3755             case fvnone:
3756               switch (objdef)
3757                 {
3758                 case otagseen:
3759                   make_C_tag (TRUE); /* an Objective C class */
3760                   objdef = oignore;
3761                   break;
3762                 case omethodtag:
3763                 case omethodparm:
3764                   make_C_tag (TRUE); /* an Objective C method */
3765                   objdef = oinbody;
3766                   break;
3767                 default:
3768                   /* Neutralize `extern "C" {' grot. */
3769                   if (bracelev == 0 && structdef == snone && nestlev == 0
3770                       && typdef == tnone)
3771                     bracelev = -1;
3772                 }
3773               break;
3774             }
3775           switch (structdef)
3776             {
3777             case skeyseen:         /* unnamed struct */
3778               pushclass_above (bracelev, NULL, 0);
3779               structdef = snone;
3780               break;
3781             case stagseen:         /* named struct or enum */
3782             case scolonseen:       /* a class */
3783               pushclass_above (bracelev,token.line+token.offset, token.length);
3784               structdef = snone;
3785               make_C_tag (FALSE);  /* a struct or enum */
3786               break;
3787             }
3788           bracelev += 1;
3789           break;
3790         case '*':
3791           if (definedef != dnone)
3792             break;
3793           if (fvdef == fstartlist)
3794             {
3795               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3796               token.valid = FALSE;
3797             }
3798           break;
3799         case '}':
3800           if (definedef != dnone)
3801             break;
3802           bracelev -= 1;
3803           if (!ignoreindent && lp == newlb.buffer + 1)
3804             {
3805               if (bracelev != 0)
3806                 token.valid = FALSE; /* unexpected value, token unreliable */
3807               bracelev = 0;     /* reset brace level if first column */
3808               parlev = 0;       /* also reset paren level, just in case... */
3809             }
3810           else if (bracelev < 0)
3811             {
3812               token.valid = FALSE; /* something gone amiss, token unreliable */
3813               bracelev = 0;
3814             }
3815           if (bracelev == 0 && fvdef == vignore)
3816             fvdef = fvnone;             /* end of function */
3817           popclass_above (bracelev);
3818           structdef = snone;
3819           /* Only if typdef == tinbody is typdefbracelev significant. */
3820           if (typdef == tinbody && bracelev <= typdefbracelev)
3821             {
3822               assert (bracelev == typdefbracelev);
3823               typdef = tend;
3824             }
3825           break;
3826         case '=':
3827           if (definedef != dnone)
3828             break;
3829           switch (fvdef)
3830             {
3831             case foperator:
3832             case finlist:
3833             case fignore:
3834             case vignore:
3835               break;
3836             case fvnameseen:
3837               if ((members && bracelev == 1)
3838                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3839                 make_C_tag (FALSE); /* a variable */
3840               /* FALLTHRU */
3841             default:
3842               fvdef = vignore;
3843             }
3844           break;
3845         case '<':
3846           if (cplpl
3847               && (structdef == stagseen || fvdef == fvnameseen))
3848             {
3849               templatelev++;
3850               break;
3851             }
3852           goto resetfvdef;
3853         case '>':
3854           if (templatelev > 0)
3855             {
3856               templatelev--;
3857               break;
3858             }
3859           goto resetfvdef;
3860         case '+':
3861         case '-':
3862           if (objdef == oinbody && bracelev == 0)
3863             {
3864               objdef = omethodsign;
3865               break;
3866             }
3867           /* FALLTHRU */
3868         resetfvdef:
3869         case '#': case '~': case '&': case '%': case '/':
3870         case '|': case '^': case '!': case '.': case '?':
3871           if (definedef != dnone)
3872             break;
3873           /* These surely cannot follow a function tag in C. */
3874           switch (fvdef)
3875             {
3876             case foperator:
3877             case finlist:
3878             case fignore:
3879             case vignore:
3880               break;
3881             default:
3882               fvdef = fvnone;
3883             }
3884           break;
3885         case '\0':
3886           if (objdef == otagseen)
3887             {
3888               make_C_tag (TRUE); /* an Objective C class */
3889               objdef = oignore;
3890             }
3891           /* If a macro spans multiple lines don't reset its state. */
3892           if (quotednl)
3893             CNL_SAVE_DEFINEDEF ();
3894           else
3895             CNL ();
3896           break;
3897         } /* switch (c) */
3898
3899     } /* while not eof */
3900
3901   free (lbs[0].lb.buffer);
3902   free (lbs[1].lb.buffer);
3903 }
3904
3905 /*
3906  * Process either a C++ file or a C file depending on the setting
3907  * of a global flag.
3908  */
3909 static void
3910 default_C_entries (FILE *inf)
3911 {
3912   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3913 }
3914
/* Parse INF as plain C: no language flag is passed to C_entries. */
static void
plain_C_entries (FILE *inf)
{
  const int no_flags = 0;

  C_entries (no_flags, inf);
}
3921
3922 /* Always do C++. */
3923 static void
3924 Cplusplus_entries (FILE *inf)
3925 {
3926   C_entries (C_PLPL, inf);
3927 }
3928
3929 /* Always do Java. */
3930 static void
3931 Cjava_entries (FILE *inf)
3932 {
3933   C_entries (C_JAVA, inf);
3934 }
3935
3936 /* Always do C*. */
3937 static void
3938 Cstar_entries (FILE *inf)
3939 {
3940   C_entries (C_STAR, inf);
3941 }
3942
3943 /* Always do Yacc. */
3944 static void
3945 Yacc_entries (FILE *inf)
3946 {
3947   C_entries (YACC, inf);
3948 }
3949
3950 \f
/* Useful macros. */
/* LOOP_ON_INPUT_LINES expands to a `for' header; the statement that
   follows the macro is the loop body.  As long as feof (FILE_POINTER)
   is false, each iteration first calls readline to fill LINE_BUFFER
   from FILE_POINTER and sets CHAR_POINTER to LINE_BUFFER.buffer, and
   only then runs the body.  LINE_BUFFER must be an lvalue (its address
   is taken) and the arguments are expanded without parentheses.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )
3960
/* LOOKING_AT (CP, KW) is true when the text at CP starts with the
   literal string KW (compared with strneq) and the character right
   after it satisfies notinname.  On success, and only then, CP is
   reassigned to the result of skip_spaces applied just past the
   keyword.  CP must be a modifiable lvalue and is evaluated several
   times; KW must be a string literal, which the `"" kw' concatenation
   enforces at compile time.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert ("" kw), TRUE)   /* syntax error if not a literal string */  \
   && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
   && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */
3966
/* Similar to LOOKING_AT, but the keyword is compared with strncaseeq,
   the character following the keyword is not checked with notinname,
   and no spaces are skipped: on a match CP is simply advanced past the
   keyword, so the caller must skip any following whitespace itself.
   CP must be a modifiable lvalue; KW must be a string literal.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert ("" kw), TRUE) /* syntax error if not a literal string */    \
   && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
   && ((cp) += sizeof (kw)-1))                  /* skip the keyword */
3972
3973 /*
3974  * Read a file, but do no processing.  This is used to do regexp
3975  * matching on files that have no language defined.
3976  */
3977 static void
3978 just_read_file (FILE *inf)
3979 {
3980   while (!feof (inf))
3981     readline (&lb, inf);
3982 }
3983
3984 \f
3985 /* Fortran parsing */
3986
3987 static void F_takeprec (void);
3988 static void F_getit (FILE *);
3989
3990 static void
3991 F_takeprec (void)
3992 {
3993   dbp = skip_spaces (dbp);
3994   if (*dbp != '*')
3995     return;
3996   dbp++;
3997   dbp = skip_spaces (dbp);
3998   if (strneq (dbp, "(*)", 3))
3999     {
4000       dbp += 3;
4001       return;
4002     }
4003   if (!ISDIGIT (*dbp))
4004     {
4005       --dbp;                    /* force failure */
4006       return;
4007     }
4008   do
4009     dbp++;
4010   while (ISDIGIT (*dbp));
4011 }
4012
4013 static void
4014 F_getit (FILE *inf)
4015 {
4016   register char *cp;
4017
4018   dbp = skip_spaces (dbp);
4019   if (*dbp == '\0')
4020     {
4021       readline (&lb, inf);
4022       dbp = lb.buffer;
4023       if (dbp[5] != '&')
4024         return;
4025       dbp += 6;
4026       dbp = skip_spaces (dbp);
4027     }
4028   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4029     return;
4030   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4031     continue;
4032   make_tag (dbp, cp-dbp, TRUE,
4033             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4034 }
4035
4036
4037 static void
4038 Fortran_functions (FILE *inf)
4039 {
4040   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4041     {
4042       if (*dbp == '%')
4043         dbp++;                  /* Ratfor escape to fortran */
4044       dbp = skip_spaces (dbp);
4045       if (*dbp == '\0')
4046         continue;
4047
4048       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4049         dbp = skip_spaces (dbp);
4050
4051       if (LOOKING_AT_NOCASE (dbp, "pure"))
4052         dbp = skip_spaces (dbp);
4053
4054       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4055         dbp = skip_spaces (dbp);
4056
4057       switch (lowcase (*dbp))
4058         {
4059         case 'i':
4060           if (nocase_tail ("integer"))
4061             F_takeprec ();
4062           break;
4063         case 'r':
4064           if (nocase_tail ("real"))
4065             F_takeprec ();
4066           break;
4067         case 'l':
4068           if (nocase_tail ("logical"))
4069             F_takeprec ();
4070           break;
4071         case 'c':
4072           if (nocase_tail ("complex") || nocase_tail ("character"))
4073             F_takeprec ();
4074           break;
4075         case 'd':
4076           if (nocase_tail ("double"))
4077             {
4078               dbp = skip_spaces (dbp);
4079               if (*dbp == '\0')
4080                 continue;
4081               if (nocase_tail ("precision"))
4082                 break;
4083               continue;
4084             }
4085           break;
4086         }
4087       dbp = skip_spaces (dbp);
4088       if (*dbp == '\0')
4089         continue;
4090       switch (lowcase (*dbp))
4091         {
4092         case 'f':
4093           if (nocase_tail ("function"))
4094             F_getit (inf);
4095           continue;
4096         case 's':
4097           if (nocase_tail ("subroutine"))
4098             F_getit (inf);
4099           continue;
4100         case 'e':
4101           if (nocase_tail ("entry"))
4102             F_getit (inf);
4103           continue;
4104         case 'b':
4105           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4106             {
4107               dbp = skip_spaces (dbp);
4108               if (*dbp == '\0') /* assume un-named */
4109                 make_tag ("blockdata", 9, TRUE,
4110                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4111               else
4112                 F_getit (inf);  /* look for name */
4113             }
4114           continue;
4115         }
4116     }
4117 }
4118
4119 \f
4120 /*
4121  * Ada parsing
4122  * Original code by
4123  * Philippe Waroquiers (1998)
4124  */
4125
4126 /* Once we are positioned after an "interesting" keyword, let's get
4127    the real tag value necessary. */
4128 static void
4129 Ada_getit (FILE *inf, const char *name_qualifier)
4130 {
4131   register char *cp;
4132   char *name;
4133   char c;
4134
4135   while (!feof (inf))
4136     {
4137       dbp = skip_spaces (dbp);
4138       if (*dbp == '\0'
4139           || (dbp[0] == '-' && dbp[1] == '-'))
4140         {
4141           readline (&lb, inf);
4142           dbp = lb.buffer;
4143         }
4144       switch (lowcase (*dbp))
4145         {
4146         case 'b':
4147           if (nocase_tail ("body"))
4148             {
4149               /* Skipping body of   procedure body   or   package body or ....
4150                  resetting qualifier to body instead of spec. */
4151               name_qualifier = "/b";
4152               continue;
4153             }
4154           break;
4155         case 't':
4156           /* Skipping type of   task type   or   protected type ... */
4157           if (nocase_tail ("type"))
4158             continue;
4159           break;
4160         }
4161       if (*dbp == '"')
4162         {
4163           dbp += 1;
4164           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4165             continue;
4166         }
4167       else
4168         {
4169           dbp = skip_spaces (dbp);
4170           for (cp = dbp;
4171                (*cp != '\0'
4172                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4173                cp++)
4174             continue;
4175           if (cp == dbp)
4176             return;
4177         }
4178       c = *cp;
4179       *cp = '\0';
4180       name = concat (dbp, name_qualifier, "");
4181       *cp = c;
4182       make_tag (name, strlen (name), TRUE,
4183                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4184       free (name);
4185       if (c == '"')
4186         dbp = cp + 1;
4187       return;
4188     }
4189 }
4190
static void
Ada_funcs (FILE *inf)
{
  /* Both flags persist across input lines: a string literal or a
     "use ...;" clause may still be open when the line ends.  */
  bool inquote = FALSE;
  bool skip_till_semicolumn = FALSE;

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* When not already inside a string, step over the opening
                 quote (dbp + 1) before searching for the closing one.  */
              dbp = etags_strchr (dbp + !inquote, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  /* No closing quote on this line; dbp is NULL here and is
                     reset by the line loop when the next line is read.  */
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              dbp++ ;
              /* Step over the character after the quote too, unless the
                 line ends right there.  */
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          /* Inside a "use" clause: discard everything up to and
             including the terminating semicolon.  */
          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token.  Each keyword that is
             recognized hands over to Ada_getit together with the suffix
             used to qualify the tag name ("/f", "/p", "/s", "/t", "/k").  */
          switch (lowcase (*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }
              /* No break here: control falls through into case 't'.  The
                 token starts with 'u', so neither "task" nor "type" can
                 match and the 't' case ends in its own break.  */

            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore the rest of the line after a type name.  */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Not a keyword of interest.  Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4297
4298 \f
4299 /*
4300  * Unix and microcontroller assembly tag handling
4301  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4302  * Idea by Bob Weiner, Motorola Inc. (1994)
4303  */
4304 static void
4305 Asm_labels (FILE *inf)
4306 {
4307   register char *cp;
4308
4309   LOOP_ON_INPUT_LINES (inf, lb, cp)
4310     {
4311       /* If first char is alphabetic or one of [_.$], test for colon
4312          following identifier. */
4313       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4314         {
4315           /* Read past label. */
4316           cp++;
4317           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4318             cp++;
4319           if (*cp == ':' || iswhite (*cp))
4320             /* Found end of label, so copy it and add it to the table. */
4321             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4322                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4323         }
4324     }
4325 }
4326
4327 \f
4328 /*
4329  * Perl support
4330  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4331  * Perl variable names: /^(my|local).../
4332  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4333  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4334  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4335  */
4336 static void
4337 Perl_functions (FILE *inf)
4338 {
4339   char *package = savestr ("main"); /* current package name */
4340   register char *cp;
4341
4342   LOOP_ON_INPUT_LINES (inf, lb, cp)
4343     {
4344       cp = skip_spaces (cp);
4345
4346       if (LOOKING_AT (cp, "package"))
4347         {
4348           free (package);
4349           get_tag (cp, &package);
4350         }
4351       else if (LOOKING_AT (cp, "sub"))
4352         {
4353           char *pos;
4354           char *sp = cp;
4355
4356           while (!notinname (*cp))
4357             cp++;
4358           if (cp == sp)
4359             continue;           /* nothing found */
4360           if ((pos = etags_strchr (sp, ':')) != NULL
4361               && pos < cp && pos[1] == ':')
4362             /* The name is already qualified. */
4363             make_tag (sp, cp - sp, TRUE,
4364                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4365           else
4366             /* Qualify it. */
4367             {
4368               char savechar, *name;
4369
4370               savechar = *cp;
4371               *cp = '\0';
4372               name = concat (package, "::", sp);
4373               *cp = savechar;
4374               make_tag (name, strlen (name), TRUE,
4375                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4376               free (name);
4377             }
4378         }
4379        else if (globals)        /* only if we are tagging global vars */
4380         {
4381           /* Skip a qualifier, if any. */
4382           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4383           /* After "my" or "local", but before any following paren or space. */
4384           char *varstart = cp;
4385
4386           if (qual              /* should this be removed?  If yes, how? */
4387               && (*cp == '$' || *cp == '@' || *cp == '%'))
4388             {
4389               varstart += 1;
4390               do
4391                 cp++;
4392               while (ISALNUM (*cp) || *cp == '_');
4393             }
4394           else if (qual)
4395             {
4396               /* Should be examining a variable list at this point;
4397                  could insist on seeing an open parenthesis. */
4398               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4399                 cp++;
4400             }
4401           else
4402             continue;
4403
4404           make_tag (varstart, cp - varstart, FALSE,
4405                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4406         }
4407     }
4408   free (package);
4409 }
4410
4411
4412 /*
4413  * Python support
4414  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4415  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4416  * More ideas by seb bacon <seb@jamkit.com> (2002)
4417  */
4418 static void
4419 Python_functions (FILE *inf)
4420 {
4421   register char *cp;
4422
4423   LOOP_ON_INPUT_LINES (inf, lb, cp)
4424     {
4425       cp = skip_spaces (cp);
4426       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4427         {
4428           char *name = cp;
4429           while (!notinname (*cp) && *cp != ':')
4430             cp++;
4431           make_tag (name, cp - name, TRUE,
4432                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4433         }
4434     }
4435 }
4436
4437 \f
4438 /*
4439  * PHP support
4440  * Look for:
4441  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4442  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4443  *  - /^[ \t]*define\(\"[^\"]+/
4444  * Only with --members:
4445  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4446  * Idea by Diez B. Roggisch (2001)
4447  */
static void
PHP_functions (FILE *inf)
{
  register char *cp, *name;
  /* Set when "function" or "class" ended a line without a name; the
     first word of the next non-empty line is then taken as the name.  */
  bool search_identifier = FALSE;

  LOOP_ON_INPUT_LINES (inf, lb, cp)
    {
      cp = skip_spaces (cp);
      name = cp;
      if (search_identifier
          && *cp != '\0')
        {
          /* Name carried over from a keyword on an earlier line.  */
          while (!notinname (*cp))
            cp++;
          make_tag (name, cp - name, TRUE,
                    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
          search_identifier = FALSE;
        }
      else if (LOOKING_AT (cp, "function"))
        {
          /* Step over the '&' of a function returning by reference.  */
          if (*cp == '&')
            cp = skip_spaces (cp+1);
          if (*cp != '\0')
            {
              name = cp;
              while (!notinname (*cp))
                cp++;
              make_tag (name, cp - name, TRUE,
                        lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
            }
          else
            search_identifier = TRUE;
        }
      else if (LOOKING_AT (cp, "class"))
        {
          if (*cp != '\0')
            {
              /* A class name extends to the next white space.  */
              name = cp;
              while (*cp != '\0' && !iswhite (*cp))
                cp++;
              make_tag (name, cp - name, FALSE,
                        lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
            }
          else
            search_identifier = TRUE;
        }
      /* define("NAME" or define('NAME'.  Note that this condition
         advances cp as it is evaluated, so when it fails part-way the
         "var" test below sees the already-advanced cp.  */
      else if (strneq (cp, "define", 6)
               && (cp = skip_spaces (cp+6))
               && *cp++ == '('
               && (*cp == '"' || *cp == '\''))
        {
          char quote = *cp++;
          name = cp;
          /* The name ends at the matching quote or at end of line.  */
          while (*cp != quote && *cp != '\0')
            cp++;
          make_tag (name, cp - name, FALSE,
                    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
        }
      /* "var $name" is only tagged when members are requested; the tag
         name includes the '$'.  */
      else if (members
               && LOOKING_AT (cp, "var")
               && *cp == '$')
        {
          name = cp;
          while (!notinname (*cp))
            cp++;
          make_tag (name, cp - name, FALSE,
                    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
        }
    }
}
4519
4520 \f
4521 /*
4522  * Cobol tag functions
4523  * We could look for anything that could be a paragraph name.
4524  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4525  * Idea by Corny de Souza (1993)
4526  */
4527 static void
4528 Cobol_paragraphs (FILE *inf)
4529 {
4530   register char *bp, *ep;
4531
4532   LOOP_ON_INPUT_LINES (inf, lb, bp)
4533     {
4534       if (lb.len < 9)
4535         continue;
4536       bp += 8;
4537
4538       /* If eoln, compiler option or comment ignore whole line. */
4539       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4540         continue;
4541
4542       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4543         continue;
4544       if (*ep++ == '.')
4545         make_tag (bp, ep - bp, TRUE,
4546                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4547     }
4548 }
4549
4550 \f
4551 /*
4552  * Makefile support
4553  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4554  */
4555 static void
4556 Makefile_targets (FILE *inf)
4557 {
4558   register char *bp;
4559
4560   LOOP_ON_INPUT_LINES (inf, lb, bp)
4561     {
4562       if (*bp == '\t' || *bp == '#')
4563         continue;
4564       while (*bp != '\0' && *bp != '=' && *bp != ':')
4565         bp++;
4566       if (*bp == ':' || (globals && *bp == '='))
4567         {
4568           /* We should detect if there is more than one tag, but we do not.
4569              We just skip initial and final spaces. */
4570           char * namestart = skip_spaces (lb.buffer);
4571           while (--bp > namestart)
4572             if (!notinname (*bp))
4573               break;
4574           make_tag (namestart, bp - namestart + 1, TRUE,
4575                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4576         }
4577     }
4578 }
4579
4580 \f
4581 /*
4582  * Pascal parsing
4583  * Original code by Mosur K. Mohan (1989)
4584  *
4585  *  Locates tags for procedures & functions.  Doesn't do any type- or
4586  *  var-definitions.  It does look for the keyword "extern" or
4587  *  "forward" immediately following the procedure statement; if found,
4588  *  the tag is skipped.
4589  */
4590 static void
4591 Pascal_functions (FILE *inf)
4592 {
4593   linebuffer tline;             /* mostly copied from C_entries */
4594   long save_lcno;
4595   int save_lineno, namelen, taglen;
4596   char c, *name;
4597
4598   bool                          /* each of these flags is TRUE if: */
4599     incomment,                  /* point is inside a comment */
4600     inquote,                    /* point is inside '..' string */
4601     get_tagname,                /* point is after PROCEDURE/FUNCTION
4602                                    keyword, so next item = potential tag */
4603     found_tag,                  /* point is after a potential tag */
4604     inparms,                    /* point is within parameter-list */
4605     verify_tag;                 /* point has passed the parm-list, so the
4606                                    next token will determine whether this
4607                                    is a FORWARD/EXTERN to be ignored, or
4608                                    whether it is a real tag */
4609
4610   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4611   name = NULL;                  /* keep compiler quiet */
4612   dbp = lb.buffer;
4613   *dbp = '\0';
4614   linebuffer_init (&tline);
4615
4616   incomment = inquote = FALSE;
4617   found_tag = FALSE;            /* have a proc name; check if extern */
4618   get_tagname = FALSE;          /* found "procedure" keyword         */
4619   inparms = FALSE;              /* found '(' after "proc"            */
4620   verify_tag = FALSE;           /* check if "extern" is ahead        */
4621
4622
4623   while (!feof (inf))           /* long main loop to get next char */
4624     {
4625       c = *dbp++;
4626       if (c == '\0')            /* if end of line */
4627         {
4628           readline (&lb, inf);
4629           dbp = lb.buffer;
4630           if (*dbp == '\0')
4631             continue;
4632           if (!((found_tag && verify_tag)
4633                 || get_tagname))
4634             c = *dbp++;         /* only if don't need *dbp pointing
4635                                    to the beginning of the name of
4636                                    the procedure or function */
4637         }
4638       if (incomment)
4639         {
4640           if (c == '}')         /* within { } comments */
4641             incomment = FALSE;
4642           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4643             {
4644               dbp++;
4645               incomment = FALSE;
4646             }
4647           continue;
4648         }
4649       else if (inquote)
4650         {
4651           if (c == '\'')
4652             inquote = FALSE;
4653           continue;
4654         }
4655       else
4656         switch (c)
4657           {
4658           case '\'':
4659             inquote = TRUE;     /* found first quote */
4660             continue;
4661           case '{':             /* found open { comment */
4662             incomment = TRUE;
4663             continue;
4664           case '(':
4665             if (*dbp == '*')    /* found open (* comment */
4666               {
4667                 incomment = TRUE;
4668                 dbp++;
4669               }
4670             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4671               inparms = TRUE;
4672             continue;
4673           case ')':             /* end of parms list */
4674             if (inparms)
4675               inparms = FALSE;
4676             continue;
4677           case ';':
4678             if (found_tag && !inparms) /* end of proc or fn stmt */
4679               {
4680                 verify_tag = TRUE;
4681                 break;
4682               }
4683             continue;
4684           }
4685       if (found_tag && verify_tag && (*dbp != ' '))
4686         {
4687           /* Check if this is an "extern" declaration. */
4688           if (*dbp == '\0')
4689             continue;
4690           if (lowcase (*dbp == 'e'))
4691             {
4692               if (nocase_tail ("extern")) /* superfluous, really! */
4693                 {
4694                   found_tag = FALSE;
4695                   verify_tag = FALSE;
4696                 }
4697             }
4698           else if (lowcase (*dbp) == 'f')
4699             {
4700               if (nocase_tail ("forward")) /* check for forward reference */
4701                 {
4702                   found_tag = FALSE;
4703                   verify_tag = FALSE;
4704                 }
4705             }
4706           if (found_tag && verify_tag) /* not external proc, so make tag */
4707             {
4708               found_tag = FALSE;
4709               verify_tag = FALSE;
4710               make_tag (name, namelen, TRUE,
4711                         tline.buffer, taglen, save_lineno, save_lcno);
4712               continue;
4713             }
4714         }
4715       if (get_tagname)          /* grab name of proc or fn */
4716         {
4717           char *cp;
4718
4719           if (*dbp == '\0')
4720             continue;
4721
4722           /* Find block name. */
4723           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4724             continue;
4725
4726           /* Save all values for later tagging. */
4727           linebuffer_setlen (&tline, lb.len);
4728           strcpy (tline.buffer, lb.buffer);
4729           save_lineno = lineno;
4730           save_lcno = linecharno;
4731           name = tline.buffer + (dbp - lb.buffer);
4732           namelen = cp - dbp;
4733           taglen = cp - lb.buffer + 1;
4734
4735           dbp = cp;             /* set dbp to e-o-token */
4736           get_tagname = FALSE;
4737           found_tag = TRUE;
4738           continue;
4739
4740           /* And proceed to check for "extern". */
4741         }
4742       else if (!incomment && !inquote && !found_tag)
4743         {
4744           /* Check for proc/fn keywords. */
4745           switch (lowcase (c))
4746             {
4747             case 'p':
4748               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4749                 get_tagname = TRUE;
4750               continue;
4751             case 'f':
4752               if (nocase_tail ("unction"))
4753                 get_tagname = TRUE;
4754               continue;
4755             }
4756         }
4757     } /* while not eof */
4758
4759   free (tline.buffer);
4760 }
4761
4762 \f
4763 /*
4764  * Lisp tag functions
4765  *  look for (def or (DEF, quote or QUOTE
4766  */
4767
4768 static void L_getit (void);
4769
4770 static void
4771 L_getit (void)
4772 {
4773   if (*dbp == '\'')             /* Skip prefix quote */
4774     dbp++;
4775   else if (*dbp == '(')
4776   {
4777     dbp++;
4778     /* Try to skip "(quote " */
4779     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4780       /* Ok, then skip "(" before name in (defstruct (foo)) */
4781       dbp = skip_spaces (dbp);
4782   }
4783   get_tag (dbp, NULL);
4784 }
4785
4786 static void
4787 Lisp_functions (FILE *inf)
4788 {
4789   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4790     {
4791       if (dbp[0] != '(')
4792         continue;
4793
4794       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4795         {
4796           dbp = skip_non_spaces (dbp);
4797           dbp = skip_spaces (dbp);
4798           L_getit ();
4799         }
4800       else
4801         {
4802           /* Check for (foo::defmumble name-defined ... */
4803           do
4804             dbp++;
4805           while (!notinname (*dbp) && *dbp != ':');
4806           if (*dbp == ':')
4807             {
4808               do
4809                 dbp++;
4810               while (*dbp == ':');
4811
4812               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4813                 {
4814                   dbp = skip_non_spaces (dbp);
4815                   dbp = skip_spaces (dbp);
4816                   L_getit ();
4817                 }
4818             }
4819         }
4820     }
4821 }
4822
4823 \f
4824 /*
4825  * Lua script language parsing
4826  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4827  *
4828  *  "function" and "local function" are tags if they start at column 1.
4829  */
4830 static void
4831 Lua_functions (FILE *inf)
4832 {
4833   register char *bp;
4834
4835   LOOP_ON_INPUT_LINES (inf, lb, bp)
4836     {
4837       if (bp[0] != 'f' && bp[0] != 'l')
4838         continue;
4839
4840       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4841
4842       if (LOOKING_AT (bp, "function"))
4843         get_tag (bp, NULL);
4844     }
4845 }
4846
4847 \f
4848 /*
4849  * PostScript tags
4850  * Just look for lines where the first character is '/'
4851  * Also look at "defineps" for PSWrap
4852  * Ideas by:
4853  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4854  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4855  */
4856 static void
4857 PS_functions (FILE *inf)
4858 {
4859   register char *bp, *ep;
4860
4861   LOOP_ON_INPUT_LINES (inf, lb, bp)
4862     {
4863       if (bp[0] == '/')
4864         {
4865           for (ep = bp+1;
4866                *ep != '\0' && *ep != ' ' && *ep != '{';
4867                ep++)
4868             continue;
4869           make_tag (bp, ep - bp, TRUE,
4870                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4871         }
4872       else if (LOOKING_AT (bp, "defineps"))
4873         get_tag (bp, NULL);
4874     }
4875 }
4876
4877 \f
4878 /*
4879  * Forth tags
4880  * Ignore anything after \ followed by space or in ( )
4881  * Look for words defined by :
4882  * Look for constant, code, create, defer, value, and variable
4883  * OBP extensions:  Look for buffer:, field,
4884  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4885  */
static void
Forth_words (FILE *inf)
{
  register char *bp;

  /* The body of the line loop is a single while statement: each pass
     skips white space and then handles exactly one word.  */
  LOOP_ON_INPUT_LINES (inf, lb, bp)
    while ((bp = skip_spaces (bp))[0] != '\0')
      /* "\ " starts a comment that runs to the end of the line.  */
      if (bp[0] == '\\' && iswhite (bp[1]))
        break;                  /* read next line */
      /* "( " starts a comment that ends at ')' or at end of line.  */
      else if (bp[0] == '(' && iswhite (bp[1]))
        do                      /* skip to ) or eol */
          bp++;
        while (*bp != ')' && *bp != '\0');
      /* A defining word.  The bp++ steps over the ':' as a side effect
         of the test; each LOOKING_AT_NOCASE that matches advances bp
         past its keyword.  */
      else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
               || LOOKING_AT_NOCASE (bp, "constant")
               || LOOKING_AT_NOCASE (bp, "code")
               || LOOKING_AT_NOCASE (bp, "create")
               || LOOKING_AT_NOCASE (bp, "defer")
               || LOOKING_AT_NOCASE (bp, "value")
               || LOOKING_AT_NOCASE (bp, "variable")
               || LOOKING_AT_NOCASE (bp, "buffer:")
               || LOOKING_AT_NOCASE (bp, "field"))
        get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
      else
        /* Any other word is stepped over.  */
        bp = skip_non_spaces (bp);
}
4912
4913 \f
4914 /*
4915  * Scheme tag functions
4916  * look for (def... xyzzy
4917  *          (def... (xyzzy
4918  *          (def ... ((...(xyzzy ....
4919  *          (set! xyzzy
4920  * Original code by Ken Haase (1985?)
4921  */
4922 static void
4923 Scheme_functions (FILE *inf)
4924 {
4925   register char *bp;
4926
4927   LOOP_ON_INPUT_LINES (inf, lb, bp)
4928     {
4929       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4930         {
4931           bp = skip_non_spaces (bp+4);
4932           /* Skip over open parens and white space.  Don't continue past
4933              '\0'. */
4934           while (*bp && notinname (*bp))
4935             bp++;
4936           get_tag (bp, NULL);
4937         }
4938       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4939         get_tag (bp, NULL);
4940     }
4941 }
4942
4943 \f
4944 /* Find tags in TeX and LaTeX input files.  */
4945
4946 /* TEX_toktab is a table of TeX control sequences that define tags.
4947  * Each entry records one such control sequence.
4948  *
4949  * Original code from who knows whom.
4950  * Ideas by:
4951  *   Stefan Monnier (2002)
4952  */
4953
/* Table with tag tokens.  Built by TEX_decode_env the first time
   TeX_commands runs; the entry whose buffer is NULL marks the end.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by ':'.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape and grouping characters for the file being scanned.  TEX_mode
   sets them to either '\\', '{', '}' or '!', '<', '>'.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4969
4970 /*
4971  * TeX/LaTeX scanning loop.
4972  */
4973 static void
4974 TeX_commands (FILE *inf)
4975 {
4976   char *cp;
4977   linebuffer *key;
4978
4979   /* Select either \ or ! as escape character.  */
4980   TEX_mode (inf);
4981
4982   /* Initialize token table once from environment. */
4983   if (TEX_toktab == NULL)
4984     TEX_decode_env ("TEXTAGS", TEX_defenv);
4985
4986   LOOP_ON_INPUT_LINES (inf, lb, cp)
4987     {
4988       /* Look at each TEX keyword in line. */
4989       for (;;)
4990         {
4991           /* Look for a TEX escape. */
4992           while (*cp++ != TEX_esc)
4993             if (cp[-1] == '\0' || cp[-1] == '%')
4994               goto tex_next_line;
4995
4996           for (key = TEX_toktab; key->buffer != NULL; key++)
4997             if (strneq (cp, key->buffer, key->len))
4998               {
4999                 register char *p;
5000                 int namelen, linelen;
5001                 bool opgrp = FALSE;
5002
5003                 cp = skip_spaces (cp + key->len);
5004                 if (*cp == TEX_opgrp)
5005                   {
5006                     opgrp = TRUE;
5007                     cp++;
5008                   }
5009                 for (p = cp;
5010                      (!iswhite (*p) && *p != '#' &&
5011                       *p != TEX_opgrp && *p != TEX_clgrp);
5012                      p++)
5013                   continue;
5014                 namelen = p - cp;
5015                 linelen = lb.len;
5016                 if (!opgrp || *p == TEX_clgrp)
5017                   {
5018                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5019                       p++;
5020                     linelen = p - lb.buffer + 1;
5021                   }
5022                 make_tag (cp, namelen, TRUE,
5023                           lb.buffer, linelen, lineno, linecharno);
5024                 goto tex_next_line; /* We only tag a line once */
5025               }
5026         }
5027     tex_next_line:
5028       ;
5029     }
5030 }
5031
5032 #define TEX_LESC '\\'
5033 #define TEX_SESC '!'
5034
5035 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5036    chars accordingly. */
5037 static void
5038 TEX_mode (FILE *inf)
5039 {
5040   int c;
5041
5042   while ((c = getc (inf)) != EOF)
5043     {
5044       /* Skip to next line if we hit the TeX comment char. */
5045       if (c == '%')
5046         while (c != '\n' && c != EOF)
5047           c = getc (inf);
5048       else if (c == TEX_LESC || c == TEX_SESC )
5049         break;
5050     }
5051
5052   if (c == TEX_LESC)
5053     {
5054       TEX_esc = TEX_LESC;
5055       TEX_opgrp = '{';
5056       TEX_clgrp = '}';
5057     }
5058   else
5059     {
5060       TEX_esc = TEX_SESC;
5061       TEX_opgrp = '<';
5062       TEX_clgrp = '>';
5063     }
5064   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5065      No attempt is made to correct the situation. */
5066   rewind (inf);
5067 }
5068
5069 /* Read environment and prepend it to the default string.
5070    Build token table. */
5071 static void
5072 TEX_decode_env (const char *evarname, const char *defenv)
5073 {
5074   register const char *env, *p;
5075   int i, len;
5076
5077   /* Append default string to environment. */
5078   env = getenv (evarname);
5079   if (!env)
5080     env = defenv;
5081   else
5082     env = concat (env, defenv, "");
5083
5084   /* Allocate a token table */
5085   for (len = 1, p = env; p;)
5086     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5087       len++;
5088   TEX_toktab = xnew (len, linebuffer);
5089
5090   /* Unpack environment string into token table. Be careful about */
5091   /* zero-length strings (leading ':', "::" and trailing ':') */
5092   for (i = 0; *env != '\0';)
5093     {
5094       p = etags_strchr (env, ':');
5095       if (!p)                   /* End of environment string. */
5096         p = env + strlen (env);
5097       if (p - env > 0)
5098         {                       /* Only non-zero strings. */
5099           TEX_toktab[i].buffer = savenstr (env, p - env);
5100           TEX_toktab[i].len = p - env;
5101           i++;
5102         }
5103       if (*p)
5104         env = p + 1;
5105       else
5106         {
5107           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5108           TEX_toktab[i].len = 0;
5109           break;
5110         }
5111     }
5112 }
5113
5114 \f
5115 /* Texinfo support.  Dave Love, Mar. 2000.  */
5116 static void
5117 Texinfo_nodes (FILE *inf)
5118 {
5119   char *cp, *start;
5120   LOOP_ON_INPUT_LINES (inf, lb, cp)
5121     if (LOOKING_AT (cp, "@node"))
5122       {
5123         start = cp;
5124         while (*cp != '\0' && *cp != ',')
5125           cp++;
5126         make_tag (start, cp - start, TRUE,
5127                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5128       }
5129 }
5130
5131 \f
5132 /*
5133  * HTML support.
5134  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5135  * Contents of <a name=xxx> are tags with name xxx.
5136  *
5137  * Francesco Potortì, 2002.
5138  */
5139 static void
5140 HTML_labels (FILE *inf)
5141 {
5142   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5143   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5144   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5145   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5146   char *end;
5147
5148
5149   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5150
5151   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5152     for (;;)                    /* loop on the same line */
5153       {
5154         if (skiptag)            /* skip HTML tag */
5155           {
5156             while (*dbp != '\0' && *dbp != '>')
5157               dbp++;
5158             if (*dbp == '>')
5159               {
5160                 dbp += 1;
5161                 skiptag = FALSE;
5162                 continue;       /* look on the same line */
5163               }
5164             break;              /* go to next line */
5165           }
5166
5167         else if (intag) /* look for "name=" or "id=" */
5168           {
5169             while (*dbp != '\0' && *dbp != '>'
5170                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5171               dbp++;
5172             if (*dbp == '\0')
5173               break;            /* go to next line */
5174             if (*dbp == '>')
5175               {
5176                 dbp += 1;
5177                 intag = FALSE;
5178                 continue;       /* look on the same line */
5179               }
5180             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5181                 || LOOKING_AT_NOCASE (dbp, "id="))
5182               {
5183                 bool quoted = (dbp[0] == '"');
5184
5185                 if (quoted)
5186                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5187                     continue;
5188                 else
5189                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5190                     continue;
5191                 linebuffer_setlen (&token_name, end - dbp);
5192                 strncpy (token_name.buffer, dbp, end - dbp);
5193                 token_name.buffer[end - dbp] = '\0';
5194
5195                 dbp = end;
5196                 intag = FALSE;  /* we found what we looked for */
5197                 skiptag = TRUE; /* skip to the end of the tag */
5198                 getnext = TRUE; /* then grab the text */
5199                 continue;       /* look on the same line */
5200               }
5201             dbp += 1;
5202           }
5203
5204         else if (getnext)       /* grab next tokens and tag them */
5205           {
5206             dbp = skip_spaces (dbp);
5207             if (*dbp == '\0')
5208               break;            /* go to next line */
5209             if (*dbp == '<')
5210               {
5211                 intag = TRUE;
5212                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5213                 continue;       /* look on the same line */
5214               }
5215
5216             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5217               continue;
5218             make_tag (token_name.buffer, token_name.len, TRUE,
5219                       dbp, end - dbp, lineno, linecharno);
5220             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5221             getnext = FALSE;
5222             break;              /* go to next line */
5223           }
5224
5225         else                    /* look for an interesting HTML tag */
5226           {
5227             while (*dbp != '\0' && *dbp != '<')
5228               dbp++;
5229             if (*dbp == '\0')
5230               break;            /* go to next line */
5231             intag = TRUE;
5232             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5233               {
5234                 inanchor = TRUE;
5235                 continue;       /* look on the same line */
5236               }
5237             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5238                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5239                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5240                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5241               {
5242                 intag = FALSE;
5243                 getnext = TRUE;
5244                 continue;       /* look on the same line */
5245               }
5246             dbp += 1;
5247           }
5248       }
5249 }
5250
5251 \f
5252 /*
5253  * Prolog support
5254  *
5255  * Assumes that the predicate or rule starts at column 0.
5256  * Only the first clause of a predicate or rule is added.
5257  * Original code by Sunichirou Sugou (1989)
5258  * Rewritten by Anders Lindgren (1996)
5259  */
5260 static size_t prolog_pr (char *, char *);
5261 static void prolog_skip_comment (linebuffer *, FILE *);
5262 static size_t prolog_atom (char *, size_t);
5263
5264 static void
5265 Prolog_functions (FILE *inf)
5266 {
5267   char *cp, *last;
5268   size_t len;
5269   size_t allocated;
5270
5271   allocated = 0;
5272   len = 0;
5273   last = NULL;
5274
5275   LOOP_ON_INPUT_LINES (inf, lb, cp)
5276     {
5277       if (cp[0] == '\0')        /* Empty line */
5278         continue;
5279       else if (iswhite (cp[0])) /* Not a predicate */
5280         continue;
5281       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5282         prolog_skip_comment (&lb, inf);
5283       else if ((len = prolog_pr (cp, last)) > 0)
5284         {
5285           /* Predicate or rule.  Store the function name so that we
5286              only generate a tag for the first clause.  */
5287           if (last == NULL)
5288             last = xnew (len + 1, char);
5289           else if (len + 1 > allocated)
5290             xrnew (last, len + 1, char);
5291           allocated = len + 1;
5292           strncpy (last, cp, len);
5293           last[len] = '\0';
5294         }
5295     }
5296   free (last);
5297 }
5298
5299
5300 static void
5301 prolog_skip_comment (linebuffer *plb, FILE *inf)
5302 {
5303   char *cp;
5304
5305   do
5306     {
5307       for (cp = plb->buffer; *cp != '\0'; cp++)
5308         if (cp[0] == '*' && cp[1] == '/')
5309           return;
5310       readline (plb, inf);
5311     }
5312   while (!feof (inf));
5313 }
5314
5315 /*
5316  * A predicate or rule definition is added if it matches:
5317  *     <beginning of line><Prolog Atom><whitespace>(
5318  * or  <beginning of line><Prolog Atom><whitespace>:-
5319  *
5320  * It is added to the tags database if it doesn't match the
5321  * name of the previous clause header.
5322  *
5323  * Return the size of the name of the predicate or rule, or 0 if no
5324  * header was found.
5325  */
5326 static size_t
5327 prolog_pr (char *s, char *last)
5328
5329                                 /* Name of last clause. */
5330 {
5331   size_t pos;
5332   size_t len;
5333
5334   pos = prolog_atom (s, 0);
5335   if (! pos)
5336     return 0;
5337
5338   len = pos;
5339   pos = skip_spaces (s + pos) - s;
5340
5341   if ((s[pos] == '.'
5342        || (s[pos] == '(' && (pos += 1))
5343        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5344       && (last == NULL          /* save only the first clause */
5345           || len != strlen (last)
5346           || !strneq (s, last, len)))
5347         {
5348           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5349           return len;
5350         }
5351   else
5352     return 0;
5353 }
5354
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if no atom is found.
 *
 * An atom, in this context, is one of:
 * - A lower case letter or an underscore, followed by any number of
 *   alphanumeric characters and underscores.
 * - A string in single quotes.  Two single quotes in a row stand for
 *   a quote inside the string, and a backslash quotes the character
 *   after it.  A quoted atom that is not closed before the end of S
 *   is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (ISLOWER (s[pos]) || s[pos] == '_')
    {
      /* Unquoted atom.  */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
      return pos - start;
    }

  if (s[pos] != '\'')
    return 0;

  /* Quoted atom: walk up to the closing quote.  */
  pos++;
  while (s[pos] != '\0')
    {
      if (s[pos] == '\\')
        {
          if (s[pos + 1] == '\0')
            return 0;
          pos += 2;
        }
      else if (s[pos] == '\'')
        {
          pos++;
          if (s[pos] != '\'')
            return pos - start; /* that was the closing quote */
          pos++;                /* doubled quote: part of the atom */
        }
      else
        pos++;
    }

  /* Ran off the line: multiline quoted atoms are ignored.  */
  return 0;
}
5411
5412 \f
5413 /*
5414  * Support for Erlang
5415  *
5416  * Generates tags for functions, defines, and records.
5417  * Assumes that Erlang functions start at column 0.
5418  * Original code by Anders Lindgren (1996)
5419  */
5420 static int erlang_func (char *, char *);
5421 static void erlang_attribute (char *);
5422 static int erlang_atom (char *);
5423
5424 static void
5425 Erlang_functions (FILE *inf)
5426 {
5427   char *cp, *last;
5428   int len;
5429   int allocated;
5430
5431   allocated = 0;
5432   len = 0;
5433   last = NULL;
5434
5435   LOOP_ON_INPUT_LINES (inf, lb, cp)
5436     {
5437       if (cp[0] == '\0')        /* Empty line */
5438         continue;
5439       else if (iswhite (cp[0])) /* Not function nor attribute */
5440         continue;
5441       else if (cp[0] == '%')    /* comment */
5442         continue;
5443       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5444         continue;
5445       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5446         {
5447           erlang_attribute (cp);
5448           if (last != NULL)
5449             {
5450               free (last);
5451               last = NULL;
5452             }
5453         }
5454       else if ((len = erlang_func (cp, last)) > 0)
5455         {
5456           /*
5457            * Function.  Store the function name so that we only
5458            * generates a tag for the first clause.
5459            */
5460           if (last == NULL)
5461             last = xnew (len + 1, char);
5462           else if (len + 1 > allocated)
5463             xrnew (last, len + 1, char);
5464           allocated = len + 1;
5465           strncpy (last, cp, len);
5466           last[len] = '\0';
5467         }
5468     }
5469   free (last);
5470 }
5471
5472
5473 /*
5474  * A function definition is added if it matches:
5475  *     <beginning of line><Erlang Atom><whitespace>(
5476  *
5477  * It is added to the tags database if it doesn't match the
5478  * name of the previous clause header.
5479  *
5480  * Return the size of the name of the function, or 0 if no function
5481  * was found.
5482  */
5483 static int
5484 erlang_func (char *s, char *last)
5485
5486                                 /* Name of last clause. */
5487 {
5488   int pos;
5489   int len;
5490
5491   pos = erlang_atom (s);
5492   if (pos < 1)
5493     return 0;
5494
5495   len = pos;
5496   pos = skip_spaces (s + pos) - s;
5497
5498   /* Save only the first clause. */
5499   if (s[pos++] == '('
5500       && (last == NULL
5501           || len != (int)strlen (last)
5502           || !strneq (s, last, len)))
5503         {
5504           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5505           return len;
5506         }
5507
5508   return 0;
5509 }
5510
5511
5512 /*
5513  * Handle attributes.  Currently, tags are generated for defines
5514  * and records.
5515  *
5516  * They are on the form:
5517  * -define(foo, bar).
5518  * -define(Foo(M, N), M+N).
5519  * -record(graph, {vtab = notable, cyclic = true}).
5520  */
5521 static void
5522 erlang_attribute (char *s)
5523 {
5524   char *cp = s;
5525
5526   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5527       && *cp++ == '(')
5528     {
5529       int len = erlang_atom (skip_spaces (cp));
5530       if (len > 0)
5531         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5532     }
5533   return;
5534 }
5535
5536
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one.
 *
 * Accepted forms:
 * - A letter or an underscore, followed by any number of alphanumeric
 *   characters and underscores.
 * - A string in single quotes, in which a backslash quotes the
 *   character after it.  A quoted atom that is not closed before the
 *   end of S is rejected.
 */
static int
erlang_atom (char *s)
{
  int n;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* Unquoted atom.  */
      n = 1;
      while (ISALNUM (s[n]) || s[n] == '_')
        n++;
      return n;
    }

  if (s[0] != '\'')
    return 0;

  /* Quoted atom: walk up to the closing quote.  */
  n = 1;
  while (s[n] != '\'')
    {
      if (s[n] == '\0')         /* multiline quoted atoms are ignored */
        return 0;
      if (s[n] == '\\')
        {
          n++;                  /* look at the quoted character */
          if (s[n] == '\0')
            return 0;
        }
      n++;
    }
  return n + 1;                 /* count the closing quote too */
}
5564
5565 \f
5566 static char *scan_separators (char *);
5567 static void add_regex (char *, language *);
5568 static char *substitute (char *, char *, struct re_registers *);
5569
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text after it is copied
 * down over NAME, in place, until the next unquoted separator.
 *
 * A backslash quotes the character after it:
 *   \a \b \d \e \f \n \r \t \v  become the corresponding control
 *                               characters;
 *   \<separator>                becomes the separator itself;
 *   \<anything else>            is kept as is, backslash included.
 *
 * The copied text is always null terminated.  Return a pointer to the
 * terminating separator, or NULL for unterminated regexps (the end of
 * the string was reached first).
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *src;                    /* character being examined */
  char *dst = name;             /* where the next output character goes */
  char *result;

  for (src = name + 1; *src != '\0'; src++)
    {
      char c = *src;

      if (c == '\\')
        {
          /* The character after the backslash decides what is stored.  */
          c = *++src;
          if (c == '\0')
            break;              /* dangling backslash at end of string */
          switch (c)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)      */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)         */
            case 'e': *dst++ = 033; break;    /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator loses its backslash; anything else
                 keeps it.  */
              if (c != sep)
                *dst++ = '\\';
              *dst++ = c;
              break;
            }
        }
      else if (c == sep)
        break;
      else
        *dst++ = c;
    }

  /* Anything but a separator here means we ran off the string.  */
  result = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5628
5629 /* Look at the argument of --regex or --no-regex and do the right
5630    thing.  Same for each line of a regexp file. */
5631 static void
5632 analyse_regex (char *regex_arg)
5633 {
5634   if (regex_arg == NULL)
5635     {
5636       free_regexps ();          /* --no-regex: remove existing regexps */
5637       return;
5638     }
5639
5640   /* A real --regexp option or a line in a regexp file. */
5641   switch (regex_arg[0])
5642     {
5643       /* Comments in regexp file or null arg to --regex. */
5644     case '\0':
5645     case ' ':
5646     case '\t':
5647       break;
5648
5649       /* Read a regex file.  This is recursive and may result in a
5650          loop, which will stop when the file descriptors are exhausted. */
5651     case '@':
5652       {
5653         FILE *regexfp;
5654         linebuffer regexbuf;
5655         char *regexfile = regex_arg + 1;
5656
5657         /* regexfile is a file containing regexps, one per line. */
5658         regexfp = fopen (regexfile, "r");
5659         if (regexfp == NULL)
5660           {
5661             pfatal (regexfile);
5662             return;
5663           }
5664         linebuffer_init (&regexbuf);
5665         while (readline_internal (&regexbuf, regexfp) > 0)
5666           analyse_regex (regexbuf.buffer);
5667         free (regexbuf.buffer);
5668         fclose (regexfp);
5669       }
5670       break;
5671
5672       /* Regexp to be used for a specific language only. */
5673     case '{':
5674       {
5675         language *lang;
5676         char *lang_name = regex_arg + 1;
5677         char *cp;
5678
5679         for (cp = lang_name; *cp != '}'; cp++)
5680           if (*cp == '\0')
5681             {
5682               error ("unterminated language name in regex: %s", regex_arg);
5683               return;
5684             }
5685         *cp++ = '\0';
5686         lang = get_language_from_langname (lang_name);
5687         if (lang == NULL)
5688           return;
5689         add_regex (cp, lang);
5690       }
5691       break;
5692
5693       /* Regexp to be used for any language. */
5694     default:
5695       add_regex (regex_arg, NULL);
5696       break;
5697     }
5698 }
5699
5700 /* Separate the regexp pattern, compile it,
5701    and care for optional name and modifiers. */
5702 static void
5703 add_regex (char *regexp_pattern, language *lang)
5704 {
5705   static struct re_pattern_buffer zeropattern;
5706   char sep, *pat, *name, *modifiers;
5707   char empty[] = "";
5708   const char *err;
5709   struct re_pattern_buffer *patbuf;
5710   regexp *rp;
5711   bool
5712     force_explicit_name = TRUE, /* do not use implicit tag names */
5713     ignore_case = FALSE,        /* case is significant */
5714     multi_line = FALSE,         /* matches are done one line at a time */
5715     single_line = FALSE;        /* dot does not match newline */
5716
5717
5718   if (strlen (regexp_pattern) < 3)
5719     {
5720       error ("null regexp", (char *)NULL);
5721       return;
5722     }
5723   sep = regexp_pattern[0];
5724   name = scan_separators (regexp_pattern);
5725   if (name == NULL)
5726     {
5727       error ("%s: unterminated regexp", regexp_pattern);
5728       return;
5729     }
5730   if (name[1] == sep)
5731     {
5732       error ("null name for regexp \"%s\"", regexp_pattern);
5733       return;
5734     }
5735   modifiers = scan_separators (name);
5736   if (modifiers == NULL)        /* no terminating separator --> no name */
5737     {
5738       modifiers = name;
5739       name = empty;
5740     }
5741   else
5742     modifiers += 1;             /* skip separator */
5743
5744   /* Parse regex modifiers. */
5745   for (; modifiers[0] != '\0'; modifiers++)
5746     switch (modifiers[0])
5747       {
5748       case 'N':
5749         if (modifiers == name)
5750           error ("forcing explicit tag name but no name, ignoring", NULL);
5751         force_explicit_name = TRUE;
5752         break;
5753       case 'i':
5754         ignore_case = TRUE;
5755         break;
5756       case 's':
5757         single_line = TRUE;
5758         /* FALLTHRU */
5759       case 'm':
5760         multi_line = TRUE;
5761         need_filebuf = TRUE;
5762         break;
5763       default:
5764         {
5765           char wrongmod [2];
5766           wrongmod[0] = modifiers[0];
5767           wrongmod[1] = '\0';
5768           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5769         }
5770         break;
5771       }
5772
5773   patbuf = xnew (1, struct re_pattern_buffer);
5774   *patbuf = zeropattern;
5775   if (ignore_case)
5776     {
5777       static char lc_trans[CHARS];
5778       int i;
5779       for (i = 0; i < CHARS; i++)
5780         lc_trans[i] = lowcase (i);
5781       patbuf->translate = lc_trans;     /* translation table to fold case  */
5782     }
5783
5784   if (multi_line)
5785     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5786   else
5787     pat = regexp_pattern;
5788
5789   if (single_line)
5790     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5791   else
5792     re_set_syntax (RE_SYNTAX_EMACS);
5793
5794   err = re_compile_pattern (pat, strlen (pat), patbuf);
5795   if (multi_line)
5796     free (pat);
5797   if (err != NULL)
5798     {
5799       error ("%s while compiling pattern", err);
5800       return;
5801     }
5802
5803   rp = p_head;
5804   p_head = xnew (1, regexp);
5805   p_head->pattern = savestr (regexp_pattern);
5806   p_head->p_next = rp;
5807   p_head->lang = lang;
5808   p_head->pat = patbuf;
5809   p_head->name = savestr (name);
5810   p_head->error_signaled = FALSE;
5811   p_head->force_explicit_name = force_explicit_name;
5812   p_head->ignore_case = ignore_case;
5813   p_head->multi_line = multi_line;
5814 }
5815
5816 /*
5817  * Do the substitutions indicated by the regular expression and
5818  * arguments.
5819  */
5820 static char *
5821 substitute (char *in, char *out, struct re_registers *regs)
5822 {
5823   char *result, *t;
5824   int size, dig, diglen;
5825
5826   result = NULL;
5827   size = strlen (out);
5828
5829   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5830   if (out[size - 1] == '\\')
5831     fatal ("pattern error in \"%s\"", out);
5832   for (t = etags_strchr (out, '\\');
5833        t != NULL;
5834        t = etags_strchr (t + 2, '\\'))
5835     if (ISDIGIT (t[1]))
5836       {
5837         dig = t[1] - '0';
5838         diglen = regs->end[dig] - regs->start[dig];
5839         size += diglen - 2;
5840       }
5841     else
5842       size -= 1;
5843
5844   /* Allocate space and do the substitutions. */
5845   assert (size >= 0);
5846   result = xnew (size + 1, char);
5847
5848   for (t = result; *out != '\0'; out++)
5849     if (*out == '\\' && ISDIGIT (*++out))
5850       {
5851         dig = *out - '0';
5852         diglen = regs->end[dig] - regs->start[dig];
5853         strncpy (t, in + regs->start[dig], diglen);
5854         t += diglen;
5855       }
5856     else
5857       *t++ = *out;
5858   *t = '\0';
5859
5860   assert (t <= result + size);
5861   assert (t - result == (int)strlen (result));
5862
5863   return result;
5864 }
5865
5866 /* Deallocate all regexps. */
5867 static void
5868 free_regexps (void)
5869 {
5870   regexp *rp;
5871   while (p_head != NULL)
5872     {
5873       rp = p_head->p_next;
5874       free (p_head->pattern);
5875       free (p_head->name);
5876       free (p_head);
5877       p_head = rp;
5878     }
5879   return;
5880 }
5881
5882 /*
5883  * Reads the whole file as a single string from `filebuf' and looks for
5884  * multi-line regular expressions, creating tags on matches.
5885  * readline already dealt with normal regexps.
5886  *
5887  * Idea by Ben Wing <ben@666.com> (2002).
5888  */
5889 static void
5890 regex_tag_multiline (void)
5891 {
5892   char *buffer = filebuf.buffer;
5893   regexp *rp;
5894   char *name;
5895
5896   for (rp = p_head; rp != NULL; rp = rp->p_next)
5897     {
5898       int match = 0;
5899
5900       if (!rp->multi_line)
5901         continue;               /* skip normal regexps */
5902
5903       /* Generic initialisations before parsing file from memory. */
5904       lineno = 1;               /* reset global line number */
5905       charno = 0;               /* reset global char number */
5906       linecharno = 0;           /* reset global char number of line start */
5907
5908       /* Only use generic regexps or those for the current language. */
5909       if (rp->lang != NULL && rp->lang != curfdp->lang)
5910         continue;
5911
5912       while (match >= 0 && match < filebuf.len)
5913         {
5914           match = re_search (rp->pat, buffer, filebuf.len, charno,
5915                              filebuf.len - match, &rp->regs);
5916           switch (match)
5917             {
5918             case -2:
5919               /* Some error. */
5920               if (!rp->error_signaled)
5921                 {
5922                   error ("regexp stack overflow while matching \"%s\"",
5923                          rp->pattern);
5924                   rp->error_signaled = TRUE;
5925                 }
5926               break;
5927             case -1:
5928               /* No match. */
5929               break;
5930             default:
5931               if (match == rp->regs.end[0])
5932                 {
5933                   if (!rp->error_signaled)
5934                     {
5935                       error ("regexp matches the empty string: \"%s\"",
5936                              rp->pattern);
5937                       rp->error_signaled = TRUE;
5938                     }
5939                   match = -3;   /* exit from while loop */
5940                   break;
5941                 }
5942
5943               /* Match occurred.  Construct a tag. */
5944               while (charno < rp->regs.end[0])
5945                 if (buffer[charno++] == '\n')
5946                   lineno++, linecharno = charno;
5947               name = rp->name;
5948               if (name[0] == '\0')
5949                 name = NULL;
5950               else /* make a named tag */
5951                 name = substitute (buffer, rp->name, &rp->regs);
5952               if (rp->force_explicit_name)
5953                 /* Force explicit tag name, if a name is there. */
5954                 pfnote (name, TRUE, buffer + linecharno,
5955                         charno - linecharno + 1, lineno, linecharno);
5956               else
5957                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5958                           charno - linecharno + 1, lineno, linecharno);
5959               break;
5960             }
5961         }
5962     }
5963 }
5964
5965 \f
5966 static bool
5967 nocase_tail (const char *cp)
5968 {
5969   register int len = 0;
5970
5971   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5972     cp++, len++;
5973   if (*cp == '\0' && !intoken (dbp[len]))
5974     {
5975       dbp += len;
5976       return TRUE;
5977     }
5978   return FALSE;
5979 }
5980
5981 static void
5982 get_tag (register char *bp, char **namepp)
5983 {
5984   register char *cp = bp;
5985
5986   if (*bp != '\0')
5987     {
5988       /* Go till you get to white space or a syntactic break */
5989       for (cp = bp + 1; !notinname (*cp); cp++)
5990         continue;
5991       make_tag (bp, cp - bp, TRUE,
5992                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5993     }
5994
5995   if (namepp != NULL)
5996     *namepp = savenstr (bp, cp - bp);
5997 }
5998
5999 /*
6000  * Read a line of text from `stream' into `lbp', excluding the
6001  * newline or CR-NL, if any.  Return the number of characters read from
6002  * `stream', which is the length of the line including the newline.
6003  *
6004  * On DOS or Windows we do not count the CR character, if any before the
6005  * NL, in the returned length; this mirrors the behavior of Emacs on those
6006  * platforms (for text files, it translates CR-NL to NL as it reads in the
6007  * file).
6008  *
6009  * If multi-line regular expressions are requested, each line read is
6010  * appended to `filebuf'.
6011  */
6012 static long
6013 readline_internal (linebuffer *lbp, register FILE *stream)
6014 {
6015   char *buffer = lbp->buffer;
6016   register char *p = lbp->buffer;
6017   register char *pend;
6018   int chars_deleted;
6019
6020   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6021
6022   for (;;)
6023     {
6024       register int c = getc (stream);
6025       if (p == pend)
6026         {
6027           /* We're at the end of linebuffer: expand it. */
6028           lbp->size *= 2;
6029           xrnew (buffer, lbp->size, char);
6030           p += buffer - lbp->buffer;
6031           pend = buffer + lbp->size;
6032           lbp->buffer = buffer;
6033         }
6034       if (c == EOF)
6035         {
6036           *p = '\0';
6037           chars_deleted = 0;
6038           break;
6039         }
6040       if (c == '\n')
6041         {
6042           if (p > buffer && p[-1] == '\r')
6043             {
6044               p -= 1;
6045 #ifdef DOS_NT
6046              /* Assume CRLF->LF translation will be performed by Emacs
6047                 when loading this file, so CRs won't appear in the buffer.
6048                 It would be cleaner to compensate within Emacs;
6049                 however, Emacs does not know how many CRs were deleted
6050                 before any given point in the file.  */
6051               chars_deleted = 1;
6052 #else
6053               chars_deleted = 2;
6054 #endif
6055             }
6056           else
6057             {
6058               chars_deleted = 1;
6059             }
6060           *p = '\0';
6061           break;
6062         }
6063       *p++ = c;
6064     }
6065   lbp->len = p - buffer;
6066
6067   if (need_filebuf              /* we need filebuf for multi-line regexps */
6068       && chars_deleted > 0)     /* not at EOF */
6069     {
6070       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6071         {
6072           /* Expand filebuf. */
6073           filebuf.size *= 2;
6074           xrnew (filebuf.buffer, filebuf.size, char);
6075         }
6076       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6077       filebuf.len += lbp->len;
6078       filebuf.buffer[filebuf.len++] = '\n';
6079       filebuf.buffer[filebuf.len] = '\0';
6080     }
6081
6082   return lbp->len + chars_deleted;
6083 }
6084
6085 /*
6086  * Like readline_internal, above, but in addition try to match the
6087  * input line against relevant regular expressions and manage #line
6088  * directives.
6089  */
6090 static void
6091 readline (linebuffer *lbp, FILE *stream)
6092 {
6093   long result;
6094
6095   linecharno = charno;          /* update global char number of line start */
6096   result = readline_internal (lbp, stream); /* read line */
6097   lineno += 1;                  /* increment global line number */
6098   charno += result;             /* increment global char number */
6099
6100   /* Honor #line directives. */
6101   if (!no_line_directive)
6102     {
6103       static bool discard_until_line_directive;
6104
6105       /* Check whether this is a #line directive. */
6106       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6107         {
6108           unsigned int lno;
6109           int start = 0;
6110
6111           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6112               && start > 0)     /* double quote character found */
6113             {
6114               char *endp = lbp->buffer + start;
6115
6116               while ((endp = etags_strchr (endp, '"')) != NULL
6117                      && endp[-1] == '\\')
6118                 endp++;
6119               if (endp != NULL)
6120                 /* Ok, this is a real #line directive.  Let's deal with it. */
6121                 {
6122                   char *taggedabsname;  /* absolute name of original file */
6123                   char *taggedfname;    /* name of original file as given */
6124                   char *name;           /* temp var */
6125
6126                   discard_until_line_directive = FALSE; /* found it */
6127                   name = lbp->buffer + start;
6128                   *endp = '\0';
6129                   canonicalize_filename (name);
6130                   taggedabsname = absolute_filename (name, tagfiledir);
6131                   if (filename_is_absolute (name)
6132                       || filename_is_absolute (curfdp->infname))
6133                     taggedfname = savestr (taggedabsname);
6134                   else
6135                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6136
6137                   if (streq (curfdp->taggedfname, taggedfname))
6138                     /* The #line directive is only a line number change.  We
6139                        deal with this afterwards. */
6140                     free (taggedfname);
6141                   else
6142                     /* The tags following this #line directive should be
6143                        attributed to taggedfname.  In order to do this, set
6144                        curfdp accordingly. */
6145                     {
6146                       fdesc *fdp; /* file description pointer */
6147
6148                       /* Go look for a file description already set up for the
6149                          file indicated in the #line directive.  If there is
6150                          one, use it from now until the next #line
6151                          directive. */
6152                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6153                         if (streq (fdp->infname, curfdp->infname)
6154                             && streq (fdp->taggedfname, taggedfname))
6155                           /* If we remove the second test above (after the &&)
6156                              then all entries pertaining to the same file are
6157                              coalesced in the tags file.  If we use it, then
6158                              entries pertaining to the same file but generated
6159                              from different files (via #line directives) will
6160                              go into separate sections in the tags file.  These
6161                              alternatives look equivalent.  The first one
6162                              destroys some apparently useless information. */
6163                           {
6164                             curfdp = fdp;
6165                             free (taggedfname);
6166                             break;
6167                           }
6168                       /* Else, if we already tagged the real file, skip all
6169                          input lines until the next #line directive. */
6170                       if (fdp == NULL) /* not found */
6171                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6172                           if (streq (fdp->infabsname, taggedabsname))
6173                             {
6174                               discard_until_line_directive = TRUE;
6175                               free (taggedfname);
6176                               break;
6177                             }
6178                       /* Else create a new file description and use that from
6179                          now on, until the next #line directive. */
6180                       if (fdp == NULL) /* not found */
6181                         {
6182                           fdp = fdhead;
6183                           fdhead = xnew (1, fdesc);
6184                           *fdhead = *curfdp; /* copy curr. file description */
6185                           fdhead->next = fdp;
6186                           fdhead->infname = savestr (curfdp->infname);
6187                           fdhead->infabsname = savestr (curfdp->infabsname);
6188                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6189                           fdhead->taggedfname = taggedfname;
6190                           fdhead->usecharno = FALSE;
6191                           fdhead->prop = NULL;
6192                           fdhead->written = FALSE;
6193                           curfdp = fdhead;
6194                         }
6195                     }
6196                   free (taggedabsname);
6197                   lineno = lno - 1;
6198                   readline (lbp, stream);
6199                   return;
6200                 } /* if a real #line directive */
6201             } /* if #line is followed by a number */
6202         } /* if line begins with "#line " */
6203
6204       /* If we are here, no #line directive was found. */
6205       if (discard_until_line_directive)
6206         {
6207           if (result > 0)
6208             {
6209               /* Do a tail recursion on ourselves, thus discarding the contents
6210                  of the line buffer. */
6211               readline (lbp, stream);
6212               return;
6213             }
6214           /* End of file. */
6215           discard_until_line_directive = FALSE;
6216           return;
6217         }
6218     } /* if #line directives should be considered */
6219
6220   {
6221     int match;
6222     regexp *rp;
6223     char *name;
6224
6225     /* Match against relevant regexps. */
6226     if (lbp->len > 0)
6227       for (rp = p_head; rp != NULL; rp = rp->p_next)
6228         {
6229           /* Only use generic regexps or those for the current language.
6230              Also do not use multiline regexps, which is the job of
6231              regex_tag_multiline. */
6232           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6233               || rp->multi_line)
6234             continue;
6235
6236           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6237           switch (match)
6238             {
6239             case -2:
6240               /* Some error. */
6241               if (!rp->error_signaled)
6242                 {
6243                   error ("regexp stack overflow while matching \"%s\"",
6244                          rp->pattern);
6245                   rp->error_signaled = TRUE;
6246                 }
6247               break;
6248             case -1:
6249               /* No match. */
6250               break;
6251             case 0:
6252               /* Empty string matched. */
6253               if (!rp->error_signaled)
6254                 {
6255                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6256                   rp->error_signaled = TRUE;
6257                 }
6258               break;
6259             default:
6260               /* Match occurred.  Construct a tag. */
6261               name = rp->name;
6262               if (name[0] == '\0')
6263                 name = NULL;
6264               else /* make a named tag */
6265                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6266               if (rp->force_explicit_name)
6267                 /* Force explicit tag name, if a name is there. */
6268                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6269               else
6270                 make_tag (name, strlen (name), TRUE,
6271                           lbp->buffer, match, lineno, linecharno);
6272               break;
6273             }
6274         }
6275   }
6276 }
6277
6278 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The whole of CP is handed to savenstr, so the result is a separately
 * allocated string of strlen(cp)+1 bytes holding the same characters.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6288
6289 /*
6290  * Return a pointer to a space of size LEN+1 allocated with xnew where
6291  * the string CP has been copied for at most the first LEN characters.
6292  */
6293 static char *
6294 savenstr (const char *cp, int len)
6295 {
6296   register char *dp;
6297
6298   dp = xnew (len + 1, char);
6299   strncpy (dp, cp, len);
6300   dp[len] = '\0';
6301   return dp;
6302 }
6303
/*
 * Find the last occurrence of the character C in the string SP.
 *
 * The terminating NUL is part of the search, so looking for '\0'
 * yields a pointer to the end of the string.  Returns NULL when C
 * does not occur.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent hit */
      if (*sp == '\0')
        break;                  /* terminator examined: done */
    }
  return (char *) last;
}
6323
/*
 * Find the first occurrence of the character C in the string SP.
 *
 * The terminating NUL is part of the search, so looking for '\0'
 * yields a pointer to the end of the string.  Returns NULL when C
 * does not occur.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (; *sp != c; sp++)
    if (*sp == '\0')
      return NULL;              /* hit the end without finding C */
  return (char *) sp;
}
6340
/*
 * Compare S1 and S2, treating upper and lower case letters as equal.
 *
 * Case is folded (with lowcase) only when both characters being
 * compared are alphabetic; any other pair is compared as is.  The
 * result is the difference between the first pair of characters that
 * do not match, or 0 when the strings are equal.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6359
/*
 * Compare at most the first N characters of S1 and S2, treating upper
 * and lower case letters as equal.
 *
 * Case is folded (with lowcase) only when both characters being
 * compared are alphabetic; any other pair is compared as is.  Returns
 * 0 when the first N characters match (or N is not positive),
 * otherwise the difference between the first pair of characters that
 * do not match.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  /* Test the remaining count before looking at the characters.  If the
     end of S1 were checked first, a string S1 of exactly N matching
     characters would fall through to comparing its terminator with
     character N+1 of S2 and wrongly report a difference. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;                   /* all N characters compared equal */

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
6382
/* Advance CP past every leading character for which iswhite is true
   and return the resulting pointer.  The end of the string does not
   count as white space, so the scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6391
/* Advance CP to the first character for which iswhite is true, or to
   the end of the string if there is none, and return that pointer. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6400
/* Report a fatal error and terminate the program.
   S1 and S2 are passed unchanged to error: S1 is the printf control
   string and S2 its single argument.  The process then exits with
   status EXIT_FAILURE, so this function does not return. */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6408
/* Report a failed system call and terminate the program.
   S1 is handed to perror, which prints it together with the message
   for the current errno; the process then exits with status
   EXIT_FAILURE, so this function does not return. */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6415
6416 static void
6417 suggest_asking_for_help (void)
6418 {
6419   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6420            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6421   exit (EXIT_FAILURE);
6422 }
6423
6424 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6425 static void
6426 error (const char *s1, const char *s2)
6427 {
6428   fprintf (stderr, "%s: ", progname);
6429   fprintf (stderr, s1, s2);
6430   fprintf (stderr, "\n");
6431 }
6432
6433 /* Return a newly-allocated string whose contents
6434    concatenate those of s1, s2, s3.  */
6435 static char *
6436 concat (const char *s1, const char *s2, const char *s3)
6437 {
6438   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6439   char *result = xnew (len1 + len2 + len3 + 1, char);
6440
6441   strcpy (result, s1);
6442   strcpy (result + len1, s2);
6443   strcpy (result + len1 + len2, s3);
6444   result[len1 + len2 + len3] = '\0';
6445
6446   return result;
6447 }
6448
6449 \f
6450 /* Does the same work as the system V getcwd, but does not need to
6451    guess the buffer size in advance. */
6452 static char *
6453 etags_getcwd (void)
6454 {
6455 #ifdef HAVE_GETCWD
6456   int bufsize = 200;
6457   char *path = xnew (bufsize, char);
6458
6459   while (getcwd (path, bufsize) == NULL)
6460     {
6461       if (errno != ERANGE)
6462         pfatal ("getcwd");
6463       bufsize *= 2;
6464       free (path);
6465       path = xnew (bufsize, char);
6466     }
6467
6468   canonicalize_filename (path);
6469   return path;
6470
6471 #else /* not HAVE_GETCWD */
6472 #if MSDOS
6473
6474   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6475
6476   getwd (path);
6477
6478   for (p = path; *p != '\0'; p++)
6479     if (*p == '\\')
6480       *p = '/';
6481     else
6482       *p = lowcase (*p);
6483
6484   return strdup (path);
6485 #else /* not MSDOS */
6486   linebuffer path;
6487   FILE *pipe;
6488
6489   linebuffer_init (&path);
6490   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6491   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6492     pfatal ("pwd");
6493   pclose (pipe);
6494
6495   return path.buffer;
6496 #endif /* not MSDOS */
6497 #endif /* not HAVE_GETCWD */
6498 }
6499
6500 /* Return a newly allocated string containing the file name of FILE
6501    relative to the absolute directory DIR (which should end with a slash). */
6502 static char *
6503 relative_filename (char *file, char *dir)
6504 {
6505   char *fp, *dp, *afn, *res;
6506   int i;
6507
6508   /* Find the common root of file and dir (with a trailing slash). */
6509   afn = absolute_filename (file, cwd);
6510   fp = afn;
6511   dp = dir;
6512   while (*fp++ == *dp++)
6513     continue;
6514   fp--, dp--;                   /* back to the first differing char */
6515 #ifdef DOS_NT
6516   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6517     return afn;
6518 #endif
6519   do                            /* look at the equal chars until '/' */
6520     fp--, dp--;
6521   while (*fp != '/');
6522
6523   /* Build a sequence of "../" strings for the resulting relative file name. */
6524   i = 0;
6525   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6526     i += 1;
6527   res = xnew (3*i + strlen (fp + 1) + 1, char);
6528   res[0] = '\0';
6529   while (i-- > 0)
6530     strcat (res, "../");
6531
6532   /* Add the file name relative to the common root of file and dir. */
6533   strcat (res, fp + 1);
6534   free (afn);
6535
6536   return res;
6537 }
6538
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An already absolute FILE is copied; otherwise DIR and FILE are
   concatenated.  The result is then simplified in place by removing
   every "/." component and every "/dirname/.." pair.  Under DOS_NT a
   relative name carrying a drive letter is a fatal error. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slash, *prev, *full;

  if (filename_is_absolute (file))
    full = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    full = concat (dir, file, "");

  /* Examine each slash in turn.  After a removal the same position is
     examined again, since new text has moved under it. */
  for (slash = etags_strchr (full, '/');
       slash != NULL && slash[0] != '\0'; )
    {
      if (slash[1] == '.')
        {
          if (slash[2] == '.'
              && (slash[3] == '/' || slash[3] == '\0'))
            {
              /* "/..": search backwards for the start of the previous
                 component. */
              for (prev = slash - 1;
                   prev >= full && !filename_is_absolute (prev);
                   prev--)
                continue;
              if (prev < full)
                prev = slash;   /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (prev[0] != '/')
                prev = slash;
#endif
              /* strlen (slash + 2) equals the length of the text after
                 "/.." plus one, so the terminator moves as well. */
              memmove (prev, slash + 3, strlen (slash + 2));
              slash = prev;
              continue;
            }
          if (slash[2] == '/' || slash[2] == '\0')
            {
              /* "/.": drop the two characters, terminator included
                 in the move. */
              memmove (slash, slash + 2, strlen (slash + 1));
              continue;
            }
        }

      slash = etags_strchr (slash + 1, '/');
    }

  if (full[0] == '\0')          /* just a safety net: should never happen */
    {
      free (full);
      return savestr ("/");
    }
  return full;
}
6601
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   When FILE contains no slash the answer is a copy of DIR.  Otherwise
   FILE is cut short just after its last slash for the duration of the
   absolute_filename call and put back afterwards. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *after;
  char saved;
  char *dirname;

  if (last_slash == NULL)
    return savestr (dir);

  after = last_slash + 1;
  saved = *after;
  *after = '\0';                /* keep only the directory part */
  dirname = absolute_filename (file, dir);
  *after = saved;               /* restore the caller's string */

  return dirname;
}
6621
/* Whether the argument string is an absolute file name.  The argument
   string must have been canonicalized with canonicalize_filename.

   A name is absolute when it starts with a slash; under DOS_NT a name
   of the form `X:/...', with X a letter, is absolute as well. */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6633
/* Downcase DOS drive letter and collapse separators into single slashes.
   Works in place.

   The separator is '/' by default and '\\' under DOS_NT; each run of
   one or more separators in FN is rewritten as a single '/'. */
static void
canonicalize_filename (register char *fn)
{
  const char *src;
  char *dst;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* Read with SRC and write with DST; DST never overtakes SRC, so the
     rewrite can be done in the same buffer. */
  src = fn;
  dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        {
          *dst++ = *src++;
          continue;
        }
      /* A run of separators: emit one slash, skip the whole run. */
      *dst++ = '/';
      do
        src++;
      while (*src == sep);
    }
  *dst = '\0';
}
6663
6664 \f
6665 /* Initialize a linebuffer for use. */
6666 static void
6667 linebuffer_init (linebuffer *lbp)
6668 {
6669   lbp->size = (DEBUG) ? 3 : 200;
6670   lbp->buffer = xnew (lbp->size, char);
6671   lbp->buffer[0] = '\0';
6672   lbp->len = 0;
6673 }
6674
6675 /* Set the minimum size of a string contained in a linebuffer. */
6676 static void
6677 linebuffer_setlen (linebuffer *lbp, int toksize)
6678 {
6679   while (lbp->size <= toksize)
6680     {
6681       lbp->size *= 2;
6682       xrnew (lbp->buffer, lbp->size, char);
6683     }
6684   lbp->len = toksize;
6685 }
6686
6687 /* Like malloc but get fatal error if memory is exhausted. */
6688 static PTR
6689 xmalloc (size_t size)
6690 {
6691   PTR result = (PTR) malloc (size);
6692   if (result == NULL)
6693     fatal ("virtual memory exhausted", (char *)NULL);
6694   return result;
6695 }
6696
6697 static PTR
6698 xrealloc (char *ptr, size_t size)
6699 {
6700   PTR result = (PTR) realloc (ptr, size);
6701   if (result == NULL)
6702     fatal ("virtual memory exhausted", (char *)NULL);
6703   return result;
6704 }
6705
6706 /*
6707  * Local Variables:
6708  * indent-tabs-mode: t
6709  * tab-width: 8
6710  * fill-column: 79
6711  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6712  * c-file-style: "gnu"
6713  * End:
6714  */
6715
6716 /* etags.c ends here */