lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software: you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation, either version 3 of the License, or
  40 (at your option) any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  49
  50
  51 /* NB To comply with the above BSD license, copyright information is
  52 reproduced in etc/ETAGS.README.  That file should be updated when the
  53 above notices are.
  54
  55 To the best of our knowledge, this code was originally based on the
  56 ctags.c distributed with BSD4.2, which was copyrighted by the
  57 University of California, as described above. */
  58
  59
  60 /*
  61  * Authors:
  62  * 1983 Ctags originally by Ken Arnold.
  63  * 1984 Fortran added by Jim Kleckner.
  64  * 1984 Ed Pelegri-Llopart added C typedefs.
  65  * 1985 Emacs TAGS format by Richard Stallman.
  66  * 1989 Sam Kendall added C++.
  67  * 1992 Joseph B. Wells improved C and C++ parsing.
  68  * 1993 Francesco Potortì reorganized C and C++.
  69  * 1994 Line-by-line regexp tags by Tom Tromey.
  70  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  71  * 2002 #line directives by Francesco Potortì.
  72  *
  73  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  74  */
  75
  76 /*
  77  * If you want to add support for a new language, start by looking at the LUA
  78  * language, which is the simplest.  Alternatively, consider distributing etags
  79  * together with a configuration file containing regexp definitions for etags.
  80  */
  81
  82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  83
  84 #define TRUE    1
  85 #define FALSE   0
  86
  87 #ifdef DEBUG
  88 #  undef DEBUG
  89 #  define DEBUG TRUE
  90 #else
  91 #  define DEBUG  FALSE
  92 #  define NDEBUG                /* disable assert */
  93 #endif
  94
  95 #ifdef HAVE_CONFIG_H
  96 # include <config.h>
  97   /* On some systems, Emacs defines static as nothing for the sake
  98      of unexec.  We don't want that here since we don't use unexec. */
  99 # undef static
 100 # ifndef PTR                    /* for XEmacs */
 101 #   define PTR void *
 102 # endif
 103 #else  /* no config.h */
 104 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 105 #   define PTR void *           /* for generic pointers */
 106 # else /* not standard C */
 107 #   define const                /* remove const for old compilers' sake */
 108 #   define PTR long *           /* don't use void* */
 109 # endif
 110 #endif /* !HAVE_CONFIG_H */
 111
 112 #ifndef _GNU_SOURCE
 113 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 114 #endif
 115
 116 /* WIN32_NATIVE is for XEmacs.
 117    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 118 #ifdef WIN32_NATIVE
 119 # undef MSDOS
 120 # undef  WINDOWSNT
 121 # define WINDOWSNT
 122 #endif /* WIN32_NATIVE */
 123
 124 #ifdef MSDOS
 125 # undef MSDOS
 126 # define MSDOS TRUE
 127 # include <fcntl.h>
 128 # include <sys/param.h>
 129 # include <io.h>
 130 # ifndef HAVE_CONFIG_H
 131 #   define DOS_NT
 132 #   include <sys/config.h>
 133 # endif
 134 #else
 135 # define MSDOS FALSE
 136 #endif /* MSDOS */
 137
 138 #ifdef WINDOWSNT
 139 # include <stdlib.h>
 140 # include <fcntl.h>
 141 # include <string.h>
 142 # include <direct.h>
 143 # include <io.h>
 144 # define MAXPATHLEN _MAX_PATH
 145 # undef HAVE_NTGUI
 146 # undef  DOS_NT
 147 # define DOS_NT
 148 # ifndef HAVE_GETCWD
 149 #   define HAVE_GETCWD
 150 # endif /* undef HAVE_GETCWD */
 151 #else /* not WINDOWSNT */
 152 # ifdef STDC_HEADERS
 153 #  include <stdlib.h>
 154 #  include <string.h>
 155 # else /* no standard C headers */
 156    extern char *getenv (const char *);
 157    extern char *strcpy (char *, const char *);
 158    extern char *strncpy (char *, const char *, unsigned long);
 159    extern char *strcat (char *, const char *);
 160    extern char *strncat (char *, const char *, unsigned long);
 161    extern int strcmp (const char *, const char *);
 162    extern int strncmp (const char *, const char *, unsigned long);
 163    extern int system (const char *);
 164    extern unsigned long strlen (const char *);
 165    extern void *malloc (unsigned long);
 166    extern void *realloc (void *, unsigned long);
 167    extern void exit (int);
 168    extern void free (void *);
 169    extern void *memmove (void *, const void *, unsigned long);
 170 #  define EXIT_SUCCESS  0
 171 #  define EXIT_FAILURE  1
 172 # endif
 173 #endif /* !WINDOWSNT */
 174
 175 #ifdef HAVE_UNISTD_H
 176 # include <unistd.h>
 177 #else
 178 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 179     extern char *getcwd (char *buf, size_t size);
 180 # endif
 181 #endif /* HAVE_UNISTD_H */
 182
 183 #include <stdio.h>
 184 #include <ctype.h>
 185 #include <errno.h>
 186 #include <sys/types.h>
 187 #include <sys/stat.h>
 188
 189 #include <assert.h>
 190 #ifdef NDEBUG
 191 # undef  assert                 /* some systems have a buggy assert.h */
 192 # define assert(x) ((void) 0)
 193 #endif
 194
 195 #if !defined (S_ISREG) && defined (S_IFREG)
 196 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 197 #endif
 198
 199 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 200 # define NO_LONG_OPTIONS TRUE
 201 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 202   extern char *optarg;
 203   extern int optind, opterr;
 204 #else
 205 # define NO_LONG_OPTIONS FALSE
 206 # include <getopt.h>
 207 #endif /* NO_LONG_OPTIONS */
 208
 209 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 210 # ifdef __CYGWIN__              /* compiling on Cygwin */
 211                              !!! NOTICE !!!
 212  the regex.h distributed with Cygwin is not compatible with etags, alas!
 213 If you want regular expression support, you should delete this notice and
 214               arrange to use the GNU regex.h and regex.c.
 215 # endif
 216 #endif
 217 #include <regex.h>
 218
 219 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 220  Leave it undefined to make the program "etags", which makes emacs-style
 221  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 222 #ifdef CTAGS
 223 # undef  CTAGS
 224 # define CTAGS TRUE
 225 #else
 226 # define CTAGS FALSE
 227 #endif
 228
 229 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 230 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 231 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 232 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 233
 234 #define CHARS 256               /* 2^sizeof(char) */
 235 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 236 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 237 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 238 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 239 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 240 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 241
 242 #define ISALNUM(c)      isalnum (CHAR(c))
 243 #define ISALPHA(c)      isalpha (CHAR(c))
 244 #define ISDIGIT(c)      isdigit (CHAR(c))
 245 #define ISLOWER(c)      islower (CHAR(c))
 246
 247 #define lowcase(c)      tolower (CHAR(c))
 248 #define upcase(c)       toupper (CHAR(c))
 249
 250
 251 /*
 252  *      xnew, xrnew -- allocate, reallocate storage
 253  *
 254  * SYNOPSIS:    Type *xnew (int n, Type);
 255  *              void xrnew (OldPointer, int n, Type);
 256  */
 257 #if DEBUG
 258 # include "chkmalloc.h"
 259 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 260                                                   (n) * sizeof (Type)))
 261 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 262                                         (char *) (op), (n) * sizeof (Type)))
 263 #else
 264 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 265 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 266                                         (char *) (op), (n) * sizeof (Type)))
 267 #endif
 268
 269 #define bool int
 270
 271 typedef void Lang_function (FILE *);
 272
 273 typedef struct
 274 {
 275   const char *suffix;           /* file name suffix for this compressor */
 276   const char *command;          /* takes one arg and decompresses to stdout */
 277 } compressor;
 278
 279 typedef struct
 280 {
 281   const char *name;             /* language name */
 282   const char *help;             /* detailed help for the language */
 283   Lang_function *function;      /* parse function */
 284   const char **suffixes;        /* name suffixes of this language's files */
 285   const char **filenames;       /* names of this language's files */
 286   const char **interpreters;    /* interpreters for this language */
 287   bool metasource;              /* source used to generate other sources */
 288 } language;
 289
 290 typedef struct fdesc
 291 {
 292   struct fdesc *next;           /* for the linked list */
 293   char *infname;                /* uncompressed input file name */
 294   char *infabsname;             /* absolute uncompressed input file name */
 295   char *infabsdir;              /* absolute dir of input file */
 296   char *taggedfname;            /* file name to write in tagfile */
 297   language *lang;               /* language of file */
 298   char *prop;                   /* file properties to write in tagfile */
 299   bool usecharno;               /* etags tags shall contain char number */
 300   bool written;                 /* entry written in the tags file */
 301 } fdesc;
 302
 303 typedef struct node_st
 304 {                               /* sorting structure */
 305   struct node_st *left, *right; /* left and right sons */
 306   fdesc *fdp;                   /* description of file to whom tag belongs */
 307   char *name;                   /* tag name */
 308   char *regex;                  /* search regexp */
 309   bool valid;                   /* write this tag on the tag file */
 310   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 311   bool been_warned;             /* warning already given for duplicated tag */
 312   int lno;                      /* line number tag is on */
 313   long cno;                     /* character number line starts on */
 314 } node;
 315
 316 /*
 317  * A `linebuffer' is a structure which holds a line of text.
 318  * `readline_internal' reads a line from a stream into a linebuffer
 319  * and works regardless of the length of the line.
 320  * SIZE is the size of BUFFER, LEN is the length of the string in
 321  * BUFFER after readline reads it.
 322  */
 323 typedef struct
 324 {
 325   long size;
 326   int len;
 327   char *buffer;
 328 } linebuffer;
 329
 330 /* Used to support mixing of --lang and file names. */
 331 typedef struct
 332 {
 333   enum {
 334     at_language,                /* a language specification */
 335     at_regexp,                  /* a regular expression */
 336     at_filename,                /* a file name */
 337     at_stdin,                   /* read from stdin here */
 338     at_end                      /* stop parsing the list */
 339   } arg_type;                   /* argument type */
 340   language *lang;               /* language associated with the argument */
 341   char *what;                   /* the argument itself */
 342 } argument;
 343
 344 /* Structure defining a regular expression. */
 345 typedef struct regexp
 346 {
 347   struct regexp *p_next;        /* pointer to next in list */
 348   language *lang;               /* if set, use only for this language */
 349   char *pattern;                /* the regexp pattern */
 350   char *name;                   /* tag name */
 351   struct re_pattern_buffer *pat; /* the compiled pattern */
 352   struct re_registers regs;     /* re registers */
 353   bool error_signaled;          /* already signaled for this regexp */
 354   bool force_explicit_name;     /* do not allow implict tag name */
 355   bool ignore_case;             /* ignore case when matching */
 356   bool multi_line;              /* do a multi-line match on the whole file */
 357 } regexp;
 358
 359
 360 /* Many compilers barf on this:
 361         Lang_function Ada_funcs;
 362    so let's write it this way */
 363 static void Ada_funcs (FILE *);
 364 static void Asm_labels (FILE *);
 365 static void C_entries (int c_ext, FILE *);
 366 static void default_C_entries (FILE *);
 367 static void plain_C_entries (FILE *);
 368 static void Cjava_entries (FILE *);
 369 static void Cobol_paragraphs (FILE *);
 370 static void Cplusplus_entries (FILE *);
 371 static void Cstar_entries (FILE *);
 372 static void Erlang_functions (FILE *);
 373 static void Forth_words (FILE *);
 374 static void Fortran_functions (FILE *);
 375 static void HTML_labels (FILE *);
 376 static void Lisp_functions (FILE *);
 377 static void Lua_functions (FILE *);
 378 static void Makefile_targets (FILE *);
 379 static void Pascal_functions (FILE *);
 380 static void Perl_functions (FILE *);
 381 static void PHP_functions (FILE *);
 382 static void PS_functions (FILE *);
 383 static void Prolog_functions (FILE *);
 384 static void Python_functions (FILE *);
 385 static void Scheme_functions (FILE *);
 386 static void TeX_commands (FILE *);
 387 static void Texinfo_nodes (FILE *);
 388 static void Yacc_entries (FILE *);
 389 static void just_read_file (FILE *);
 390
 391 static void print_language_names (void);
 392 static void print_version (void);
 393 static void print_help (argument *);
 394 int main (int, char **);
 395
 396 static compressor *get_compressor_from_suffix (char *, char **);
 397 static language *get_language_from_langname (const char *);
 398 static language *get_language_from_interpreter (char *);
 399 static language *get_language_from_filename (char *, bool);
 400 static void readline (linebuffer *, FILE *);
 401 static long readline_internal (linebuffer *, FILE *);
 402 static bool nocase_tail (const char *);
 403 static void get_tag (char *, char **);
 404
 405 static void analyse_regex (char *);
 406 static void free_regexps (void);
 407 static void regex_tag_multiline (void);
 408 static void error (const char *, const char *);
 409 static void suggest_asking_for_help (void) NO_RETURN;
 410 void fatal (const char *, const char *) NO_RETURN;
 411 static void pfatal (const char *) NO_RETURN;
 412 static void add_node (node *, node **);
 413
 414 static void init (void);
 415 static void process_file_name (char *, language *);
 416 static void process_file (FILE *, char *, language *);
 417 static void find_entries (FILE *);
 418 static void free_tree (node *);
 419 static void free_fdesc (fdesc *);
 420 static void pfnote (char *, bool, char *, int, int, long);
 421 static void make_tag (const char *, int, bool, char *, int, int, long);
 422 static void invalidate_nodes (fdesc *, node **);
 423 static void put_entries (node *);
 424
 425 static char *concat (const char *, const char *, const char *);
 426 static char *skip_spaces (char *);
 427 static char *skip_non_spaces (char *);
 428 static char *savenstr (const char *, int);
 429 static char *savestr (const char *);
 430 static char *etags_strchr (const char *, int);
 431 static char *etags_strrchr (const char *, int);
 432 static int etags_strcasecmp (const char *, const char *);
 433 static int etags_strncasecmp (const char *, const char *, int);
 434 static char *etags_getcwd (void);
 435 static char *relative_filename (char *, char *);
 436 static char *absolute_filename (char *, char *);
 437 static char *absolute_dirname (char *, char *);
 438 static bool filename_is_absolute (char *f);
 439 static void canonicalize_filename (char *);
 440 static void linebuffer_init (linebuffer *);
 441 static void linebuffer_setlen (linebuffer *, int);
 442 static PTR xmalloc (unsigned int);
 443 static PTR xrealloc (char *, unsigned int);
 444
 445 \f
 446 static char searchar = '/';     /* use /.../ searches */
 447
 448 static char *tagfile;           /* output file */
 449 static char *progname;          /* name this program was invoked with */
 450 static char *cwd;               /* current working directory */
 451 static char *tagfiledir;        /* directory of tagfile */
 452 static FILE *tagf;              /* ioptr for tags file */
 453
 454 static fdesc *fdhead;           /* head of file description list */
 455 static fdesc *curfdp;           /* current file description */
 456 static int lineno;              /* line number of current line */
 457 static long charno;             /* current character number */
 458 static long linecharno;         /* charno of start of current line */
 459 static char *dbp;               /* pointer to start of current tag */
 460
 461 static const int invalidcharno = -1;
 462
 463 static node *nodehead;          /* the head of the binary tree of tags */
 464 static node *last_node;         /* the last node created */
 465
 466 static linebuffer lb;           /* the current line */
 467 static linebuffer filebuf;      /* a buffer containing the whole file */
 468 static linebuffer token_name;   /* a buffer containing a tag name */
 469
 470 /* boolean "functions" (see init)       */
 471 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 472 static const char
 473   /* white chars */
 474   *white = " \f\t\n\r\v",
 475   /* not in a name */
 476   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 477   /* token ending chars */
 478   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 479   /* token starting chars */
 480   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 481   /* valid in-token chars */
 482   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 483
 484 static bool append_to_tagfile;  /* -a: append to tags */
 485 /* The next five default to TRUE in C and derived languages.  */
 486 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 487 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 488                                 /* 0 struct/enum/union decls, and C++ */
 489                                 /* member functions. */
 490 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 491                                 /* constants and variables. */
 492                                 /* -D: opposite of -d.  Default under ctags. */
 493 static bool globals;            /* create tags for global variables */
 494 static bool members;            /* create tags for C member variables */
 495 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 496 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 497 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 498 static bool update;             /* -u: update tags */
 499 static bool vgrind_style;       /* -v: create vgrind style index output */
 500 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 501 static bool cxref_style;        /* -x: create cxref style output */
 502 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 503 static bool ignoreindent;       /* -I: ignore indentation in C */
 504 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 505
 506 /* STDIN is defined in LynxOS system headers */
 507 #ifdef STDIN
 508 # undef STDIN
 509 #endif
 510
 511 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 512 static bool parsing_stdin;      /* --parse-stdin used */
 513
 514 static regexp *p_head;          /* list of all regexps */
 515 static bool need_filebuf;       /* some regexes are multi-line */
 516
 517 static struct option longopts[] =
 518 {
 519   { "append",             no_argument,       NULL,               'a'   },
 520   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 521   { "c++",                no_argument,       NULL,               'C'   },
 522   { "declarations",       no_argument,       &declarations,      TRUE  },
 523   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 524   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 525   { "help",               no_argument,       NULL,               'h'   },
 526   { "help",               no_argument,       NULL,               'H'   },
 527   { "ignore-indentation", no_argument,       NULL,               'I'   },
 528   { "language",           required_argument, NULL,               'l'   },
 529   { "members",            no_argument,       &members,           TRUE  },
 530   { "no-members",         no_argument,       &members,           FALSE },
 531   { "output",             required_argument, NULL,               'o'   },
 532   { "regex",              required_argument, NULL,               'r'   },
 533   { "no-regex",           no_argument,       NULL,               'R'   },
 534   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 535   { "parse-stdin",        required_argument, NULL,               STDIN },
 536   { "version",            no_argument,       NULL,               'V'   },
 537
 538 #if CTAGS /* Ctags options */
 539   { "backward-search",    no_argument,       NULL,               'B'   },
 540   { "cxref",              no_argument,       NULL,               'x'   },
 541   { "defines",            no_argument,       NULL,               'd'   },
 542   { "globals",            no_argument,       &globals,           TRUE  },
 543   { "typedefs",           no_argument,       NULL,               't'   },
 544   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 545   { "update",             no_argument,       NULL,               'u'   },
 546   { "vgrind",             no_argument,       NULL,               'v'   },
 547   { "no-warn",            no_argument,       NULL,               'w'   },
 548
 549 #else /* Etags options */
 550   { "no-defines",         no_argument,       NULL,               'D'   },
 551   { "no-globals",         no_argument,       &globals,           FALSE },
 552   { "include",            required_argument, NULL,               'i'   },
 553 #endif
 554   { NULL }
 555 };
 556
 557 static compressor compressors[] =
 558 {
 559   { "z", "gzip -d -c"},
 560   { "Z", "gzip -d -c"},
 561   { "gz", "gzip -d -c"},
 562   { "GZ", "gzip -d -c"},
 563   { "bz2", "bzip2 -d -c" },
 564   { "xz", "xz -d -c" },
 565   { NULL }
 566 };
 567
 568 /*
 569  * Language stuff.
 570  */
 571
 572 /* Ada code */
 573 static const char *Ada_suffixes [] =
 574   { "ads", "adb", "ada", NULL };
 575 static const char Ada_help [] =
 576 "In Ada code, functions, procedures, packages, tasks and types are\n\
 577 tags.  Use the `--packages-only' option to create tags for\n\
 578 packages only.\n\
 579 Ada tag names have suffixes indicating the type of entity:\n\
 580         Entity type:    Qualifier:\n\
 581         ------------    ----------\n\
 582         function        /f\n\
 583         procedure       /p\n\
 584         package spec    /s\n\
 585         package body    /b\n\
 586         type            /t\n\
 587         task            /k\n\
 588 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 589 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 590 will just search for any tag `bidule'.";
 591
 592 /* Assembly code */
 593 static const char *Asm_suffixes [] =
 594   { "a",        /* Unix assembler */
 595     "asm", /* Microcontroller assembly */
 596     "def", /* BSO/Tasking definition includes  */
 597     "inc", /* Microcontroller include files */
 598     "ins", /* Microcontroller include files */
 599     "s", "sa", /* Unix assembler */
 600     "S",   /* cpp-processed Unix assembler */
 601     "src", /* BSO/Tasking C compiler output */
 602     NULL
 603   };
 604 static const char Asm_help [] =
 605 "In assembler code, labels appearing at the beginning of a line,\n\
 606 followed by a colon, are tags.";
 607
 608
 609 /* Note that .c and .h can be considered C++, if the --c++ flag was
 610    given, or if the `class' or `template' keywords are met inside the file.
 611    That is why default_C_entries is called for these. */
 612 static const char *default_C_suffixes [] =
 613   { "c", "h", NULL };
 614 #if CTAGS                               /* C help for Ctags */
 615 static const char default_C_help [] =
 616 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 617 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 618 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 619 Use --globals to tag global variables.\n\
 620 You can tag function declarations and external variables by\n\
 621 using `--declarations', and struct members by using `--members'.";
 622 #else                                   /* C help for Etags */
 623 static const char default_C_help [] =
 624 "In C code, any C function or typedef is a tag, and so are\n\
 625 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 626 definitions and `enum' constants are tags unless you specify\n\
 627 `--no-defines'.  Global variables are tags unless you specify\n\
 628 `--no-globals' and so are struct members unless you specify\n\
 629 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 630 `--no-members' can make the tags table file much smaller.\n\
 631 You can tag function declarations and external variables by\n\
 632 using `--declarations'.";
 633 #endif  /* C help for Ctags and Etags */
 634
 635 static const char *Cplusplus_suffixes [] =
 636   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 637     "M",                        /* Objective C++ */
 638     "pdb",                      /* Postscript with C syntax */
 639     NULL };
 640 static const char Cplusplus_help [] =
 641 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 642 --help --lang=c --lang=c++ for full help.)\n\
 643 In addition to C tags, member functions are also recognized.  Member\n\
 644 variables are recognized unless you use the `--no-members' option.\n\
 645 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 646 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 647 `operator+'.";
 648
 649 static const char *Cjava_suffixes [] =
 650   { "java", NULL };
 651 static char Cjava_help [] =
 652 "In Java code, all the tags constructs of C and C++ code are\n\
 653 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 654
 655
 656 static const char *Cobol_suffixes [] =
 657   { "COB", "cob", NULL };
 658 static char Cobol_help [] =
 659 "In Cobol code, tags are paragraph names; that is, any word\n\
 660 starting in column 8 and followed by a period.";
 661
 662 static const char *Cstar_suffixes [] =
 663   { "cs", "hs", NULL };
 664
 665 static const char *Erlang_suffixes [] =
 666   { "erl", "hrl", NULL };
 667 static const char Erlang_help [] =
 668 "In Erlang code, the tags are the functions, records and macros\n\
 669 defined in the file.";
 670
 671 const char *Forth_suffixes [] =
 672   { "fth", "tok", NULL };
 673 static const char Forth_help [] =
 674 "In Forth code, tags are words defined by `:',\n\
 675 constant, code, create, defer, value, variable, buffer:, field.";
 676
 677 static const char *Fortran_suffixes [] =
 678   { "F", "f", "f90", "for", NULL };
 679 static const char Fortran_help [] =
 680 "In Fortran code, functions, subroutines and block data are tags.";
 681
 682 static const char *HTML_suffixes [] =
 683   { "htm", "html", "shtml", NULL };
 684 static const char HTML_help [] =
 685 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 686 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 687 occurrences of `id='.";
 688
 689 static const char *Lisp_suffixes [] =
 690   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 691 static const char Lisp_help [] =
 692 "In Lisp code, any function defined with `defun', any variable\n\
 693 defined with `defvar' or `defconst', and in general the first\n\
 694 argument of any expression that starts with `(def' in column zero\n\
 695 is a tag.";
 696
 697 static const char *Lua_suffixes [] =
 698   { "lua", "LUA", NULL };
 699 static const char Lua_help [] =
 700 "In Lua scripts, all functions are tags.";
 701
 702 static const char *Makefile_filenames [] =
 703   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 704 static const char Makefile_help [] =
 705 "In makefiles, targets are tags; additionally, variables are tags\n\
 706 unless you specify `--no-globals'.";
 707
 708 static const char *Objc_suffixes [] =
 709   { "lm",                       /* Objective lex file */
 710     "m",                        /* Objective C file */
 711      NULL };
 712 static const char Objc_help [] =
 713 "In Objective C code, tags include Objective C definitions for classes,\n\
 714 class categories, methods and protocols.  Tags for variables and\n\
 715 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 716 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 717
 718 static const char *Pascal_suffixes [] =
 719   { "p", "pas", NULL };
 720 static const char Pascal_help [] =
 721 "In Pascal code, the tags are the functions and procedures defined\n\
 722 in the file.";
 723 /* " // this is for working around an Emacs highlighting bug... */
 724
 725 static const char *Perl_suffixes [] =
 726   { "pl", "pm", NULL };
 727 static const char *Perl_interpreters [] =
 728   { "perl", "@PERL@", NULL };
 729 static const char Perl_help [] =
 730 "In Perl code, the tags are the packages, subroutines and variables\n\
 731 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 732 `--globals' if you want to tag global variables.  Tags for\n\
 733 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 734 defined in the default package is `main::SUB'.";
 735
 736 static const char *PHP_suffixes [] =
 737   { "php", "php3", "php4", NULL };
 738 static const char PHP_help [] =
 739 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 740 the `--no-members' option, vars are tags too.";
 741
 742 static const char *plain_C_suffixes [] =
 743   { "pc",                       /* Pro*C file */
 744      NULL };
 745
 746 static const char *PS_suffixes [] =
 747   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 748 static const char PS_help [] =
 749 "In PostScript code, the tags are the functions.";
 750
 751 static const char *Prolog_suffixes [] =
 752   { "prolog", NULL };
 753 static const char Prolog_help [] =
 754 "In Prolog code, tags are predicates and rules at the beginning of\n\
 755 line.";
 756
 757 static const char *Python_suffixes [] =
 758   { "py", NULL };
 759 static const char Python_help [] =
 760 "In Python code, `def' or `class' at the beginning of a line\n\
 761 generate a tag.";
 762
 763 /* Can't do the `SCM' or `scm' prefix with a version number. */
 764 static const char *Scheme_suffixes [] =
 765   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 766 static const char Scheme_help [] =
 767 "In Scheme code, tags include anything defined with `def' or with a\n\
 768 construct whose name starts with `def'.  They also include\n\
 769 variables set with `set!' at top level in the file.";
 770
 771 static const char *TeX_suffixes [] =
 772   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 773 static const char TeX_help [] =
 774 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 775 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 776 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 777 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 778 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 779 \n\
 780 Other commands can be specified by setting the environment variable\n\
 781 `TEXTAGS' to a colon-separated list like, for example,\n\
 782      TEXTAGS=\"mycommand:myothercommand\".";
 783
 784
 785 static const char *Texinfo_suffixes [] =
 786   { "texi", "texinfo", "txi", NULL };
 787 static const char Texinfo_help [] =
 788 "for texinfo files, lines starting with @node are tagged.";
 789
 790 static const char *Yacc_suffixes [] =
 791   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 792 static const char Yacc_help [] =
 793 "In Bison or Yacc input files, each rule defines as a tag the\n\
 794 nonterminal it constructs.  The portions of the file that contain\n\
 795 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 796 for full help).";
 797
 798 static const char auto_help [] =
 799 "`auto' is not a real language, it indicates to use\n\
 800 a default language for files base on file name suffix and file contents.";
 801
 802 static const char none_help [] =
 803 "`none' is not a real language, it indicates to only do\n\
 804 regexp processing on files.";
 805
 806 static const char no_lang_help [] =
 807 "No detailed help available for this language.";
 808
 809
 810 /*
 811  * Table of languages.
 812  *
 813  * It is ok for a given function to be listed under more than one
 814  * name.  I just didn't.
 815  */
 816
 817 static language lang_names [] =
 818 {
 819   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 820   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 821   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 822   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 823   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 824   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 825   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 826   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 827   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 828   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 829   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 830   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 831   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 832   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 833   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 834   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 835   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 836   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 837   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 838   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 839   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 840   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 841   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 842   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 843   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 844   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 845   { "auto",      auto_help },                      /* default guessing scheme */
 846   { "none",      none_help,      just_read_file }, /* regexp matching only */
 847   { NULL }                /* end of list */
 848 };
 849
 850 \f
 851 static void
 852 print_language_names (void)
 853 {
 854   language *lang;
 855   const char **name, **ext;
 856
 857   puts ("\nThese are the currently supported languages, along with the\n\
 858 default file names and dot suffixes:");
 859   for (lang = lang_names; lang->name != NULL; lang++)
 860     {
 861       printf ("  %-*s", 10, lang->name);
 862       if (lang->filenames != NULL)
 863         for (name = lang->filenames; *name != NULL; name++)
 864           printf (" %s", *name);
 865       if (lang->suffixes != NULL)
 866         for (ext = lang->suffixes; *ext != NULL; ext++)
 867           printf (" .%s", *ext);
 868       puts ("");
 869     }
 870   puts ("where `auto' means use default language for files based on file\n\
 871 name suffix, and `none' means only do regexp processing on files.\n\
 872 If no language is specified and no matching suffix is found,\n\
 873 the first line of the file is read for a sharp-bang (#!) sequence\n\
 874 followed by the name of an interpreter.  If no such sequence is found,\n\
 875 Fortran is tried first; if no tags are found, C is tried next.\n\
 876 When parsing any C file, a \"class\" or \"template\" keyword\n\
 877 switches to C++.");
 878   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 879 \n\
 880 For detailed help on a given language use, for example,\n\
 881 etags --help --lang=ada.");
 882 }
 883
 884 #ifndef EMACS_NAME
 885 # define EMACS_NAME "standalone"
 886 #endif
 887 #ifndef VERSION
 888 # define VERSION "17.38.1.4"
 889 #endif
 890 static void
 891 print_version (void)
 892 {
 893   /* Makes it easier to update automatically. */
 894   char emacs_copyright[] = "Copyright (C) 2010 Free Software Foundation, Inc.";
 895
 896   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 897   puts (emacs_copyright);
 898   puts ("This program is distributed under the terms in ETAGS.README");
 899
 900   exit (EXIT_SUCCESS);
 901 }
 902
 903 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 904 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 905 #endif
 906
 907 static void
 908 print_help (argument *argbuffer)
 909 {
 910   bool help_for_lang = FALSE;
 911
 912   for (; argbuffer->arg_type != at_end; argbuffer++)
 913     if (argbuffer->arg_type == at_language)
 914       {
 915         if (help_for_lang)
 916           puts ("");
 917         puts (argbuffer->lang->help);
 918         help_for_lang = TRUE;
 919       }
 920
 921   if (help_for_lang)
 922     exit (EXIT_SUCCESS);
 923
 924   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 925 \n\
 926 These are the options accepted by %s.\n", progname, progname);
 927   if (NO_LONG_OPTIONS)
 928     puts ("WARNING: long option names do not work with this executable,\n\
 929 as it is not linked with GNU getopt.");
 930   else
 931     puts ("You may use unambiguous abbreviations for the long option names.");
 932   puts ("  A - as file name means read names from stdin (one per line).\n\
 933 Absolute names are stored in the output file as they are.\n\
 934 Relative ones are stored relative to the output file's directory.\n");
 935
 936   puts ("-a, --append\n\
 937         Append tag entries to existing tags file.");
 938
 939   puts ("--packages-only\n\
 940         For Ada files, only generate tags for packages.");
 941
 942   if (CTAGS)
 943     puts ("-B, --backward-search\n\
 944         Write the search commands for the tag entries using '?', the\n\
 945         backward-search command instead of '/', the forward-search command.");
 946
 947   /* This option is mostly obsolete, because etags can now automatically
 948      detect C++.  Retained for backward compatibility and for debugging and
 949      experimentation.  In principle, we could want to tag as C++ even
 950      before any "class" or "template" keyword.
 951   puts ("-C, --c++\n\
 952         Treat files whose name suffix defaults to C language as C++ files.");
 953   */
 954
 955   puts ("--declarations\n\
 956         In C and derived languages, create tags for function declarations,");
 957   if (CTAGS)
 958     puts ("\tand create tags for extern variables if --globals is used.");
 959   else
 960     puts
 961       ("\tand create tags for extern variables unless --no-globals is used.");
 962
 963   if (CTAGS)
 964     puts ("-d, --defines\n\
 965         Create tag entries for C #define constants and enum constants, too.");
 966   else
 967     puts ("-D, --no-defines\n\
 968         Don't create tag entries for C #define constants and enum constants.\n\
 969         This makes the tags file smaller.");
 970
 971   if (!CTAGS)
 972     puts ("-i FILE, --include=FILE\n\
 973         Include a note in tag file indicating that, when searching for\n\
 974         a tag, one should also consult the tags file FILE after\n\
 975         checking the current file.");
 976
 977   puts ("-l LANG, --language=LANG\n\
 978         Force the following files to be considered as written in the\n\
 979         named language up to the next --language=LANG option.");
 980
 981   if (CTAGS)
 982     puts ("--globals\n\
 983         Create tag entries for global variables in some languages.");
 984   else
 985     puts ("--no-globals\n\
 986         Do not create tag entries for global variables in some\n\
 987         languages.  This makes the tags file smaller.");
 988
 989   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 990     puts ("--no-line-directive\n\
 991         Ignore #line preprocessor directives in C and derived languages.");
 992
 993   if (CTAGS)
 994     puts ("--members\n\
 995         Create tag entries for members of structures in some languages.");
 996   else
 997     puts ("--no-members\n\
 998         Do not create tag entries for members of structures\n\
 999         in some languages.");
1000
1001   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1002         Make a tag for each line matching a regular expression pattern\n\
1003         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1004         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
1005         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1006         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1007   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1008         For example Tcl named tags can be created with:\n\
1009           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1010         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1011         `m' means to allow multi-line matches, `s' implies `m' and\n\
1012         causes dot to match any character, including newline.");
1013
1014   puts ("-R, --no-regex\n\
1015         Don't create tags from regexps for the following files.");
1016
1017   puts ("-I, --ignore-indentation\n\
1018         In C and C++ do not assume that a closing brace in the first\n\
1019         column is the final brace of a function or structure definition.");
1020
1021   puts ("-o FILE, --output=FILE\n\
1022         Write the tags to FILE.");
1023
1024   puts ("--parse-stdin=NAME\n\
1025         Read from standard input and record tags as belonging to file NAME.");
1026
1027   if (CTAGS)
1028     {
1029       puts ("-t, --typedefs\n\
1030         Generate tag entries for C and Ada typedefs.");
1031       puts ("-T, --typedefs-and-c++\n\
1032         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1033         and C++ member functions.");
1034     }
1035
1036   if (CTAGS)
1037     puts ("-u, --update\n\
1038         Update the tag entries for the given files, leaving tag\n\
1039         entries for other files in place.  Currently, this is\n\
1040         implemented by deleting the existing entries for the given\n\
1041         files and then rewriting the new entries at the end of the\n\
1042         tags file.  It is often faster to simply rebuild the entire\n\
1043         tag file than to use this.");
1044
1045   if (CTAGS)
1046     {
1047       puts ("-v, --vgrind\n\
1048         Print on the standard output an index of items intended for\n\
1049         human consumption, similar to the output of vgrind.  The index\n\
1050         is sorted, and gives the page number of each item.");
1051
1052       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1053         puts ("-w, --no-duplicates\n\
1054         Do not create duplicate tag entries, for compatibility with\n\
1055         traditional ctags.");
1056
1057       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1058         puts ("-w, --no-warn\n\
1059         Suppress warning messages about duplicate tag entries.");
1060
1061       puts ("-x, --cxref\n\
1062         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1063         The output uses line numbers instead of page numbers, but\n\
1064         beyond that the differences are cosmetic; try both to see\n\
1065         which you like.");
1066     }
1067
1068   puts ("-V, --version\n\
1069         Print the version of the program.\n\
1070 -h, --help\n\
1071         Print this help message.\n\
1072         Followed by one or more `--language' options prints detailed\n\
1073         help about tag generation for the specified languages.");
1074
1075   print_language_names ();
1076
1077   puts ("");
1078   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1079
1080   exit (EXIT_SUCCESS);
1081 }
1082
1083 \f
1084 int
1085 main (int argc, char **argv)
1086 {
1087   int i;
1088   unsigned int nincluded_files;
1089   char **included_files;
1090   argument *argbuffer;
1091   int current_arg, file_count;
1092   linebuffer filename_lb;
1093   bool help_asked = FALSE;
1094  char *optstring;
1095  int opt;
1096
1097
1098 #ifdef DOS_NT
1099   _fmode = O_BINARY;   /* all of files are treated as binary files */
1100 #endif /* DOS_NT */
1101
1102   progname = argv[0];
1103   nincluded_files = 0;
1104   included_files = xnew (argc, char *);
1105   current_arg = 0;
1106   file_count = 0;
1107
1108   /* Allocate enough no matter what happens.  Overkill, but each one
1109      is small. */
1110   argbuffer = xnew (argc, argument);
1111
1112   /*
1113    * Always find typedefs and structure tags.
1114    * Also default to find macro constants, enum constants, struct
1115    * members and global variables.  Do it for both etags and ctags.
1116    */
1117   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1118   globals = members = TRUE;
1119
1120   /* When the optstring begins with a '-' getopt_long does not rearrange the
1121      non-options arguments to be at the end, but leaves them alone. */
1122   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1123                       "ac:Cf:Il:o:r:RSVhH",
1124                       (CTAGS) ? "BxdtTuvw" : "Di:");
1125
1126   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1127     switch (opt)
1128       {
1129       case 0:
1130         /* If getopt returns 0, then it has already processed a
1131            long-named option.  We should do nothing.  */
1132         break;
1133
1134       case 1:
1135         /* This means that a file name has been seen.  Record it. */
1136         argbuffer[current_arg].arg_type = at_filename;
1137         argbuffer[current_arg].what     = optarg;
1138         ++current_arg;
1139         ++file_count;
1140         break;
1141
1142       case STDIN:
1143         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1144         argbuffer[current_arg].arg_type = at_stdin;
1145         argbuffer[current_arg].what     = optarg;
1146         ++current_arg;
1147         ++file_count;
1148         if (parsing_stdin)
1149           fatal ("cannot parse standard input more than once", (char *)NULL);
1150         parsing_stdin = TRUE;
1151         break;
1152
1153         /* Common options. */
1154       case 'a': append_to_tagfile = TRUE;       break;
1155       case 'C': cplusplus = TRUE;               break;
1156       case 'f':         /* for compatibility with old makefiles */
1157       case 'o':
1158         if (tagfile)
1159           {
1160             error ("-o option may only be given once.", (char *)NULL);
1161             suggest_asking_for_help ();
1162             /* NOTREACHED */
1163           }
1164         tagfile = optarg;
1165         break;
1166       case 'I':
1167       case 'S':         /* for backward compatibility */
1168         ignoreindent = TRUE;
1169         break;
1170       case 'l':
1171         {
1172           language *lang = get_language_from_langname (optarg);
1173           if (lang != NULL)
1174             {
1175               argbuffer[current_arg].lang = lang;
1176               argbuffer[current_arg].arg_type = at_language;
1177               ++current_arg;
1178             }
1179         }
1180         break;
1181       case 'c':
1182         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1183         optarg = concat (optarg, "i", ""); /* memory leak here */
1184         /* FALLTHRU */
1185       case 'r':
1186         argbuffer[current_arg].arg_type = at_regexp;
1187         argbuffer[current_arg].what = optarg;
1188         ++current_arg;
1189         break;
1190       case 'R':
1191         argbuffer[current_arg].arg_type = at_regexp;
1192         argbuffer[current_arg].what = NULL;
1193         ++current_arg;
1194         break;
1195       case 'V':
1196         print_version ();
1197         break;
1198       case 'h':
1199       case 'H':
1200         help_asked = TRUE;
1201         break;
1202
1203         /* Etags options */
1204       case 'D': constantypedefs = FALSE;                        break;
1205       case 'i': included_files[nincluded_files++] = optarg;     break;
1206
1207         /* Ctags options. */
1208       case 'B': searchar = '?';                                 break;
1209       case 'd': constantypedefs = TRUE;                         break;
1210       case 't': typedefs = TRUE;                                break;
1211       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1212       case 'u': update = TRUE;                                  break;
1213       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1214       case 'x': cxref_style = TRUE;                             break;
1215       case 'w': no_warnings = TRUE;                             break;
1216       default:
1217         suggest_asking_for_help ();
1218         /* NOTREACHED */
1219       }
1220
1221   /* No more options.  Store the rest of arguments. */
1222   for (; optind < argc; optind++)
1223     {
1224       argbuffer[current_arg].arg_type = at_filename;
1225       argbuffer[current_arg].what = argv[optind];
1226       ++current_arg;
1227       ++file_count;
1228     }
1229
1230   argbuffer[current_arg].arg_type = at_end;
1231
1232   if (help_asked)
1233     print_help (argbuffer);
1234     /* NOTREACHED */
1235
1236   if (nincluded_files == 0 && file_count == 0)
1237     {
1238       error ("no input files specified.", (char *)NULL);
1239       suggest_asking_for_help ();
1240       /* NOTREACHED */
1241     }
1242
1243   if (tagfile == NULL)
1244     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1245   cwd = etags_getcwd ();        /* the current working directory */
1246   if (cwd[strlen (cwd) - 1] != '/')
1247     {
1248       char *oldcwd = cwd;
1249       cwd = concat (oldcwd, "/", "");
1250       free (oldcwd);
1251     }
1252
1253   /* Compute base directory for relative file names. */
1254   if (streq (tagfile, "-")
1255       || strneq (tagfile, "/dev/", 5))
1256     tagfiledir = cwd;            /* relative file names are relative to cwd */
1257   else
1258     {
1259       canonicalize_filename (tagfile);
1260       tagfiledir = absolute_dirname (tagfile, cwd);
1261     }
1262
1263   init ();                      /* set up boolean "functions" */
1264
1265   linebuffer_init (&lb);
1266   linebuffer_init (&filename_lb);
1267   linebuffer_init (&filebuf);
1268   linebuffer_init (&token_name);
1269
1270   if (!CTAGS)
1271     {
1272       if (streq (tagfile, "-"))
1273         {
1274           tagf = stdout;
1275 #ifdef DOS_NT
1276           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1277              doesn't take effect until after `stdout' is already open). */
1278           if (!isatty (fileno (stdout)))
1279             setmode (fileno (stdout), O_BINARY);
1280 #endif /* DOS_NT */
1281         }
1282       else
1283         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1284       if (tagf == NULL)
1285         pfatal (tagfile);
1286     }
1287
1288   /*
1289    * Loop through files finding functions.
1290    */
1291   for (i = 0; i < current_arg; i++)
1292     {
1293       static language *lang;    /* non-NULL if language is forced */
1294       char *this_file;
1295
1296       switch (argbuffer[i].arg_type)
1297         {
1298         case at_language:
1299           lang = argbuffer[i].lang;
1300           break;
1301         case at_regexp:
1302           analyse_regex (argbuffer[i].what);
1303           break;
1304         case at_filename:
1305               this_file = argbuffer[i].what;
1306               /* Input file named "-" means read file names from stdin
1307                  (one per line) and use them. */
1308               if (streq (this_file, "-"))
1309                 {
1310                   if (parsing_stdin)
1311                     fatal ("cannot parse standard input AND read file names from it",
1312                            (char *)NULL);
1313                   while (readline_internal (&filename_lb, stdin) > 0)
1314                     process_file_name (filename_lb.buffer, lang);
1315                 }
1316               else
1317                 process_file_name (this_file, lang);
1318           break;
1319         case at_stdin:
1320           this_file = argbuffer[i].what;
1321           process_file (stdin, this_file, lang);
1322           break;
1323         }
1324     }
1325
1326   free_regexps ();
1327   free (lb.buffer);
1328   free (filebuf.buffer);
1329   free (token_name.buffer);
1330
1331   if (!CTAGS || cxref_style)
1332     {
1333       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1334       put_entries (nodehead);
1335       free_tree (nodehead);
1336       nodehead = NULL;
1337       if (!CTAGS)
1338         {
1339           fdesc *fdp;
1340
1341           /* Output file entries that have no tags. */
1342           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1343             if (!fdp->written)
1344               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1345
1346           while (nincluded_files-- > 0)
1347             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1348
1349           if (fclose (tagf) == EOF)
1350             pfatal (tagfile);
1351         }
1352
1353       exit (EXIT_SUCCESS);
1354     }
1355
1356   /* From here on, we are in (CTAGS && !cxref_style) */
1357   if (update)
1358     {
1359       char cmd[BUFSIZ];
1360       for (i = 0; i < current_arg; ++i)
1361         {
1362           switch (argbuffer[i].arg_type)
1363             {
1364             case at_filename:
1365             case at_stdin:
1366               break;
1367             default:
1368               continue;         /* the for loop */
1369             }
1370           sprintf (cmd,
1371                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1372                    tagfile, argbuffer[i].what, tagfile);
1373           if (system (cmd) != EXIT_SUCCESS)
1374             fatal ("failed to execute shell command", (char *)NULL);
1375         }
1376       append_to_tagfile = TRUE;
1377     }
1378
1379   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1380   if (tagf == NULL)
1381     pfatal (tagfile);
1382   put_entries (nodehead);       /* write all the tags (CTAGS) */
1383   free_tree (nodehead);
1384   nodehead = NULL;
1385   if (fclose (tagf) == EOF)
1386     pfatal (tagfile);
1387
1388   if (CTAGS)
1389     if (append_to_tagfile || update)
1390       {
1391         char cmd[2*BUFSIZ+20];
1392         /* Maybe these should be used:
1393            setenv ("LC_COLLATE", "C", 1);
1394            setenv ("LC_ALL", "C", 1); */
1395         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1396         exit (system (cmd));
1397       }
1398   return EXIT_SUCCESS;
1399 }
1400
1401
1402 /*
1403  * Return a compressor given the file name.  If EXTPTR is non-zero,
1404  * return a pointer into FILE where the compressor-specific
1405  * extension begins.  If no compressor is found, NULL is returned
1406  * and EXTPTR is not significant.
1407  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1408  */
1409 static compressor *
1410 get_compressor_from_suffix (char *file, char **extptr)
1411 {
1412   compressor *compr;
1413   char *slash, *suffix;
1414
1415   /* File has been processed by canonicalize_filename,
1416      so we don't need to consider backslashes on DOS_NT.  */
1417   slash = etags_strrchr (file, '/');
1418   suffix = etags_strrchr (file, '.');
1419   if (suffix == NULL || suffix < slash)
1420     return NULL;
1421   if (extptr != NULL)
1422     *extptr = suffix;
1423   suffix += 1;
1424   /* Let those poor souls who live with DOS 8+3 file name limits get
1425      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1426      Only the first do loop is run if not MSDOS */
1427   do
1428     {
1429       for (compr = compressors; compr->suffix != NULL; compr++)
1430         if (streq (compr->suffix, suffix))
1431           return compr;
1432       if (!MSDOS)
1433         break;                  /* do it only once: not really a loop */
1434       if (extptr != NULL)
1435         *extptr = ++suffix;
1436     } while (*suffix != '\0');
1437   return NULL;
1438 }
1439
1440
1441
1442 /*
1443  * Return a language given the name.
1444  */
1445 static language *
1446 get_language_from_langname (const char *name)
1447 {
1448   language *lang;
1449
1450   if (name == NULL)
1451     error ("empty language name", (char *)NULL);
1452   else
1453     {
1454       for (lang = lang_names; lang->name != NULL; lang++)
1455         if (streq (name, lang->name))
1456           return lang;
1457       error ("unknown language \"%s\"", name);
1458     }
1459
1460   return NULL;
1461 }
1462
1463
1464 /*
1465  * Return a language given the interpreter name.
1466  */
1467 static language *
1468 get_language_from_interpreter (char *interpreter)
1469 {
1470   language *lang;
1471   const char **iname;
1472
1473   if (interpreter == NULL)
1474     return NULL;
1475   for (lang = lang_names; lang->name != NULL; lang++)
1476     if (lang->interpreters != NULL)
1477       for (iname = lang->interpreters; *iname != NULL; iname++)
1478         if (streq (*iname, interpreter))
1479             return lang;
1480
1481   return NULL;
1482 }
1483
1484
1485
1486 /*
1487  * Return a language given the file name.
1488  */
1489 static language *
1490 get_language_from_filename (char *file, int case_sensitive)
1491 {
1492   language *lang;
1493   const char **name, **ext, *suffix;
1494
1495   /* Try whole file name first. */
1496   for (lang = lang_names; lang->name != NULL; lang++)
1497     if (lang->filenames != NULL)
1498       for (name = lang->filenames; *name != NULL; name++)
1499         if ((case_sensitive)
1500             ? streq (*name, file)
1501             : strcaseeq (*name, file))
1502           return lang;
1503
1504   /* If not found, try suffix after last dot. */
1505   suffix = etags_strrchr (file, '.');
1506   if (suffix == NULL)
1507     return NULL;
1508   suffix += 1;
1509   for (lang = lang_names; lang->name != NULL; lang++)
1510     if (lang->suffixes != NULL)
1511       for (ext = lang->suffixes; *ext != NULL; ext++)
1512         if ((case_sensitive)
1513             ? streq (*ext, suffix)
1514             : strcaseeq (*ext, suffix))
1515           return lang;
1516   return NULL;
1517 }
1518
1519 \f
1520 /*
1521  * This routine is called on each file argument.
1522  */
1523 static void
1524 process_file_name (char *file, language *lang)
1525 {
1526   struct stat stat_buf;
1527   FILE *inf;
1528   fdesc *fdp;
1529   compressor *compr;
1530   char *compressed_name, *uncompressed_name;
1531   char *ext, *real_name;
1532   int retval;
1533
1534   canonicalize_filename (file);
1535   if (streq (file, tagfile) && !streq (tagfile, "-"))
1536     {
1537       error ("skipping inclusion of %s in self.", file);
1538       return;
1539     }
1540   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1541     {
1542       compressed_name = NULL;
1543       real_name = uncompressed_name = savestr (file);
1544     }
1545   else
1546     {
1547       real_name = compressed_name = savestr (file);
1548       uncompressed_name = savenstr (file, ext - file);
1549     }
1550
1551   /* If the canonicalized uncompressed name
1552      has already been dealt with, skip it silently. */
1553   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1554     {
1555       assert (fdp->infname != NULL);
1556       if (streq (uncompressed_name, fdp->infname))
1557         goto cleanup;
1558     }
1559
1560   if (stat (real_name, &stat_buf) != 0)
1561     {
1562       /* Reset real_name and try with a different name. */
1563       real_name = NULL;
1564       if (compressed_name != NULL) /* try with the given suffix */
1565         {
1566           if (stat (uncompressed_name, &stat_buf) == 0)
1567             real_name = uncompressed_name;
1568         }
1569       else                      /* try all possible suffixes */
1570         {
1571           for (compr = compressors; compr->suffix != NULL; compr++)
1572             {
1573               compressed_name = concat (file, ".", compr->suffix);
1574               if (stat (compressed_name, &stat_buf) != 0)
1575                 {
1576                   if (MSDOS)
1577                     {
1578                       char *suf = compressed_name + strlen (file);
1579                       size_t suflen = strlen (compr->suffix) + 1;
1580                       for ( ; suf[1]; suf++, suflen--)
1581                         {
1582                           memmove (suf, suf + 1, suflen);
1583                           if (stat (compressed_name, &stat_buf) == 0)
1584                             {
1585                               real_name = compressed_name;
1586                               break;
1587                             }
1588                         }
1589                       if (real_name != NULL)
1590                         break;
1591                     } /* MSDOS */
1592                   free (compressed_name);
1593                   compressed_name = NULL;
1594                 }
1595               else
1596                 {
1597                   real_name = compressed_name;
1598                   break;
1599                 }
1600             }
1601         }
1602       if (real_name == NULL)
1603         {
1604           perror (file);
1605           goto cleanup;
1606         }
1607     } /* try with a different name */
1608
1609   if (!S_ISREG (stat_buf.st_mode))
1610     {
1611       error ("skipping %s: it is not a regular file.", real_name);
1612       goto cleanup;
1613     }
1614   if (real_name == compressed_name)
1615     {
1616       char *cmd = concat (compr->command, " ", real_name);
1617       inf = (FILE *) popen (cmd, "r");
1618       free (cmd);
1619     }
1620   else
1621     inf = fopen (real_name, "r");
1622   if (inf == NULL)
1623     {
1624       perror (real_name);
1625       goto cleanup;
1626     }
1627
1628   process_file (inf, uncompressed_name, lang);
1629
1630   if (real_name == compressed_name)
1631     retval = pclose (inf);
1632   else
1633     retval = fclose (inf);
1634   if (retval < 0)
1635     pfatal (file);
1636
1637  cleanup:
1638   free (compressed_name);
1639   free (uncompressed_name);
1640   last_node = NULL;
1641   curfdp = NULL;
1642   return;
1643 }
1644
1645 static void
1646 process_file (FILE *fh, char *fn, language *lang)
1647 {
1648   static const fdesc emptyfdesc;
1649   fdesc *fdp;
1650
1651   /* Create a new input file description entry. */
1652   fdp = xnew (1, fdesc);
1653   *fdp = emptyfdesc;
1654   fdp->next = fdhead;
1655   fdp->infname = savestr (fn);
1656   fdp->lang = lang;
1657   fdp->infabsname = absolute_filename (fn, cwd);
1658   fdp->infabsdir = absolute_dirname (fn, cwd);
1659   if (filename_is_absolute (fn))
1660     {
1661       /* An absolute file name.  Canonicalize it. */
1662       fdp->taggedfname = absolute_filename (fn, NULL);
1663     }
1664   else
1665     {
1666       /* A file name relative to cwd.  Make it relative
1667          to the directory of the tags file. */
1668       fdp->taggedfname = relative_filename (fn, tagfiledir);
1669     }
1670   fdp->usecharno = TRUE;        /* use char position when making tags */
1671   fdp->prop = NULL;
1672   fdp->written = FALSE;         /* not written on tags file yet */
1673
1674   fdhead = fdp;
1675   curfdp = fdhead;              /* the current file description */
1676
1677   find_entries (fh);
1678
1679   /* If not Ctags, and if this is not metasource and if it contained no #line
1680      directives, we can write the tags and free all nodes pointing to
1681      curfdp. */
1682   if (!CTAGS
1683       && curfdp->usecharno      /* no #line directives in this file */
1684       && !curfdp->lang->metasource)
1685     {
1686       node *np, *prev;
1687
1688       /* Look for the head of the sublist relative to this file.  See add_node
1689          for the structure of the node tree. */
1690       prev = NULL;
1691       for (np = nodehead; np != NULL; prev = np, np = np->left)
1692         if (np->fdp == curfdp)
1693           break;
1694
1695       /* If we generated tags for this file, write and delete them. */
1696       if (np != NULL)
1697         {
1698           /* This is the head of the last sublist, if any.  The following
1699              instructions depend on this being true. */
1700           assert (np->left == NULL);
1701
1702           assert (fdhead == curfdp);
1703           assert (last_node->fdp == curfdp);
1704           put_entries (np);     /* write tags for file curfdp->taggedfname */
1705           free_tree (np);       /* remove the written nodes */
1706           if (prev == NULL)
1707             nodehead = NULL;    /* no nodes left */
1708           else
1709             prev->left = NULL;  /* delete the pointer to the sublist */
1710         }
1711     }
1712 }
1713
1714 /*
1715  * This routine sets up the boolean pseudo-functions which work
1716  * by setting boolean flags dependent upon the corresponding character.
1717  * Every char which is NOT in that string is not a white char.  Therefore,
1718  * all of the array "_wht" is set to FALSE, and then the elements
1719  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1720  * of a char is TRUE if it is the string "white", else FALSE.
1721  */
1722 static void
1723 init (void)
1724 {
1725   register const char *sp;
1726   register int i;
1727
1728   for (i = 0; i < CHARS; i++)
1729     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1730   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1731   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1732   notinname('\0') = notinname('\n');
1733   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1734   begtoken('\0') = begtoken('\n');
1735   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1736   intoken('\0') = intoken('\n');
1737   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1738   endtoken('\0') = endtoken('\n');
1739 }
1740
1741 /*
1742  * This routine opens the specified file and calls the function
1743  * which finds the function and type definitions.
1744  */
1745 static void
1746 find_entries (FILE *inf)
1747 {
1748   char *cp;
1749   language *lang = curfdp->lang;
1750   Lang_function *parser = NULL;
1751
1752   /* If user specified a language, use it. */
1753   if (lang != NULL && lang->function != NULL)
1754     {
1755       parser = lang->function;
1756     }
1757
1758   /* Else try to guess the language given the file name. */
1759   if (parser == NULL)
1760     {
1761       lang = get_language_from_filename (curfdp->infname, TRUE);
1762       if (lang != NULL && lang->function != NULL)
1763         {
1764           curfdp->lang = lang;
1765           parser = lang->function;
1766         }
1767     }
1768
1769   /* Else look for sharp-bang as the first two characters. */
1770   if (parser == NULL
1771       && readline_internal (&lb, inf) > 0
1772       && lb.len >= 2
1773       && lb.buffer[0] == '#'
1774       && lb.buffer[1] == '!')
1775     {
1776       char *lp;
1777
1778       /* Set lp to point at the first char after the last slash in the
1779          line or, if no slashes, at the first nonblank.  Then set cp to
1780          the first successive blank and terminate the string. */
1781       lp = etags_strrchr (lb.buffer+2, '/');
1782       if (lp != NULL)
1783         lp += 1;
1784       else
1785         lp = skip_spaces (lb.buffer + 2);
1786       cp = skip_non_spaces (lp);
1787       *cp = '\0';
1788
1789       if (strlen (lp) > 0)
1790         {
1791           lang = get_language_from_interpreter (lp);
1792           if (lang != NULL && lang->function != NULL)
1793             {
1794               curfdp->lang = lang;
1795               parser = lang->function;
1796             }
1797         }
1798     }
1799
1800   /* We rewind here, even if inf may be a pipe.  We fail if the
1801      length of the first line is longer than the pipe block size,
1802      which is unlikely. */
1803   rewind (inf);
1804
1805   /* Else try to guess the language given the case insensitive file name. */
1806   if (parser == NULL)
1807     {
1808       lang = get_language_from_filename (curfdp->infname, FALSE);
1809       if (lang != NULL && lang->function != NULL)
1810         {
1811           curfdp->lang = lang;
1812           parser = lang->function;
1813         }
1814     }
1815
1816   /* Else try Fortran or C. */
1817   if (parser == NULL)
1818     {
1819       node *old_last_node = last_node;
1820
1821       curfdp->lang = get_language_from_langname ("fortran");
1822       find_entries (inf);
1823
1824       if (old_last_node == last_node)
1825         /* No Fortran entries found.  Try C. */
1826         {
1827           /* We do not tag if rewind fails.
1828              Only the file name will be recorded in the tags file. */
1829           rewind (inf);
1830           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1831           find_entries (inf);
1832         }
1833       return;
1834     }
1835
1836   if (!no_line_directive
1837       && curfdp->lang != NULL && curfdp->lang->metasource)
1838     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1839        file, or anyway we parsed a file that is automatically generated from
1840        this one.  If this is the case, the bingo.c file contained #line
1841        directives that generated tags pointing to this file.  Let's delete
1842        them all before parsing this file, which is the real source. */
1843     {
1844       fdesc **fdpp = &fdhead;
1845       while (*fdpp != NULL)
1846         if (*fdpp != curfdp
1847             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1848           /* We found one of those!  We must delete both the file description
1849              and all tags referring to it. */
1850           {
1851             fdesc *badfdp = *fdpp;
1852
1853             /* Delete the tags referring to badfdp->taggedfname
1854                that were obtained from badfdp->infname. */
1855             invalidate_nodes (badfdp, &nodehead);
1856
1857             *fdpp = badfdp->next; /* remove the bad description from the list */
1858             free_fdesc (badfdp);
1859           }
1860         else
1861           fdpp = &(*fdpp)->next; /* advance the list pointer */
1862     }
1863
1864   assert (parser != NULL);
1865
1866   /* Generic initialisations before reading from file. */
1867   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1868
1869   /* Generic initialisations before parsing file with readline. */
1870   lineno = 0;                  /* reset global line number */
1871   charno = 0;                  /* reset global char number */
1872   linecharno = 0;              /* reset global char number of line start */
1873
1874   parser (inf);
1875
1876   regex_tag_multiline ();
1877 }
1878
1879 \f
1880 /*
1881  * Check whether an implicitly named tag should be created,
1882  * then call `pfnote'.
1883  * NAME is a string that is internally copied by this function.
1884  *
1885  * TAGS format specification
1886  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1887  * The following is explained in some more detail in etc/ETAGS.EBNF.
1888  *
1889  * make_tag creates tags with "implicit tag names" (unnamed tags)
1890  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1891  *  1. NAME does not contain any of the characters in NONAM;
1892  *  2. LINESTART contains name as either a rightmost, or rightmost but
1893  *     one character, substring;
1894  *  3. the character, if any, immediately before NAME in LINESTART must
1895  *     be a character in NONAM;
1896  *  4. the character, if any, immediately after NAME in LINESTART must
1897  *     also be a character in NONAM.
1898  *
1899  * The implementation uses the notinname() macro, which recognises the
1900  * characters stored in the string `nonam'.
1901  * etags.el needs to use the same characters that are in NONAM.
1902  */
1903 static void
1904 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1905           int namelen,          /* tag length */
1906           int is_func,          /* tag is a function */
1907           char *linestart,      /* start of the line where tag is */
1908           int linelen,          /* length of the line where tag is */
1909           int lno,              /* line number */
1910           long int cno)         /* character number */
1911 {
1912   bool named = (name != NULL && namelen > 0);
1913   char *nname = NULL;
1914
1915   if (!CTAGS && named)          /* maybe set named to false */
1916     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1917        such that etags.el can guess a name from it. */
1918     {
1919       int i;
1920       register const char *cp = name;
1921
1922       for (i = 0; i < namelen; i++)
1923         if (notinname (*cp++))
1924           break;
1925       if (i == namelen)                         /* rule #1 */
1926         {
1927           cp = linestart + linelen - namelen;
1928           if (notinname (linestart[linelen-1]))
1929             cp -= 1;                            /* rule #4 */
1930           if (cp >= linestart                   /* rule #2 */
1931               && (cp == linestart
1932                   || notinname (cp[-1]))        /* rule #3 */
1933               && strneq (name, cp, namelen))    /* rule #2 */
1934             named = FALSE;      /* use implicit tag name */
1935         }
1936     }
1937
1938   if (named)
1939     nname = savenstr (name, namelen);
1940
1941   pfnote (nname, is_func, linestart, linelen, lno, cno);
1942 }
1943
1944 /* Record a tag. */
1945 static void
1946 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1947                                 /* tag name, or NULL if unnamed */
1948                                 /* tag is a function */
1949                                 /* start of the line where tag is */
1950                                 /* length of the line where tag is */
1951                                 /* line number */
1952                                 /* character number */
1953 {
1954   register node *np;
1955
1956   assert (name == NULL || name[0] != '\0');
1957   if (CTAGS && name == NULL)
1958     return;
1959
1960   np = xnew (1, node);
1961
1962   /* If ctags mode, change name "main" to M<thisfilename>. */
1963   if (CTAGS && !cxref_style && streq (name, "main"))
1964     {
1965       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1966       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1967       fp = etags_strrchr (np->name, '.');
1968       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1969         fp[0] = '\0';
1970     }
1971   else
1972     np->name = name;
1973   np->valid = TRUE;
1974   np->been_warned = FALSE;
1975   np->fdp = curfdp;
1976   np->is_func = is_func;
1977   np->lno = lno;
1978   if (np->fdp->usecharno)
1979     /* Our char numbers are 0-base, because of C language tradition?
1980        ctags compatibility?  old versions compatibility?   I don't know.
1981        Anyway, since emacs's are 1-base we expect etags.el to take care
1982        of the difference.  If we wanted to have 1-based numbers, we would
1983        uncomment the +1 below. */
1984     np->cno = cno /* + 1 */ ;
1985   else
1986     np->cno = invalidcharno;
1987   np->left = np->right = NULL;
1988   if (CTAGS && !cxref_style)
1989     {
1990       if (strlen (linestart) < 50)
1991         np->regex = concat (linestart, "$", "");
1992       else
1993         np->regex = savenstr (linestart, 50);
1994     }
1995   else
1996     np->regex = savenstr (linestart, linelen);
1997
1998   add_node (np, &nodehead);
1999 }
2000
2001 /*
2002  * free_tree ()
2003  *      recurse on left children, iterate on right children.
2004  */
2005 static void
2006 free_tree (register node *np)
2007 {
2008   while (np)
2009     {
2010       register node *node_right = np->right;
2011       free_tree (np->left);
2012       free (np->name);
2013       free (np->regex);
2014       free (np);
2015       np = node_right;
2016     }
2017 }
2018
2019 /*
2020  * free_fdesc ()
2021  *      delete a file description
2022  */
2023 static void
2024 free_fdesc (register fdesc *fdp)
2025 {
2026   free (fdp->infname);
2027   free (fdp->infabsname);
2028   free (fdp->infabsdir);
2029   free (fdp->taggedfname);
2030   free (fdp->prop);
2031   free (fdp);
2032 }
2033
2034 /*
2035  * add_node ()
2036  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2037  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2038  *      balancing.
2039  *
2040  *      add_node is the only function allowed to add nodes, so it can
2041  *      maintain state.
2042  */
2043 static void
2044 add_node (node *np, node **cur_node_p)
2045 {
2046   register int dif;
2047   register node *cur_node = *cur_node_p;
2048
2049   if (cur_node == NULL)
2050     {
2051       *cur_node_p = np;
2052       last_node = np;
2053       return;
2054     }
2055
2056   if (!CTAGS)
2057     /* Etags Mode */
2058     {
2059       /* For each file name, tags are in a linked sublist on the right
2060          pointer.  The first tags of different files are a linked list
2061          on the left pointer.  last_node points to the end of the last
2062          used sublist. */
2063       if (last_node != NULL && last_node->fdp == np->fdp)
2064         {
2065           /* Let's use the same sublist as the last added node. */
2066           assert (last_node->right == NULL);
2067           last_node->right = np;
2068           last_node = np;
2069         }
2070       else if (cur_node->fdp == np->fdp)
2071         {
2072           /* Scanning the list we found the head of a sublist which is
2073              good for us.  Let's scan this sublist. */
2074           add_node (np, &cur_node->right);
2075         }
2076       else
2077         /* The head of this sublist is not good for us.  Let's try the
2078            next one. */
2079         add_node (np, &cur_node->left);
2080     } /* if ETAGS mode */
2081
2082   else
2083     {
2084       /* Ctags Mode */
2085       dif = strcmp (np->name, cur_node->name);
2086
2087       /*
2088        * If this tag name matches an existing one, then
2089        * do not add the node, but maybe print a warning.
2090        */
2091       if (no_duplicates && !dif)
2092         {
2093           if (np->fdp == cur_node->fdp)
2094             {
2095               if (!no_warnings)
2096                 {
2097                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2098                            np->fdp->infname, lineno, np->name);
2099                   fprintf (stderr, "Second entry ignored\n");
2100                 }
2101             }
2102           else if (!cur_node->been_warned && !no_warnings)
2103             {
2104               fprintf
2105                 (stderr,
2106                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2107                  np->fdp->infname, cur_node->fdp->infname, np->name);
2108               cur_node->been_warned = TRUE;
2109             }
2110           return;
2111         }
2112
2113       /* Actually add the node */
2114       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2115     } /* if CTAGS mode */
2116 }
2117
2118 /*
2119  * invalidate_nodes ()
2120  *      Scan the node tree and invalidate all nodes pointing to the
2121  *      given file description (CTAGS case) or free them (ETAGS case).
2122  */
2123 static void
2124 invalidate_nodes (fdesc *badfdp, node **npp)
2125 {
2126   node *np = *npp;
2127
2128   if (np == NULL)
2129     return;
2130
2131   if (CTAGS)
2132     {
2133       if (np->left != NULL)
2134         invalidate_nodes (badfdp, &np->left);
2135       if (np->fdp == badfdp)
2136         np->valid = FALSE;
2137       if (np->right != NULL)
2138         invalidate_nodes (badfdp, &np->right);
2139     }
2140   else
2141     {
2142       assert (np->fdp != NULL);
2143       if (np->fdp == badfdp)
2144         {
2145           *npp = np->left;      /* detach the sublist from the list */
2146           np->left = NULL;      /* isolate it */
2147           free_tree (np);       /* free it */
2148           invalidate_nodes (badfdp, npp);
2149         }
2150       else
2151         invalidate_nodes (badfdp, &np->left);
2152     }
2153 }
2154
2155 \f
2156 static int total_size_of_entries (node *);
2157 static int number_len (long);
2158
/* Length of a non-negative number's decimal representation:
   one digit, plus one more for every factor of ten in NUM. */
static int
number_len (long int num)
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2168
2169 /*
2170  * Return total number of characters that put_entries will output for
2171  * the nodes in the linked list at the right of the specified node.
2172  * This count is irrelevant with etags.el since emacs 19.34 at least,
2173  * but is still supplied for backward compatibility.
2174  */
2175 static int
2176 total_size_of_entries (register node *np)
2177 {
2178   register int total = 0;
2179
2180   for (; np != NULL; np = np->right)
2181     if (np->valid)
2182       {
2183         total += strlen (np->regex) + 1;                /* pat\177 */
2184         if (np->name != NULL)
2185           total += strlen (np->name) + 1;               /* name\001 */
2186         total += number_len ((long) np->lno) + 1;       /* lno, */
2187         if (np->cno != invalidcharno)                   /* cno */
2188           total += number_len (np->cno);
2189         total += 1;                                     /* newline */
2190       }
2191
2192   return total;
2193 }
2194
2195 static void
2196 put_entries (register node *np)
2197 {
2198   register char *sp;
2199   static fdesc *fdp = NULL;
2200
2201   if (np == NULL)
2202     return;
2203
2204   /* Output subentries that precede this one */
2205   if (CTAGS)
2206     put_entries (np->left);
2207
2208   /* Output this entry */
2209   if (np->valid)
2210     {
2211       if (!CTAGS)
2212         {
2213           /* Etags mode */
2214           if (fdp != np->fdp)
2215             {
2216               fdp = np->fdp;
2217               fprintf (tagf, "\f\n%s,%d\n",
2218                        fdp->taggedfname, total_size_of_entries (np));
2219               fdp->written = TRUE;
2220             }
2221           fputs (np->regex, tagf);
2222           fputc ('\177', tagf);
2223           if (np->name != NULL)
2224             {
2225               fputs (np->name, tagf);
2226               fputc ('\001', tagf);
2227             }
2228           fprintf (tagf, "%d,", np->lno);
2229           if (np->cno != invalidcharno)
2230             fprintf (tagf, "%ld", np->cno);
2231           fputs ("\n", tagf);
2232         }
2233       else
2234         {
2235           /* Ctags mode */
2236           if (np->name == NULL)
2237             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2238
2239           if (cxref_style)
2240             {
2241               if (vgrind_style)
2242                 fprintf (stdout, "%s %s %d\n",
2243                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2244               else
2245                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2246                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2247             }
2248           else
2249             {
2250               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2251
2252               if (np->is_func)
2253                 {               /* function or #define macro with args */
2254                   putc (searchar, tagf);
2255                   putc ('^', tagf);
2256
2257                   for (sp = np->regex; *sp; sp++)
2258                     {
2259                       if (*sp == '\\' || *sp == searchar)
2260                         putc ('\\', tagf);
2261                       putc (*sp, tagf);
2262                     }
2263                   putc (searchar, tagf);
2264                 }
2265               else
2266                 {               /* anything else; text pattern inadequate */
2267                   fprintf (tagf, "%d", np->lno);
2268                 }
2269               putc ('\n', tagf);
2270             }
2271         }
2272     } /* if this node contains a valid tag */
2273
2274   /* Output subentries that follow this one */
2275   put_entries (np->right);
2276   if (!CTAGS)
2277     put_entries (np->left);
2278 }
2279
2280 \f
/* C extensions.
   The values of C_STAR and C_JAVA both include the C_PLPL bit, which
   is why the keyword table below writes (C_JAVA & ~C_PLPL) for the
   words that belong to Java but not to C++. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* (includes the C++ bit) */
#define C_JAVA  0x00005         /* JAVA (includes the C++ bit) */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2289
/*
 * The C symbol tables.
 *
 * sym_type is the kind attached to each word of the keyword table
 * below (the third column of the gperf input).
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2304
2305 static unsigned int hash (const char *, unsigned int);
2306 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2307 static enum sym_type C_symtype (char *, int, int);
2308
2309 /* Feed stuff between (but not including) %[ and %] lines to:
2310      gperf -m 5
2311 %[
2312 %compare-strncmp
2313 %enum
2314 %struct-type
2315 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2316 %%
2317 if,             0,                      st_C_ignore
2318 for,            0,                      st_C_ignore
2319 while,          0,                      st_C_ignore
2320 switch,         0,                      st_C_ignore
2321 return,         0,                      st_C_ignore
2322 __attribute__,  0,                      st_C_attribute
2323 GTY,            0,                      st_C_attribute
2324 @interface,     0,                      st_C_objprot
2325 @protocol,      0,                      st_C_objprot
2326 @implementation,0,                      st_C_objimpl
2327 @end,           0,                      st_C_objend
2328 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2329 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2330 friend,         C_PLPL,                 st_C_ignore
2331 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2332 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2333 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2334 class,          0,                      st_C_class
2335 namespace,      C_PLPL,                 st_C_struct
2336 domain,         C_STAR,                 st_C_struct
2337 union,          0,                      st_C_struct
2338 struct,         0,                      st_C_struct
2339 extern,         0,                      st_C_extern
2340 enum,           0,                      st_C_enum
2341 typedef,        0,                      st_C_typedef
2342 define,         0,                      st_C_define
2343 undef,          0,                      st_C_define
2344 operator,       C_PLPL,                 st_C_operator
2345 template,       0,                      st_C_template
2346 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2347 DEFUN,          0,                      st_C_gnumacro
2348 SYSCALL,        0,                      st_C_gnumacro
2349 ENTRY,          0,                      st_C_gnumacro
2350 PSEUDO,         0,                      st_C_gnumacro
2351 # These are defined inside C functions, so currently they are not met.
2352 # EXFUN used in glibc, DEFVAR_* in emacs.
2353 #EXFUN,         0,                      st_C_gnumacro
2354 #DEFVAR_,       0,                      st_C_gnumacro
2355 %]
2356 and replace lines between %< and %> with its output, then:
2357  - remove the #if characterset check
2358  - make in_word_set static and not inline. */
2359 /*%<*/
2360 /* C code produced by gperf version 3.0.1 */
2361 /* Command-line: gperf -m 5  */
2362 /* Computed positions: -k'2-3' */
2363
2364 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2365 /* maximum key range = 33, duplicates = 0 */
2366
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function of the keyword table: LEN plus the association
   values of the second character of STR and, unless LEN is 2, of the
   third one as well.  STR must hold at least two characters, or at
   least three when LEN is not 2. */
static unsigned int
hash (register const char *str, register unsigned int len)
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Words of exactly two characters have no third character. */
  if (len != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2419
2420 static struct C_stab_entry *
2421 in_word_set (register const char *str, register unsigned int len)
2422 {
2423   enum
2424     {
2425       TOTAL_KEYWORDS = 33,
2426       MIN_WORD_LENGTH = 2,
2427       MAX_WORD_LENGTH = 15,
2428       MIN_HASH_VALUE = 2,
2429       MAX_HASH_VALUE = 34
2430     };
2431
2432   static struct C_stab_entry wordlist[] =
2433     {
2434       {""}, {""},
2435       {"if",            0,                      st_C_ignore},
2436       {"GTY",           0,                      st_C_attribute},
2437       {"@end",          0,                      st_C_objend},
2438       {"union",         0,                      st_C_struct},
2439       {"define",                0,                      st_C_define},
2440       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2441       {"template",      0,                      st_C_template},
2442       {"operator",      C_PLPL,                 st_C_operator},
2443       {"@interface",    0,                      st_C_objprot},
2444       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2445       {"friend",                C_PLPL,                 st_C_ignore},
2446       {"typedef",       0,                      st_C_typedef},
2447       {"return",                0,                      st_C_ignore},
2448       {"@implementation",0,                     st_C_objimpl},
2449       {"@protocol",     0,                      st_C_objprot},
2450       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2451       {"extern",                0,                      st_C_extern},
2452       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2453       {"struct",                0,                      st_C_struct},
2454       {"domain",                C_STAR,                 st_C_struct},
2455       {"switch",                0,                      st_C_ignore},
2456       {"enum",          0,                      st_C_enum},
2457       {"for",           0,                      st_C_ignore},
2458       {"namespace",     C_PLPL,                 st_C_struct},
2459       {"class",         0,                      st_C_class},
2460       {"while",         0,                      st_C_ignore},
2461       {"undef",         0,                      st_C_define},
2462       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2463       {"__attribute__", 0,                      st_C_attribute},
2464       {"SYSCALL",       0,                      st_C_gnumacro},
2465       {"ENTRY",         0,                      st_C_gnumacro},
2466       {"PSEUDO",                0,                      st_C_gnumacro},
2467       {"DEFUN",         0,                      st_C_gnumacro}
2468     };
2469
2470   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2471     {
2472       register int key = hash (str, len);
2473
2474       if (key <= MAX_HASH_VALUE && key >= 0)
2475         {
2476           register const char *s = wordlist[key].name;
2477
2478           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2479             return &wordlist[key];
2480         }
2481     }
2482   return 0;
2483 }
2484 /*%>*/
2485
2486 static enum sym_type
2487 C_symtype (char *str, int len, int c_ext)
2488 {
2489   register struct C_stab_entry *se = in_word_set (str, len);
2490
2491   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2492     return st_none;
2493   return se->type;
2494 }
2495
2496 \f
2497 /*
2498  * Ignoring __attribute__ ((list))
      *
      * consider_token sets this flag when it meets a token classified as
      * st_C_attribute (`__attribute__' or `GTY' in the keyword table), and
      * C_entries does not hand tokens to consider_token while it is set.
2499  */
2500 static bool inattribute;        /* looking at an __attribute__ construct */
2501
2502 /*
2503  * C functions and variables are recognized using a simple
2504  * finite automaton.  fvdef is its state variable.
      * C_entries resets it to fvnone before scanning a file, and
      * consider_token advances it as tokens are classified.
2505  */
2506 static enum
2507 {
2508   fvnone,                       /* nothing seen */
2509   fdefunkey,                    /* GNU macro keyword (DEFUN, ENTRY, PSEUDO, SYSCALL) seen */
2510   fdefunname,                   /* name following the GNU macro keyword seen */
2511   foperator,                    /* func: operator keyword seen (cplpl) */
2512   fvnameseen,                   /* function or variable name seen */
2513   fstartlist,                   /* func: just after open parenthesis */
2514   finlist,                      /* func: in parameter list */
2515   flistseen,                    /* func: after parameter list */
2516   fignore,                      /* func: before open brace */
2517   vignore                       /* var-like: ignore until ';' (also entered after an ignored keyword or asm) */
2518 } fvdef;
2519
2520 static bool fvextern;           /* func or var: extern keyword seen */
2521
2522 /*
2523  * typedefs are recognized using a simple finite automaton.
2524  * typdef is its state variable.
      * In consider_token the typedef keyword moves it from tnone to
      * tkeyseen only when the `typedefs' flag is nonzero.
2525  */
2526 static enum
2527 {
2528   tnone,                        /* nothing seen */
2529   tkeyseen,                     /* typedef keyword seen */
2530   ttypeseen,                    /* defined type seen */
2531   tinbody,                      /* inside typedef body */
2532   tend,                         /* just before typedef tag */
2533   tignore                       /* junk after typedef tag */
2534 } typdef;
2535
2536 /*
2537  * struct-like structures (enum, struct and union) are recognized
2538  * using another simple finite automaton.  `structdef' is its state
2539  * variable.
2540  */
2541 static enum
2542 {
2543   snone,                        /* nothing seen yet,
2544                                    or in struct body if bracelev > 0 */
2545   skeyseen,                     /* struct-like keyword seen */
2546   stagseen,                     /* struct-like tag seen */
2547   scolonseen                    /* colon, or Java extends/implements keyword,
                                        seen after struct-like tag */
2548 } structdef;
2549
2550 /*
2551  * When objdef is different from onone, objtag is the name of the class.
      * consider_token stores a fresh savenstr copy here for each class tag
      * that follows @interface, @protocol or @implementation; the previous
      * copy is deliberately never freed (see the note in consider_token).
2552  */
2553 static const char *objtag = "<uninited>";
2554
2555 /*
2556  * Yet another little state machine to deal with preprocessor lines.
      * The CNL macro resets it to dnone whenever a new line is read;
      * CNL_SAVE_DEFINEDEF reads a new line without touching it.
2557  */
2558 static enum
2559 {
2560   dnone,                        /* nothing seen */
2561   dsharpseen,                   /* '#' seen as first char on line */
2562   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2563   dignorerest                   /* ignore rest of line */
2564 } definedef;
2565
2566 /*
2567  * State machine for Objective C protocols and implementations.
2568  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2569  */
2570 static enum
2571 {
2572   onone,                        /* nothing seen */
2573   oprotocol,                    /* @interface or @protocol seen */
2574   oimplementation,              /* @implementation seen */
2575   otagseen,                     /* class name seen */
2576   oparenseen,                   /* parenthesis before category seen */
2577   ocatseen,                     /* category name seen */
2578   oinbody,                      /* in @implementation body */
2579   omethodsign,                  /* in @implementation body, after +/- */
2580   omethodtag,                   /* after method name */
2581   omethodcolon,                 /* after method colon */
2582   omethodparm,                  /* after method parameter */
2583   oignore                       /* wait for @end */
2584 } objdef;
2585
2586
2587 /*
2588  * Use this structure to keep info about the token read, and how it
2589  * should be tagged.  Used by the make_C_tag function to build a tag.
2590  */
2591 static struct tok
2592 {
2593   char *line;                   /* string containing the token */
2594   int offset;                   /* where the token starts in LINE */
2595   int length;                   /* token length */
2596   /*
2597     The previous members can be used to pass strings around for generic
2598     purposes.  The following ones specifically refer to creating tags.  In this
2599     case the token contained here is the pattern that will be used to create a
2600     tag.
2601   */
2602   bool valid;                   /* make_C_tag creates a tag only when this is
2603                                    TRUE; the token should be invalidated
2604                                    whenever a state machine is reset
                                        prematurely */
2605   bool named;                   /* create a named tag */
2606   int lineno;                   /* source line number of tag */
2607   long linepos;                 /* source char number of tag */
2608 } token;                        /* latest token read */
2609
2610 /*
2611  * Variables and functions for dealing with nested structures.
2612  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
      *
      * cname and bracelev are parallel arrays of SIZE elements, of which
      * the first NL are in use.  pushclass_above doubles both arrays when
      * they are full; C_entries allocates them on first use.
2613  */
2614 static void pushclass_above (int, char *, int);
2615 static void popclass_above (int);
2616 static void write_classname (linebuffer *, const char *qualifier);
2617
2618 static struct {
2619   char **cname;                 /* nested class names; NULL for an unnamed entry */
2620   int *bracelev;                /* nested class brace level */
2621   int nl;                       /* class nesting level (elements used) */
2622   int size;                     /* allocated length of both arrays */
2623 } cstack;                       /* stack for nested declaration tags */
2624 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2625 #define nestlev         (cstack.nl)
2626 /* After struct keyword or in struct body, not inside a nested function. */
     /* Expands to an expression that reads a variable named `bracelev',
        which must be visible wherever the macro is used. */
2627 #define instruct        (structdef == snone && nestlev > 0                      \
2628                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2629
2630 static void
2631 pushclass_above (int bracelev, char *str, int len)
2632 {
2633   int nl;
2634
2635   popclass_above (bracelev);
2636   nl = cstack.nl;
2637   if (nl >= cstack.size)
2638     {
2639       int size = cstack.size *= 2;
2640       xrnew (cstack.cname, size, char *);
2641       xrnew (cstack.bracelev, size, int);
2642     }
2643   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2644   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2645   cstack.bracelev[nl] = bracelev;
2646   cstack.nl = nl + 1;
2647 }
2648
2649 static void
2650 popclass_above (int bracelev)
2651 {
2652   int nl;
2653
2654   for (nl = cstack.nl - 1;
2655        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2656        nl--)
2657     {
2658       free (cstack.cname[nl]);
2659       cstack.nl = nl;
2660     }
2661 }
2662
2663 static void
2664 write_classname (linebuffer *cn, const char *qualifier)
2665 {
2666   int i, len;
2667   int qlen = strlen (qualifier);
2668
2669   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2670     {
2671       len = 0;
2672       cn->len = 0;
2673       cn->buffer[0] = '\0';
2674     }
2675   else
2676     {
2677       len = strlen (cstack.cname[0]);
2678       linebuffer_setlen (cn, len);
2679       strcpy (cn->buffer, cstack.cname[0]);
2680     }
2681   for (i = 1; i < cstack.nl; i++)
2682     {
2683       char *s;
2684       int slen;
2685
2686       s = cstack.cname[i];
2687       if (s == NULL)
2688         continue;
2689       slen = strlen (s);
2690       len += slen + qlen;
2691       linebuffer_setlen (cn, len);
2692       strncat (cn->buffer, qualifier, qlen);
2693       strncat (cn->buffer, s, slen);
2694     }
2695 }
2696
2697 \f
     /* NOTE(review): this prototype declares the last parameter as `bool *'
        while the definition below uses `int *'; the two agree only if
        `bool' is an alias for `int' in this file -- confirm. */
2698 static bool consider_token (char *, int, int, int *, int, int, bool *);
2699 static void make_C_tag (bool);
2700
2701 /*
2702  * consider_token ()
2703  *      checks to see if the current token is at the start of a
2704  *      function or variable, or corresponds to a typedef, or
2705  *      is a struct/union/enum tag, or #define, or an enum constant.
2706  *
2707  *      *IS_FUNC_OR_VAR gets TRUE if the token is a function, variable,
2708  *      operator, GNU macro name or Objective C class or category tag;
      *      for a #define it gets TRUE only for a macro with args.  It is
      *      left untouched on every other path.  *C_EXTP is the mask of
      *      the language we are looking at, and is switched from
      *      automatic detection to C++ when C++ constructs are seen.
      *
      *      Returns TRUE when the token is a tag candidate (C_entries then
      *      builds a name for it), FALSE otherwise.
2709  *
2710  * Globals
2711  *      fvdef                   IN OUT
2712  *      structdef               IN OUT
2713  *      definedef               IN OUT
2714  *      typdef                  IN OUT
2715  *      objdef                  IN OUT
      *      inattribute             OUT
      *      fvextern                OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2716  */
2717
2718 static bool
2719 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2720                                 /* IN: token pointer */
2721                                 /* IN: token length */
2722                                 /* IN: first char after the token */
2723                                 /* IN, OUT: C extensions mask */
2724                                 /* IN: brace level */
2725                                 /* IN: parenthesis level */
2726                                 /* OUT: function or variable found */
2727 {
2728   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2729      structtype is the type of the preceding struct-like keyword, and
2730      structbracelev is the brace level where it has been seen. */
2731   static enum sym_type structtype;
2732   static int structbracelev;
2733   static enum sym_type toktype;
2734
2735
2736   toktype = C_symtype (str, len, *c_extp);
2737
2738   /*
2739    * Skip __attribute__
2740    */
2741   if (toktype == st_C_attribute)
2742     {
2743       inattribute = TRUE;
2744       return FALSE;
2745      }
2746
2747    /*
2748     * Advance the definedef state machine.
2749     */
2750    switch (definedef)
2751      {
2752      case dnone:
2753        /* We're not on a preprocessor line. */
2754        if (toktype == st_C_gnumacro)
2755          {
2756            fvdef = fdefunkey;
2757            return FALSE;
2758          }
2759        break;
2760      case dsharpseen:
            /* First token after '#': only `define' and `undef' are of
               interest; any other directive makes the rest of the line
               be ignored. */
2761        if (toktype == st_C_define)
2762          {
2763            definedef = ddefineseen;
2764          }
2765        else
2766          {
2767            definedef = dignorerest;
2768          }
2769        return FALSE;
2770      case ddefineseen:
2771        /*
2772         * Make a tag for any macro, unless it is a constant
2773         * and constantypedefs is FALSE.
2774         */
2775        definedef = dignorerest;
2776        *is_func_or_var = (c == '(');
2777        if (!*is_func_or_var && !constantypedefs)
2778          return FALSE;
2779        else
2780          return TRUE;
2781      case dignorerest:
2782        return FALSE;
2783      default:
2784        error ("internal error: definedef value.", (char *)NULL);
2785      }
2786
2787    /*
2788     * Now typedefs
2789     */
2790    switch (typdef)
2791      {
2792      case tnone:
2793        if (toktype == st_C_typedef)
2794          {
2795            if (typedefs)
2796              typdef = tkeyseen;
2797            fvextern = FALSE;
2798            fvdef = fvnone;
2799            return FALSE;
2800          }
2801        break;
2802      case tkeyseen:
2803        switch (toktype)
2804          {
2805          case st_none:
2806          case st_C_class:
2807          case st_C_struct:
2808          case st_C_enum:
2809            typdef = ttypeseen;
2810          }
2811        break;
2812      case ttypeseen:
2813        if (structdef == snone && fvdef == fvnone)
2814          {
2815            fvdef = fvnameseen;
2816            return TRUE;
2817          }
2818        break;
2819      case tend:
            /* Just before the typedef tag: struct-like keywords are
               skipped, any other token is reported as a tag candidate. */
2820        switch (toktype)
2821          {
2822          case st_C_class:
2823          case st_C_struct:
2824          case st_C_enum:
2825            return FALSE;
2826          }
2827        return TRUE;
2828      }
2829
        /* Struct-like keywords. */
2830    switch (toktype)
2831      {
2832      case st_C_javastruct:
2833        if (structdef == stagseen)
2834          structdef = scolonseen;
2835        return FALSE;
2836      case st_C_template:
2837      case st_C_class:
2838        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2839            && bracelev == 0
2840            && definedef == dnone && structdef == snone
2841            && typdef == tnone && fvdef == fvnone)
2842          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2843        if (toktype == st_C_template)
2844          break;
2845        /* FALLTHRU */
2846      case st_C_struct:
2847      case st_C_enum:
2848        if (parlev == 0
2849            && fvdef != vignore
2850            && (typdef == tkeyseen
2851                || (typedefs_or_cplusplus && structdef == snone)))
2852          {
2853            structdef = skeyseen;
2854            structtype = toktype;
2855            structbracelev = bracelev;
2856            if (fvdef == fvnameseen)
2857              fvdef = fvnone;
2858          }
2859        return FALSE;
2860      }
2861
        /* The token right after a struct-like keyword is its tag. */
2862    if (structdef == skeyseen)
2863      {
2864        structdef = stagseen;
2865        return TRUE;
2866      }
2867
2868    if (typdef != tnone)
2869      definedef = dnone;
2870
2871    /* Detect Objective C constructs. */
2872    switch (objdef)
2873      {
2874      case onone:
2875        switch (toktype)
2876          {
2877          case st_C_objprot:
2878            objdef = oprotocol;
2879            return FALSE;
2880          case st_C_objimpl:
2881            objdef = oimplementation;
2882            return FALSE;
2883          }
2884        break;
2885      case oimplementation:
2886        /* Save the class tag for functions or variables defined inside. */
2887        objtag = savenstr (str, len);
2888        objdef = oinbody;
2889        return FALSE;
2890      case oprotocol:
2891        /* Save the class tag for categories. */
2892        objtag = savenstr (str, len);
2893        objdef = otagseen;
2894        *is_func_or_var = TRUE;
2895        return TRUE;
2896      case oparenseen:
2897        objdef = ocatseen;
2898        *is_func_or_var = TRUE;
2899        return TRUE;
2900      case oinbody:
2901        break;
2902      case omethodsign:
2903        if (parlev == 0)
2904          {
                /* First part of a method name: start token_name afresh. */
2905            fvdef = fvnone;
2906            objdef = omethodtag;
2907            linebuffer_setlen (&token_name, len);
2908            strncpy (token_name.buffer, str, len);
2909            token_name.buffer[len] = '\0';
2910            return TRUE;
2911          }
2912        return FALSE;
2913      case omethodcolon:
2914        if (parlev == 0)
2915          objdef = omethodparm;
2916        return FALSE;
2917      case omethodparm:
2918        if (parlev == 0)
2919          {
                /* Further part of a method name: append to token_name. */
2920            fvdef = fvnone;
2921            objdef = omethodtag;
2922            linebuffer_setlen (&token_name, token_name.len + len);
2923            strncat (token_name.buffer, str, len);
2924            return TRUE;
2925          }
2926        return FALSE;
2927      case oignore:
2928        if (toktype == st_C_objend)
2929          {
2930            /* Memory leakage here: the string pointed by objtag is
2931               never released, because many tests would be needed to
2932               avoid breaking on incorrect input code.  The amount of
2933               memory leaked here is the sum of the lengths of the
2934               class tags.
2935            free (objtag); */
2936            objdef = onone;
2937          }
2938        return FALSE;
2939      }
2940
2941    /* A function, variable or enum constant? */
2942    switch (toktype)
2943      {
2944      case st_C_extern:
2945        fvextern = TRUE;
2946        switch  (fvdef)
2947          {
2948          case finlist:
2949          case flistseen:
2950          case fignore:
2951          case vignore:
2952            break;
2953          default:
2954            fvdef = fvnone;
2955          }
2956        return FALSE;
2957      case st_C_ignore:
2958        fvextern = FALSE;
2959        fvdef = vignore;
2960        return FALSE;
2961      case st_C_operator:
2962        fvdef = foperator;
2963        *is_func_or_var = TRUE;
2964        return TRUE;
2965      case st_none:
2966        if (constantypedefs
2967            && structdef == snone
2968            && structtype == st_C_enum && bracelev > structbracelev)
2969          return TRUE;           /* enum constant */
2970        switch (fvdef)
2971          {
2972          case fdefunkey:
2973            if (bracelev > 0)
2974              break;
2975            fvdef = fdefunname;  /* GNU macro */
2976            *is_func_or_var = TRUE;
2977            return TRUE;
2978          case fvnone:
2979            switch (typdef)
2980              {
2981              case ttypeseen:
2982                return FALSE;
2983              case tnone:
                    /* An asm or __asm__ statement is ignored like a
                       variable-like construct. */
2984                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2985                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2986                  {
2987                    fvdef = vignore;
2988                    return FALSE;
2989                  }
2990                break;
2991              }
2992           /* FALLTHRU */
2993           case fvnameseen:
               /* A token ending in "::operator" starts a C++ operator
                  definition. */
2994           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2995             {
2996               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2997                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2998               fvdef = foperator;
2999               *is_func_or_var = TRUE;
3000               return TRUE;
3001             }
3002           if (bracelev > 0 && !instruct)
3003             break;
3004           fvdef = fvnameseen;   /* function or variable */
3005           *is_func_or_var = TRUE;
3006           return TRUE;
3007         }
3008       break;
3009     }
3010
3011   return FALSE;
3012 }
3013
3014 \f
3015 /*
3016  * C_entries often keeps pointers to tokens or lines which are older than
3017  * the line currently read.  By keeping two line buffers, and switching
3018  * them at end of line, it is possible to use those pointers.
      *
      * The macros below are not self-contained: they expand to uses of
      * names such as curndx, newndx, c_ext, lp, inf, charno, quotednl and
      * savetoken, which must be visible wherever the macros are used.
3019  */
3020 static struct
3021 {
3022   long linepos;
3023   linebuffer lb;
3024 } lbs[2];
3025
3026 #define current_lb_is_new (newndx == curndx)
3027 #define switch_line_buffers() (curndx = 1 - curndx)
3028
3029 #define curlb (lbs[curndx].lb)
3030 #define newlb (lbs[newndx].lb)
3031 #define curlinepos (lbs[curndx].linepos)
3032 #define newlinepos (lbs[newndx].linepos)
3033
     /* Tests on the C extensions mask c_ext. */
3034 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3035 #define cplpl (c_ext & C_PLPL)
3036 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3037
     /* Read the next line into the current buffer: record charno as its
        position, point lp at its start, clear quotednl and make the current
        buffer the new one.  definedef is left alone, hence the name. */
3038 #define CNL_SAVE_DEFINEDEF()                                            \
3039 do {                                                                    \
3040   curlinepos = charno;                                                  \
3041   readline (&curlb, inf);                                               \
3042   lp = curlb.buffer;                                                    \
3043   quotednl = FALSE;                                                     \
3044   newndx = curndx;                                                      \
3045 } while (0)
3046
     /* Like CNL_SAVE_DEFINEDEF, but also restore the token saved in
        savetoken, if it is valid, and reset definedef to dnone. */
3047 #define CNL()                                                           \
3048 do {                                                                    \
3049   CNL_SAVE_DEFINEDEF();                                                 \
3050   if (savetoken.valid)                                                  \
3051     {                                                                   \
3052       token = savetoken;                                                \
3053       savetoken.valid = FALSE;                                          \
3054     }                                                                   \
3055   definedef = dnone;                                                    \
3056 } while (0)
3057
3058
3059 static void
3060 make_C_tag (int isfun)
3061 {
3062   /* This function is never called when token.valid is FALSE, but
3063      we must protect against invalid input or internal errors. */
3064   if (token.valid)
3065     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3066               token.offset+token.length+1, token.lineno, token.linepos);
3067   else if (DEBUG)
3068     {                             /* this branch is optimised away if !DEBUG */
3069       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3070                 token_name.len + 17, isfun, token.line,
3071                 token.offset+token.length+1, token.lineno, token.linepos);
3072       error ("INVALID TOKEN", NULL);
3073     }
3074
3075   token.valid = FALSE;
3076 }
3077
3078
3079 /*
3080  * C_entries ()
3081  *      This routine finds functions, variables, typedefs,
3082  *      #define's, enum constants and struct/union/enum definitions in
3083  *      C syntax and adds them to the list.
3084  */
3085 static void
3086 C_entries (int c_ext, FILE *inf)
3087                                 /* extension of C */
3088                                 /* input file */
3089 {
3090   register char c;              /* latest char read; '\0' for end of line */
3091   register char *lp;            /* pointer one beyond the character `c' */
3092   int curndx, newndx;           /* indices for current and new lb */
3093   register int tokoff;          /* offset in line of start of current token */
3094   register int toklen;          /* length of current token */
3095   const char *qualifier;        /* string used to qualify names */
3096   int qlen;                     /* length of qualifier */
3097   int bracelev;                 /* current brace level */
3098   int bracketlev;               /* current bracket level */
3099   int parlev;                   /* current parenthesis level */
3100   int attrparlev;               /* __attribute__ parenthesis level */
3101   int templatelev;              /* current template level */
3102   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3103   bool incomm, inquote, inchar, quotednl, midtoken;
3104   bool yacc_rules;              /* in the rules part of a yacc file */
3105   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3106
3107
3108   linebuffer_init (&lbs[0].lb);
3109   linebuffer_init (&lbs[1].lb);
3110   if (cstack.size == 0)
3111     {
3112       cstack.size = (DEBUG) ? 1 : 4;
3113       cstack.nl = 0;
3114       cstack.cname = xnew (cstack.size, char *);
3115       cstack.bracelev = xnew (cstack.size, int);
3116     }
3117
3118   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3119   curndx = newndx = 0;
3120   lp = curlb.buffer;
3121   *lp = 0;
3122
3123   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3124   structdef = snone; definedef = dnone; objdef = onone;
3125   yacc_rules = FALSE;
3126   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3127   token.valid = savetoken.valid = FALSE;
3128   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3129   if (cjava)
3130     { qualifier = "."; qlen = 1; }
3131   else
3132     { qualifier = "::"; qlen = 2; }
3133
3134
3135   while (!feof (inf))
3136     {
3137       c = *lp++;
3138       if (c == '\\')
3139         {
3140           /* If we are at the end of the line, the next character is a
3141              '\0'; do not skip it, because it is what tells us
3142              to read the next line.  */
3143           if (*lp == '\0')
3144             {
3145               quotednl = TRUE;
3146               continue;
3147             }
3148           lp++;
3149           c = ' ';
3150         }
3151       else if (incomm)
3152         {
3153           switch (c)
3154             {
3155             case '*':
3156               if (*lp == '/')
3157                 {
3158                   c = *lp++;
3159                   incomm = FALSE;
3160                 }
3161               break;
3162             case '\0':
3163               /* Newlines inside comments do not end macro definitions in
3164                  traditional cpp. */
3165               CNL_SAVE_DEFINEDEF ();
3166               break;
3167             }
3168           continue;
3169         }
3170       else if (inquote)
3171         {
3172           switch (c)
3173             {
3174             case '"':
3175               inquote = FALSE;
3176               break;
3177             case '\0':
3178               /* Newlines inside strings do not end macro definitions
3179                  in traditional cpp, even though compilers don't
3180                  usually accept them. */
3181               CNL_SAVE_DEFINEDEF ();
3182               break;
3183             }
3184           continue;
3185         }
3186       else if (inchar)
3187         {
3188           switch (c)
3189             {
3190             case '\0':
3191               /* Hmmm, something went wrong. */
3192               CNL ();
3193               /* FALLTHRU */
3194             case '\'':
3195               inchar = FALSE;
3196               break;
3197             }
3198           continue;
3199         }
3200       else if (bracketlev > 0)
3201         {
3202           switch (c)
3203             {
3204             case ']':
3205               if (--bracketlev > 0)
3206                 continue;
3207               break;
3208             case '\0':
3209               CNL_SAVE_DEFINEDEF ();
3210               break;
3211             }
3212           continue;
3213         }
3214       else switch (c)
3215         {
3216         case '"':
3217           inquote = TRUE;
3218           if (inattribute)
3219             break;
3220           switch (fvdef)
3221             {
3222             case fdefunkey:
3223             case fstartlist:
3224             case finlist:
3225             case fignore:
3226             case vignore:
3227               break;
3228             default:
3229               fvextern = FALSE;
3230               fvdef = fvnone;
3231             }
3232           continue;
3233         case '\'':
3234           inchar = TRUE;
3235           if (inattribute)
3236             break;
3237           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3238             {
3239               fvextern = FALSE;
3240               fvdef = fvnone;
3241             }
3242           continue;
3243         case '/':
3244           if (*lp == '*')
3245             {
3246               incomm = TRUE;
3247               lp++;
3248               c = ' ';
3249             }
3250           else if (/* cplpl && */ *lp == '/')
3251             {
3252               c = '\0';
3253             }
3254           break;
3255         case '%':
3256           if ((c_ext & YACC) && *lp == '%')
3257             {
3258               /* Entering or exiting rules section in yacc file. */
3259               lp++;
3260               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3261               typdef = tnone; structdef = snone;
3262               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3263               bracelev = 0;
3264               yacc_rules = !yacc_rules;
3265               continue;
3266             }
3267           else
3268             break;
3269         case '#':
3270           if (definedef == dnone)
3271             {
3272               char *cp;
3273               bool cpptoken = TRUE;
3274
3275               /* Look back on this line.  If all blanks, or nonblanks
3276                  followed by an end of comment, this is a preprocessor
3277                  token. */
3278               for (cp = newlb.buffer; cp < lp-1; cp++)
3279                 if (!iswhite (*cp))
3280                   {
3281                     if (*cp == '*' && *(cp+1) == '/')
3282                       {
3283                         cp++;
3284                         cpptoken = TRUE;
3285                       }
3286                     else
3287                       cpptoken = FALSE;
3288                   }
3289               if (cpptoken)
3290                 definedef = dsharpseen;
3291             } /* if (definedef == dnone) */
3292           continue;
3293         case '[':
3294           bracketlev++;
3295             continue;
3296         } /* switch (c) */
3297
3298
3299       /* Consider token only if some involved conditions are satisfied. */
3300       if (typdef != tignore
3301           && definedef != dignorerest
3302           && fvdef != finlist
3303           && templatelev == 0
3304           && (definedef != dnone
3305               || structdef != scolonseen)
3306           && !inattribute)
3307         {
3308           if (midtoken)
3309             {
3310               if (endtoken (c))
3311                 {
3312                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3313                     /* This handles :: in the middle,
3314                        but not at the beginning of an identifier.
3315                        Also, space-separated :: is not recognised. */
3316                     {
3317                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3318                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3319                       lp += 2;
3320                       toklen += 2;
3321                       c = lp[-1];
3322                       goto still_in_token;
3323                     }
3324                   else
3325                     {
3326                       bool funorvar = FALSE;
3327
3328                       if (yacc_rules
3329                           || consider_token (newlb.buffer + tokoff, toklen, c,
3330                                              &c_ext, bracelev, parlev,
3331                                              &funorvar))
3332                         {
3333                           if (fvdef == foperator)
3334                             {
3335                               char *oldlp = lp;
3336                               lp = skip_spaces (lp-1);
3337                               if (*lp != '\0')
3338                                 lp += 1;
3339                               while (*lp != '\0'
3340                                      && !iswhite (*lp) && *lp != '(')
3341                                 lp += 1;
3342                               c = *lp++;
3343                               toklen += lp - oldlp;
3344                             }
3345                           token.named = FALSE;
3346                           if (!plainc
3347                               && nestlev > 0 && definedef == dnone)
3348                             /* in struct body */
3349                             {
3350                               write_classname (&token_name, qualifier);
3351                               linebuffer_setlen (&token_name,
3352                                                  token_name.len+qlen+toklen);
3353                               strcat (token_name.buffer, qualifier);
3354                               strncat (token_name.buffer,
3355                                        newlb.buffer + tokoff, toklen);
3356                               token.named = TRUE;
3357                             }
3358                           else if (objdef == ocatseen)
3359                             /* Objective C category */
3360                             {
3361                               int len = strlen (objtag) + 2 + toklen;
3362                               linebuffer_setlen (&token_name, len);
3363                               strcpy (token_name.buffer, objtag);
3364                               strcat (token_name.buffer, "(");
3365                               strncat (token_name.buffer,
3366                                        newlb.buffer + tokoff, toklen);
3367                               strcat (token_name.buffer, ")");
3368                               token.named = TRUE;
3369                             }
3370                           else if (objdef == omethodtag
3371                                    || objdef == omethodparm)
3372                             /* Objective C method */
3373                             {
3374                               token.named = TRUE;
3375                             }
3376                           else if (fvdef == fdefunname)
3377                             /* GNU DEFUN and similar macros */
3378                             {
3379                               bool defun = (newlb.buffer[tokoff] == 'F');
3380                               int off = tokoff;
3381                               int len = toklen;
3382
3383                               /* Rewrite the tag so that emacs lisp DEFUNs
3384                                  can be found by their elisp name */
3385                               if (defun)
3386                                 {
3387                                   off += 1;
3388                                   len -= 1;
3389                                 }
3390                               linebuffer_setlen (&token_name, len);
3391                               strncpy (token_name.buffer,
3392                                        newlb.buffer + off, len);
3393                               token_name.buffer[len] = '\0';
3394                               if (defun)
3395                                 while (--len >= 0)
3396                                   if (token_name.buffer[len] == '_')
3397                                     token_name.buffer[len] = '-';
3398                               token.named = defun;
3399                             }
3400                           else
3401                             {
3402                               linebuffer_setlen (&token_name, toklen);
3403                               strncpy (token_name.buffer,
3404                                        newlb.buffer + tokoff, toklen);
3405                               token_name.buffer[toklen] = '\0';
3406                               /* Name macros and members. */
3407                               token.named = (structdef == stagseen
3408                                              || typdef == ttypeseen
3409                                              || typdef == tend
3410                                              || (funorvar
3411                                                  && definedef == dignorerest)
3412                                              || (funorvar
3413                                                  && definedef == dnone
3414                                                  && structdef == snone
3415                                                  && bracelev > 0));
3416                             }
3417                           token.lineno = lineno;
3418                           token.offset = tokoff;
3419                           token.length = toklen;
3420                           token.line = newlb.buffer;
3421                           token.linepos = newlinepos;
3422                           token.valid = TRUE;
3423
3424                           if (definedef == dnone
3425                               && (fvdef == fvnameseen
3426                                   || fvdef == foperator
3427                                   || structdef == stagseen
3428                                   || typdef == tend
3429                                   || typdef == ttypeseen
3430                                   || objdef != onone))
3431                             {
3432                               if (current_lb_is_new)
3433                                 switch_line_buffers ();
3434                             }
3435                           else if (definedef != dnone
3436                                    || fvdef == fdefunname
3437                                    || instruct)
3438                             make_C_tag (funorvar);
3439                         }
3440                       else /* not yacc and consider_token failed */
3441                         {
3442                           if (inattribute && fvdef == fignore)
3443                             {
3444                               /* We have just met __attribute__ after a
3445                                  function parameter list: do not tag the
3446                                  function again. */
3447                               fvdef = fvnone;
3448                             }
3449                         }
3450                       midtoken = FALSE;
3451                     }
3452                 } /* if (endtoken (c)) */
3453               else if (intoken (c))
3454                 still_in_token:
3455                 {
3456                   toklen++;
3457                   continue;
3458                 }
3459             } /* if (midtoken) */
3460           else if (begtoken (c))
3461             {
3462               switch (definedef)
3463                 {
3464                 case dnone:
3465                   switch (fvdef)
3466                     {
3467                     case fstartlist:
3468                       /* This prevents tagging fb in
3469                          void (__attribute__((noreturn)) *fb) (void);
3470                          Fixing this is not easy and not very important. */
3471                       fvdef = finlist;
3472                       continue;
3473                     case flistseen:
3474                       if (plainc || declarations)
3475                         {
3476                           make_C_tag (TRUE); /* a function */
3477                           fvdef = fignore;
3478                         }
3479                       break;
3480                     }
3481                   if (structdef == stagseen && !cjava)
3482                     {
3483                       popclass_above (bracelev);
3484                       structdef = snone;
3485                     }
3486                   break;
3487                 case dsharpseen:
3488                   savetoken = token;
3489                   break;
3490                 }
3491               if (!yacc_rules || lp == newlb.buffer + 1)
3492                 {
3493                   tokoff = lp - 1 - newlb.buffer;
3494                   toklen = 1;
3495                   midtoken = TRUE;
3496                 }
3497               continue;
3498             } /* if (begtoken) */
3499         } /* if must look at token */
3500
3501
3502       /* Detect end of line, colon, comma, semicolon and various braces
3503          after having handled a token.*/
3504       switch (c)
3505         {
3506         case ':':
3507           if (inattribute)
3508             break;
3509           if (yacc_rules && token.offset == 0 && token.valid)
3510             {
3511               make_C_tag (FALSE); /* a yacc function */
3512               break;
3513             }
3514           if (definedef != dnone)
3515             break;
3516           switch (objdef)
3517             {
3518             case  otagseen:
3519               objdef = oignore;
3520               make_C_tag (TRUE); /* an Objective C class */
3521               break;
3522             case omethodtag:
3523             case omethodparm:
3524               objdef = omethodcolon;
3525               linebuffer_setlen (&token_name, token_name.len + 1);
3526               strcat (token_name.buffer, ":");
3527               break;
3528             }
3529           if (structdef == stagseen)
3530             {
3531               structdef = scolonseen;
3532               break;
3533             }
3534           /* Should be useless, but may work as a safety net. */
3535           if (cplpl && fvdef == flistseen)
3536             {
3537               make_C_tag (TRUE); /* a function */
3538               fvdef = fignore;
3539               break;
3540             }
3541           break;
3542         case ';':
3543           if (definedef != dnone || inattribute)
3544             break;
3545           switch (typdef)
3546             {
3547             case tend:
3548             case ttypeseen:
3549               make_C_tag (FALSE); /* a typedef */
3550               typdef = tnone;
3551               fvdef = fvnone;
3552               break;
3553             case tnone:
3554             case tinbody:
3555             case tignore:
3556               switch (fvdef)
3557                 {
3558                 case fignore:
3559                   if (typdef == tignore || cplpl)
3560                     fvdef = fvnone;
3561                   break;
3562                 case fvnameseen:
3563                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3564                       || (members && instruct))
3565                     make_C_tag (FALSE); /* a variable */
3566                   fvextern = FALSE;
3567                   fvdef = fvnone;
3568                   token.valid = FALSE;
3569                   break;
3570                 case flistseen:
3571                   if ((declarations
3572                        && (cplpl || !instruct)
3573                        && (typdef == tnone || (typdef != tignore && instruct)))
3574                       || (members
3575                           && plainc && instruct))
3576                     make_C_tag (TRUE);  /* a function */
3577                   /* FALLTHRU */
3578                 default:
3579                   fvextern = FALSE;
3580                   fvdef = fvnone;
3581                   if (declarations
3582                        && cplpl && structdef == stagseen)
3583                     make_C_tag (FALSE); /* forward declaration */
3584                   else
3585                     token.valid = FALSE;
3586                 } /* switch (fvdef) */
3587               /* FALLTHRU */
3588             default:
3589               if (!instruct)
3590                 typdef = tnone;
3591             }
3592           if (structdef == stagseen)
3593             structdef = snone;
3594           break;
3595         case ',':
3596           if (definedef != dnone || inattribute)
3597             break;
3598           switch (objdef)
3599             {
3600             case omethodtag:
3601             case omethodparm:
3602               make_C_tag (TRUE); /* an Objective C method */
3603               objdef = oinbody;
3604               break;
3605             }
3606           switch (fvdef)
3607             {
3608             case fdefunkey:
3609             case foperator:
3610             case fstartlist:
3611             case finlist:
3612             case fignore:
3613             case vignore:
3614               break;
3615             case fdefunname:
3616               fvdef = fignore;
3617               break;
3618             case fvnameseen:
3619               if (parlev == 0
3620                   && ((globals
3621                        && bracelev == 0
3622                        && templatelev == 0
3623                        && (!fvextern || declarations))
3624                       || (members && instruct)))
3625                   make_C_tag (FALSE); /* a variable */
3626               break;
3627             case flistseen:
3628               if ((declarations && typdef == tnone && !instruct)
3629                   || (members && typdef != tignore && instruct))
3630                 {
3631                   make_C_tag (TRUE); /* a function */
3632                   fvdef = fvnameseen;
3633                 }
3634               else if (!declarations)
3635                 fvdef = fvnone;
3636               token.valid = FALSE;
3637               break;
3638             default:
3639               fvdef = fvnone;
3640             }
3641           if (structdef == stagseen)
3642             structdef = snone;
3643           break;
3644         case ']':
3645           if (definedef != dnone || inattribute)
3646             break;
3647           if (structdef == stagseen)
3648             structdef = snone;
3649           switch (typdef)
3650             {
3651             case ttypeseen:
3652             case tend:
3653               typdef = tignore;
3654               make_C_tag (FALSE);       /* a typedef */
3655               break;
3656             case tnone:
3657             case tinbody:
3658               switch (fvdef)
3659                 {
3660                 case foperator:
3661                 case finlist:
3662                 case fignore:
3663                 case vignore:
3664                   break;
3665                 case fvnameseen:
3666                   if ((members && bracelev == 1)
3667                       || (globals && bracelev == 0
3668                           && (!fvextern || declarations)))
3669                     make_C_tag (FALSE); /* a variable */
3670                   /* FALLTHRU */
3671                 default:
3672                   fvdef = fvnone;
3673                 }
3674               break;
3675             }
3676           break;
3677         case '(':
3678           if (inattribute)
3679             {
3680               attrparlev++;
3681               break;
3682             }
3683           if (definedef != dnone)
3684             break;
3685           if (objdef == otagseen && parlev == 0)
3686             objdef = oparenseen;
3687           switch (fvdef)
3688             {
3689             case fvnameseen:
3690               if (typdef == ttypeseen
3691                   && *lp != '*'
3692                   && !instruct)
3693                 {
3694                   /* This handles constructs like:
3695                      typedef void OperatorFun (int fun); */
3696                   make_C_tag (FALSE);
3697                   typdef = tignore;
3698                   fvdef = fignore;
3699                   break;
3700                 }
3701               /* FALLTHRU */
3702             case foperator:
3703               fvdef = fstartlist;
3704               break;
3705             case flistseen:
3706               fvdef = finlist;
3707               break;
3708             }
3709           parlev++;
3710           break;
3711         case ')':
3712           if (inattribute)
3713             {
3714               if (--attrparlev == 0)
3715                 inattribute = FALSE;
3716               break;
3717             }
3718           if (definedef != dnone)
3719             break;
3720           if (objdef == ocatseen && parlev == 1)
3721             {
3722               make_C_tag (TRUE); /* an Objective C category */
3723               objdef = oignore;
3724             }
3725           if (--parlev == 0)
3726             {
3727               switch (fvdef)
3728                 {
3729                 case fstartlist:
3730                 case finlist:
3731                   fvdef = flistseen;
3732                   break;
3733                 }
3734               if (!instruct
3735                   && (typdef == tend
3736                       || typdef == ttypeseen))
3737                 {
3738                   typdef = tignore;
3739                   make_C_tag (FALSE); /* a typedef */
3740                 }
3741             }
3742           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3743             parlev = 0;
3744           break;
3745         case '{':
3746           if (definedef != dnone)
3747             break;
3748           if (typdef == ttypeseen)
3749             {
3750               /* Whenever typdef is set to tinbody (currently only
3751                  here), typdefbracelev should be set to bracelev. */
3752               typdef = tinbody;
3753               typdefbracelev = bracelev;
3754             }
3755           switch (fvdef)
3756             {
3757             case flistseen:
3758               make_C_tag (TRUE);    /* a function */
3759               /* FALLTHRU */
3760             case fignore:
3761               fvdef = fvnone;
3762               break;
3763             case fvnone:
3764               switch (objdef)
3765                 {
3766                 case otagseen:
3767                   make_C_tag (TRUE); /* an Objective C class */
3768                   objdef = oignore;
3769                   break;
3770                 case omethodtag:
3771                 case omethodparm:
3772                   make_C_tag (TRUE); /* an Objective C method */
3773                   objdef = oinbody;
3774                   break;
3775                 default:
3776                   /* Neutralize `extern "C" {' grot. */
3777                   if (bracelev == 0 && structdef == snone && nestlev == 0
3778                       && typdef == tnone)
3779                     bracelev = -1;
3780                 }
3781               break;
3782             }
3783           switch (structdef)
3784             {
3785             case skeyseen:         /* unnamed struct */
3786               pushclass_above (bracelev, NULL, 0);
3787               structdef = snone;
3788               break;
3789             case stagseen:         /* named struct or enum */
3790             case scolonseen:       /* a class */
3791               pushclass_above (bracelev,token.line+token.offset, token.length);
3792               structdef = snone;
3793               make_C_tag (FALSE);  /* a struct or enum */
3794               break;
3795             }
3796           bracelev += 1;
3797           break;
3798         case '*':
3799           if (definedef != dnone)
3800             break;
3801           if (fvdef == fstartlist)
3802             {
3803               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3804               token.valid = FALSE;
3805             }
3806           break;
3807         case '}':
3808           if (definedef != dnone)
3809             break;
3810           bracelev -= 1;
3811           if (!ignoreindent && lp == newlb.buffer + 1)
3812             {
3813               if (bracelev != 0)
3814                 token.valid = FALSE; /* unexpected value, token unreliable */
3815               bracelev = 0;     /* reset brace level if first column */
3816               parlev = 0;       /* also reset paren level, just in case... */
3817             }
3818           else if (bracelev < 0)
3819             {
3820               token.valid = FALSE; /* something gone amiss, token unreliable */
3821               bracelev = 0;
3822             }
3823           if (bracelev == 0 && fvdef == vignore)
3824             fvdef = fvnone;             /* end of function */
3825           popclass_above (bracelev);
3826           structdef = snone;
3827           /* Only if typdef == tinbody is typdefbracelev significant. */
3828           if (typdef == tinbody && bracelev <= typdefbracelev)
3829             {
3830               assert (bracelev == typdefbracelev);
3831               typdef = tend;
3832             }
3833           break;
3834         case '=':
3835           if (definedef != dnone)
3836             break;
3837           switch (fvdef)
3838             {
3839             case foperator:
3840             case finlist:
3841             case fignore:
3842             case vignore:
3843               break;
3844             case fvnameseen:
3845               if ((members && bracelev == 1)
3846                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3847                 make_C_tag (FALSE); /* a variable */
3848               /* FALLTHRU */
3849             default:
3850               fvdef = vignore;
3851             }
3852           break;
3853         case '<':
3854           if (cplpl
3855               && (structdef == stagseen || fvdef == fvnameseen))
3856             {
3857               templatelev++;
3858               break;
3859             }
3860           goto resetfvdef;
3861         case '>':
3862           if (templatelev > 0)
3863             {
3864               templatelev--;
3865               break;
3866             }
3867           goto resetfvdef;
3868         case '+':
3869         case '-':
3870           if (objdef == oinbody && bracelev == 0)
3871             {
3872               objdef = omethodsign;
3873               break;
3874             }
3875           /* FALLTHRU */
3876         resetfvdef:
3877         case '#': case '~': case '&': case '%': case '/':
3878         case '|': case '^': case '!': case '.': case '?':
3879           if (definedef != dnone)
3880             break;
3881           /* These surely cannot follow a function tag in C. */
3882           switch (fvdef)
3883             {
3884             case foperator:
3885             case finlist:
3886             case fignore:
3887             case vignore:
3888               break;
3889             default:
3890               fvdef = fvnone;
3891             }
3892           break;
3893         case '\0':
3894           if (objdef == otagseen)
3895             {
3896               make_C_tag (TRUE); /* an Objective C class */
3897               objdef = oignore;
3898             }
3899           /* If a macro spans multiple lines don't reset its state. */
3900           if (quotednl)
3901             CNL_SAVE_DEFINEDEF ();
3902           else
3903             CNL ();
3904           break;
3905         } /* switch (c) */
3906
3907     } /* while not eof */
3908
3909   free (lbs[0].lb.buffer);
3910   free (lbs[1].lb.buffer);
3911 }
3912
3913 /*
3914  * Process either a C++ file or a C file depending on the setting
3915  * of the global flag `cplusplus'.
      *
      * When `cplusplus' is nonzero, INF is handed to C_entries with
      * C_PLPL.  Otherwise C_AUTO is passed; in that mode C_entries itself
      * turns C_PLPL on (and C_AUTO off), e.g. when it meets `::' in the
      * middle of an identifier.
3916  */
3917 static void
3918 default_C_entries (FILE *inf)
3919 {
3920   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3921 }
3922
3923 /* Always do plain C: scan INF with C_entries, passing 0, i.e. none of
        the C_PLPL / C_AUTO / C_JAVA / C_STAR / YACC selectors used by the
        sibling entry points.  The `cplusplus' flag is not consulted.  */
3924 static void
3925 plain_C_entries (FILE *inf)
3926 {
3927   C_entries (0, inf);
3928 }
3929
3930 /* Always do C++: scan INF with C_entries, selecting C_PLPL
        unconditionally (unlike default_C_entries, the `cplusplus' flag
        is not consulted).  */
3931 static void
3932 Cplusplus_entries (FILE *inf)
3933 {
3934   C_entries (C_PLPL, inf);
3935 }
3936
3937 /* Always do Java: scan INF with C_entries, selecting C_JAVA.  */
3938 static void
3939 Cjava_entries (FILE *inf)
3940 {
3941   C_entries (C_JAVA, inf);
3942 }
3943
3944 /* Always do C*: scan INF with C_entries, selecting C_STAR.  */
3945 static void
3946 Cstar_entries (FILE *inf)
3947 {
3948   C_entries (C_STAR, inf);
3949 }
3950
3951 /* Always do Yacc: scan INF with C_entries, selecting YACC.  */
3952 static void
3953 Yacc_entries (FILE *inf)
3954 {
3955   C_entries (YACC, inf);
3956 }
3957
3958 \f
3959 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to the header of a `for' loop; the
        statement written after it is the loop body.  Before each iteration
        the loop stops if feof (file_pointer) is true; otherwise it calls
        readline (&line_buffer, file_pointer) and points char_pointer at
        line_buffer.buffer.  The trailing TRUE makes the comma expression
        always succeed, so only feof ends the loop.  A `continue' in the
        body therefore moves on to the next line.  line_buffer must be an
        object whose address can be taken; char_pointer must be an lvalue.  */
3960 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
3961   for (;                        /* loop initialization */               \
3962        !feof (file_pointer)     /* loop test */                         \
3963        &&                       /* instructions at start of loop */     \
3964           (readline (&line_buffer, file_pointer),                       \
3965            char_pointer = line_buffer.buffer,                           \
3966            TRUE);                                                       \
3967       )
3968
3969 #define LOOKING_AT(cp, kw)  /* kw: the keyword, a literal string; cp: char pointer lvalue, evaluated several times */ \
3970   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
3971    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
3972    && notinname ((cp)[sizeof(kw)-1])            /* char after kw must satisfy notinname (end of kw) */ \
3973    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* only on a match: move cp past kw, then skip spaces */
3974
3975 /* Similar to LOOKING_AT, but the comparison is done with strncaseeq
        (presumably case-insensitive -- it is defined elsewhere), the
        character after the keyword is not tested with notinname, and
        spaces after the keyword are not skipped: on a match cp is advanced
        by exactly the length of kw.  cp is evaluated more than once and is
        modified, so it must be a plain lvalue.  */
3976 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3977   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
3978    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
3979    && ((cp) += sizeof(kw)-1))                   /* skip kw only, not spaces */
3980
3981 /*
3982  * Read a file, but do no processing.  This is used to do regexp
3983  * matching on files that have no language defined.
      *
      * Every line of INF is fetched with readline into the global line
      * buffer `lb' and then dropped; this function makes no tags itself.
      * NOTE(review): the regexp matching presumably happens inside
      * readline -- it is not visible here; confirm.
3984  */
3985 static void
3986 just_read_file (FILE *inf)
3987 {
       /* Only there because LOOP_ON_INPUT_LINES needs a pointer to set;
          its value is never read.  */
3988   register char *dummy;
3989
3990   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3991     continue;
3992 }
3993
3994 \f
3995 /* Fortran parsing */
3996
     /* Helpers of Fortran_functions.  Both read and move the global
        cursor `dbp'; F_getit may also read a further line from its FILE
        argument.  */
3997 static void F_takeprec (void);
3998 static void F_getit (FILE *);
3999
4000 static void
4001 F_takeprec (void)
4002 {
       /* Skip an optional `*N' or `*(*)' suffix at the global cursor `dbp'.
          Fortran_functions calls this right after it has matched a type
          keyword.  Leading spaces are always skipped.  When there is no `*',
          dbp is left on the first non-space character.  */
4003   dbp = skip_spaces (dbp);
4004   if (*dbp != '*')
4005     return;
4006   dbp++;
4007   dbp = skip_spaces (dbp);
       /* `*(*)': step over the three characters and stop.  */
4008   if (strneq (dbp, "(*)", 3))
4009     {
4010       dbp += 3;
4011       return;
4012     }
       /* Neither `(*)' nor a digit after the `*': move dbp back by one
          character, which the original comment describes as forcing the
          caller's match to fail.  */
4013   if (!ISDIGIT (*dbp))
4014     {
4015       --dbp;                    /* force failure */
4016       return;
4017     }
       /* `*N': skip the whole run of digits.  */
4018   do
4019     dbp++;
4020   while (ISDIGIT (*dbp));
4021 }
4022
4023 static void
4024 F_getit (FILE *inf)
4025 {
       /* Make a tag for the name found at the global cursor `dbp'.

          Spaces are skipped first.  If nothing is left on the current
          line, the next line is read from INF into `lb' and is accepted
          only as a continuation line, i.e. one with `&' at index 5 (the
          sixth column); scanning then resumes after that column.  The name
          must start with a letter, `_' or `$' and extends over the
          following characters for which intoken is true.  If no name is
          found, no tag is made.  */
4026   register char *cp;
4027
4028   dbp = skip_spaces (dbp);
4029   if (*dbp == '\0')
4030     {
           int col;

4031       readline (&lb, inf);
4032       dbp = lb.buffer;
           /* Make sure the new line really has five characters before the
              continuation column.  On a shorter line dbp[5] lies beyond the
              terminating NUL, so it must not be looked at: a stray `&'
              there would make us parse bytes that are not part of the
              line.  */
           for (col = 0; col < 5 && dbp[col] != '\0'; col++)
             continue;
4033       if (col < 5 || dbp[5] != '&')
4034         return;
4035       dbp += 6;
4036       dbp = skip_spaces (dbp);
4037     }
4038   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4039     return;
       /* Find the end of the name; cp stops on the first character that is
          not part of it.  */
4040   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4041     continue;
4042   make_tag (dbp, cp-dbp, TRUE,
4043             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4044 }
4045
4046
4047 static void
4048 Fortran_functions (FILE *inf)
4049 {
       /* Tag Fortran program units in INF, one input line at a time, using
          the global cursor `dbp' and the global line buffer `lb'.

          On each line, an optional leading `%', an optional `recursive'
          and an optional type prefix are stepped over.  If the line then
          introduces `function', `subroutine', `entry' or block data, the
          name is tagged by F_getit.  Block data with nothing after it is
          tagged with the fixed name "blockdata".

          NOTE(review): nocase_tail is defined elsewhere; it is assumed to
          match its argument case-insensitively at dbp and to advance dbp
          past it on success -- confirm.  */
4050   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4051     {
4052       if (*dbp == '%')
4053         dbp++;                  /* Ratfor escape to fortran */
4054       dbp = skip_spaces (dbp);
4055       if (*dbp == '\0')
4056         continue;
4057
           /* LOOKING_AT_NOCASE only steps over the keyword itself, so the
              spaces after it are skipped here.  */
4058       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4059         dbp = skip_spaces (dbp);
4060
           /* Optional type prefix, possibly followed by `*N' or `*(*)'.  */
4061       switch (lowcase (*dbp))
4062         {
4063         case 'i':
4064           if (nocase_tail ("integer"))
4065             F_takeprec ();
4066           break;
4067         case 'r':
4068           if (nocase_tail ("real"))
4069             F_takeprec ();
4070           break;
4071         case 'l':
4072           if (nocase_tail ("logical"))
4073             F_takeprec ();
4074           break;
4075         case 'c':
4076           if (nocase_tail ("complex") || nocase_tail ("character"))
4077             F_takeprec ();
4078           break;
4079         case 'd':
               /* `double' must be followed by `precision' on the same line;
                  otherwise give up on this line.  The `continue's below act
                  on the enclosing line loop, not on the switch.  */
4080           if (nocase_tail ("double"))
4081             {
4082               dbp = skip_spaces (dbp);
4083               if (*dbp == '\0')
4084                 continue;
4085               if (nocase_tail ("precision"))
4086                 break;
4087               continue;
4088             }
4089           break;
4090         }
4091       dbp = skip_spaces (dbp);
4092       if (*dbp == '\0')
4093         continue;
           /* Program-unit keyword.  Every case ends with `continue': the
              line is finished whether or not a tag was made.  */
4094       switch (lowcase (*dbp))
4095         {
4096         case 'f':
4097           if (nocase_tail ("function"))
4098             F_getit (inf);
4099           continue;
4100         case 's':
4101           if (nocase_tail ("subroutine"))
4102             F_getit (inf);
4103           continue;
4104         case 'e':
4105           if (nocase_tail ("entry"))
4106             F_getit (inf);
4107           continue;
4108         case 'b':
4109           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4110             {
4111               dbp = skip_spaces (dbp);
4112               if (*dbp == '\0') /* assume un-named */
4113                 make_tag ("blockdata", 9, TRUE,
4114                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4115               else
4116                 F_getit (inf);  /* look for name */
4117             }
4118           continue;
4119         }
4120     }
4121 }
4122
4123 \f
4124 /*
4125  * Ada parsing
4126  * Original code by
4127  * Philippe Waroquiers (1998)
4128  */
4129
4130 /* Once we are positioned after an "interesting" keyword, get the name
4131    that follows it and make a tag for it.

        The name is looked for at the global cursor `dbp'.  INF is the
        input file, from which further lines are read when the current one
        is exhausted or continues with a `--' comment.  NAME_QUALIFIER is
        the suffix appended to the name to build the tag (the callers pass
        "/f", "/p", "/s", ...); it is replaced by "/b" if the word `body'
        comes before the name.  The word `type' before the name is skipped.

        The name is either the text between double quotes, or a run of
        letters, digits, `_' and `.'.  If that run is empty, no tag is
        made.

        NOTE(review): nocase_tail is defined elsewhere; it is assumed to
        match its argument case-insensitively at dbp and to advance dbp
        past it on success -- confirm.  */
4132 static void
4133 Ada_getit (FILE *inf, const char *name_qualifier)
4134 {
4135   register char *cp;
4136   char *name;
4137   char c;
4138
4139   while (!feof (inf))
4140     {
4141       dbp = skip_spaces (dbp);
           /* End of line or start of a comment: carry on with the next
              line, from its first character.  */
4142       if (*dbp == '\0'
4143           || (dbp[0] == '-' && dbp[1] == '-'))
4144         {
4145           readline (&lb, inf);
4146           dbp = lb.buffer;
4147         }
4148       switch (lowcase(*dbp))
4149         {
4150         case 'b':
4151           if (nocase_tail ("body"))
4152             {
4153               /* Skipping body of   procedure body   or   package body or ....
4154                  resetting qualifier to body instead of spec. */
4155               name_qualifier = "/b";
4156               continue;
4157             }
4158           break;
4159         case 't':
4160           /* Skipping type of   task type   or   protected type ... */
4161           if (nocase_tail ("type"))
4162             continue;
4163           break;
4164         }
           /* Quoted name: everything up to the closing quote, or up to the
              end of the line if the quote is not closed.  */
4165       if (*dbp == '"')
4166         {
4167           dbp += 1;
4168           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4169             continue;
4170         }
4171       else
4172         {
4173           dbp = skip_spaces (dbp);
4174           for (cp = dbp;
4175                (*cp != '\0'
4176                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4177                cp++)
4178             continue;
               /* Nothing that looks like a name: give up without a tag.  */
4179           if (cp == dbp)
4180             return;
4181         }
           /* Terminate the name in place just long enough for concat to
              read it as a string, then put the saved character back.  */
4182       c = *cp;
4183       *cp = '\0';
4184       name = concat (dbp, name_qualifier, "");
4185       *cp = c;
4186       make_tag (name, strlen (name), TRUE,
4187                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
           /* Release the string built by concat.  */
4188       free (name);
           /* Step over the closing quote.  In the unquoted case dbp is left
              at the start of the name.  */
4189       if (c == '"')
4190         dbp = cp + 1;
4191       return;
4192     }
4193 }
4194
4195 static void
4196 Ada_funcs (FILE *inf)
4197 {
4198   bool inquote = FALSE;
4199   bool skip_till_semicolumn = FALSE;
4200
4201   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4202     {
4203       while (*dbp != '\0')
4204         {
4205           /* Skip a string i.e. "abcd". */
4206           if (inquote || (*dbp == '"'))
4207             {
4208               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4209               if (dbp != NULL)
4210                 {
4211                   inquote = FALSE;
4212                   dbp += 1;
4213                   continue;     /* advance char */
4214                 }
4215               else
4216                 {
4217                   inquote = TRUE;
4218                   break;        /* advance line */
4219                 }
4220             }
4221
4222           /* Skip comments. */
4223           if (dbp[0] == '-' && dbp[1] == '-')
4224             break;              /* advance line */
4225
4226           /* Skip character enclosed in single quote i.e. 'a'
4227              and skip single quote starting an attribute i.e. 'Image. */
4228           if (*dbp == '\'')
4229             {
4230               dbp++ ;
4231               if (*dbp != '\0')
4232                 dbp++;
4233               continue;
4234             }
4235
4236           if (skip_till_semicolumn)
4237             {
4238               if (*dbp == ';')
4239                 skip_till_semicolumn = FALSE;
4240               dbp++;
4241               continue;         /* advance char */
4242             }
4243
4244           /* Search for beginning of a token.  */
4245           if (!begtoken (*dbp))
4246             {
4247               dbp++;
4248               continue;         /* advance char */
4249             }
4250
4251           /* We are at the beginning of a token. */
4252           switch (lowcase(*dbp))
4253             {
4254             case 'f':
4255               if (!packages_only && nocase_tail ("function"))
4256                 Ada_getit (inf, "/f");
4257               else
4258                 break;          /* from switch */
4259               continue;         /* advance char */
4260             case 'p':
4261               if (!packages_only && nocase_tail ("procedure"))
4262                 Ada_getit (inf, "/p");
4263               else if (nocase_tail ("package"))
4264                 Ada_getit (inf, "/s");
4265               else if (nocase_tail ("protected")) /* protected type */
4266                 Ada_getit (inf, "/t");
4267               else
4268                 break;          /* from switch */
4269               continue;         /* advance char */
4270
4271             case 'u':
4272               if (typedefs && !packages_only && nocase_tail ("use"))
4273                 {
4274                   /* when tagging types, avoid tagging  use type Pack.Typename;
4275                      for this, we will skip everything till a ; */
4276                   skip_till_semicolumn = TRUE;
4277                   continue;     /* advance char */
4278                 }
4279
4280             case 't':
4281               if (!packages_only && nocase_tail ("task"))
4282                 Ada_getit (inf, "/k");
4283               else if (typedefs && !packages_only && nocase_tail ("type"))
4284                 {
4285                   Ada_getit (inf, "/t");
4286                   while (*dbp != '\0')
4287                     dbp += 1;
4288                 }
4289               else
4290                 break;          /* from switch */
4291               continue;         /* advance char */
4292             }
4293
4294           /* Look for the end of the token. */
4295           while (!endtoken (*dbp))
4296             dbp++;
4297
4298         } /* advance char */
4299     } /* advance line */
4300 }
4301
4302 \f
4303 /*
4304  * Unix and microcontroller assembly tag handling
4305  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4306  * Idea by Bob Weiner, Motorola Inc. (1994)
4307  */
4308 static void
4309 Asm_labels (FILE *inf)
4310 {
4311   register char *cp;
4312
4313   LOOP_ON_INPUT_LINES (inf, lb, cp)
4314     {
4315       /* If first char is alphabetic or one of [_.$], test for colon
4316          following identifier. */
4317       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4318         {
4319           /* Read past label. */
4320           cp++;
4321           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4322             cp++;
4323           if (*cp == ':' || iswhite (*cp))
4324             /* Found end of label, so copy it and add it to the table. */
4325             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4326                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4327         }
4328     }
4329 }
4330
4331 \f
4332 /*
4333  * Perl support
4334  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4335  * Perl variable names: /^(my|local).../
4336  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4337  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4338  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4339  */
4340 static void
4341 Perl_functions (FILE *inf)
4342 {
4343   char *package = savestr ("main"); /* current package name */
4344   register char *cp;
4345
4346   LOOP_ON_INPUT_LINES (inf, lb, cp)
4347     {
4348       cp = skip_spaces (cp);
4349
4350       if (LOOKING_AT (cp, "package"))
4351         {
4352           free (package);
4353           get_tag (cp, &package);
4354         }
4355       else if (LOOKING_AT (cp, "sub"))
4356         {
4357           char *pos;
4358           char *sp = cp;
4359
4360           while (!notinname (*cp))
4361             cp++;
4362           if (cp == sp)
4363             continue;           /* nothing found */
4364           if ((pos = etags_strchr (sp, ':')) != NULL
4365               && pos < cp && pos[1] == ':')
4366             /* The name is already qualified. */
4367             make_tag (sp, cp - sp, TRUE,
4368                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4369           else
4370             /* Qualify it. */
4371             {
4372               char savechar, *name;
4373
4374               savechar = *cp;
4375               *cp = '\0';
4376               name = concat (package, "::", sp);
4377               *cp = savechar;
4378               make_tag (name, strlen(name), TRUE,
4379                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4380               free (name);
4381             }
4382         }
4383        else if (globals)        /* only if we are tagging global vars */
4384         {
4385           /* Skip a qualifier, if any. */
4386           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4387           /* After "my" or "local", but before any following paren or space. */
4388           char *varstart = cp;
4389
4390           if (qual              /* should this be removed?  If yes, how? */
4391               && (*cp == '$' || *cp == '@' || *cp == '%'))
4392             {
4393               varstart += 1;
4394               do
4395                 cp++;
4396               while (ISALNUM (*cp) || *cp == '_');
4397             }
4398           else if (qual)
4399             {
4400               /* Should be examining a variable list at this point;
4401                  could insist on seeing an open parenthesis. */
4402               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4403                 cp++;
4404             }
4405           else
4406             continue;
4407
4408           make_tag (varstart, cp - varstart, FALSE,
4409                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4410         }
4411     }
4412   free (package);
4413 }
4414
4415
4416 /*
4417  * Python support
4418  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4419  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4420  * More ideas by seb bacon <seb@jamkit.com> (2002)
4421  */
4422 static void
4423 Python_functions (FILE *inf)
4424 {
4425   register char *cp;
4426
4427   LOOP_ON_INPUT_LINES (inf, lb, cp)
4428     {
4429       cp = skip_spaces (cp);
4430       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4431         {
4432           char *name = cp;
4433           while (!notinname (*cp) && *cp != ':')
4434             cp++;
4435           make_tag (name, cp - name, TRUE,
4436                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4437         }
4438     }
4439 }
4440
4441 \f
4442 /*
4443  * PHP support
4444  * Look for:
4445  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4446  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4447  *  - /^[ \t]*define\(\"[^\"]+/
4448  * Only with --members:
4449  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4450  * Idea by Diez B. Roggisch (2001)
4451  */
4452 static void
4453 PHP_functions (FILE *inf)
4454 {
4455   register char *cp, *name;
4456   bool search_identifier = FALSE;
4457
4458   LOOP_ON_INPUT_LINES (inf, lb, cp)
4459     {
4460       cp = skip_spaces (cp);
4461       name = cp;
4462       if (search_identifier
4463           && *cp != '\0')
4464         {
4465           while (!notinname (*cp))
4466             cp++;
4467           make_tag (name, cp - name, TRUE,
4468                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4469           search_identifier = FALSE;
4470         }
4471       else if (LOOKING_AT (cp, "function"))
4472         {
4473           if(*cp == '&')
4474             cp = skip_spaces (cp+1);
4475           if(*cp != '\0')
4476             {
4477               name = cp;
4478               while (!notinname (*cp))
4479                 cp++;
4480               make_tag (name, cp - name, TRUE,
4481                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4482             }
4483           else
4484             search_identifier = TRUE;
4485         }
4486       else if (LOOKING_AT (cp, "class"))
4487         {
4488           if (*cp != '\0')
4489             {
4490               name = cp;
4491               while (*cp != '\0' && !iswhite (*cp))
4492                 cp++;
4493               make_tag (name, cp - name, FALSE,
4494                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4495             }
4496           else
4497             search_identifier = TRUE;
4498         }
4499       else if (strneq (cp, "define", 6)
4500                && (cp = skip_spaces (cp+6))
4501                && *cp++ == '('
4502                && (*cp == '"' || *cp == '\''))
4503         {
4504           char quote = *cp++;
4505           name = cp;
4506           while (*cp != quote && *cp != '\0')
4507             cp++;
4508           make_tag (name, cp - name, FALSE,
4509                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4510         }
4511       else if (members
4512                && LOOKING_AT (cp, "var")
4513                && *cp == '$')
4514         {
4515           name = cp;
4516           while (!notinname(*cp))
4517             cp++;
4518           make_tag (name, cp - name, FALSE,
4519                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4520         }
4521     }
4522 }
4523
4524 \f
4525 /*
4526  * Cobol tag functions
4527  * We could look for anything that could be a paragraph name.
4528  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4529  * Idea by Corny de Souza (1993)
4530  */
4531 static void
4532 Cobol_paragraphs (FILE *inf)
4533 {
4534   register char *bp, *ep;
4535
4536   LOOP_ON_INPUT_LINES (inf, lb, bp)
4537     {
4538       if (lb.len < 9)
4539         continue;
4540       bp += 8;
4541
4542       /* If eoln, compiler option or comment ignore whole line. */
4543       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4544         continue;
4545
4546       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4547         continue;
4548       if (*ep++ == '.')
4549         make_tag (bp, ep - bp, TRUE,
4550                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4551     }
4552 }
4553
4554 \f
4555 /*
4556  * Makefile support
4557  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4558  */
4559 static void
4560 Makefile_targets (FILE *inf)
4561 {
4562   register char *bp;
4563
4564   LOOP_ON_INPUT_LINES (inf, lb, bp)
4565     {
4566       if (*bp == '\t' || *bp == '#')
4567         continue;
4568       while (*bp != '\0' && *bp != '=' && *bp != ':')
4569         bp++;
4570       if (*bp == ':' || (globals && *bp == '='))
4571         {
4572           /* We should detect if there is more than one tag, but we do not.
4573              We just skip initial and final spaces. */
4574           char * namestart = skip_spaces (lb.buffer);
4575           while (--bp > namestart)
4576             if (!notinname (*bp))
4577               break;
4578           make_tag (namestart, bp - namestart + 1, TRUE,
4579                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4580         }
4581     }
4582 }
4583
4584 \f
4585 /*
4586  * Pascal parsing
4587  * Original code by Mosur K. Mohan (1989)
4588  *
4589  *  Locates tags for procedures & functions.  Doesn't do any type- or
4590  *  var-definitions.  It does look for the keyword "extern" or
4591  *  "forward" immediately following the procedure statement; if found,
4592  *  the tag is skipped.
4593  */
4594 static void
4595 Pascal_functions (FILE *inf)
4596 {
4597   linebuffer tline;             /* mostly copied from C_entries */
4598   long save_lcno;
4599   int save_lineno, namelen, taglen;
4600   char c, *name;
4601
4602   bool                          /* each of these flags is TRUE if: */
4603     incomment,                  /* point is inside a comment */
4604     inquote,                    /* point is inside '..' string */
4605     get_tagname,                /* point is after PROCEDURE/FUNCTION
4606                                    keyword, so next item = potential tag */
4607     found_tag,                  /* point is after a potential tag */
4608     inparms,                    /* point is within parameter-list */
4609     verify_tag;                 /* point has passed the parm-list, so the
4610                                    next token will determine whether this
4611                                    is a FORWARD/EXTERN to be ignored, or
4612                                    whether it is a real tag */
4613
4614   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4615   name = NULL;                  /* keep compiler quiet */
4616   dbp = lb.buffer;
4617   *dbp = '\0';
4618   linebuffer_init (&tline);
4619
4620   incomment = inquote = FALSE;
4621   found_tag = FALSE;            /* have a proc name; check if extern */
4622   get_tagname = FALSE;          /* found "procedure" keyword         */
4623   inparms = FALSE;              /* found '(' after "proc"            */
4624   verify_tag = FALSE;           /* check if "extern" is ahead        */
4625
4626
4627   while (!feof (inf))           /* long main loop to get next char */
4628     {
4629       c = *dbp++;
4630       if (c == '\0')            /* if end of line */
4631         {
4632           readline (&lb, inf);
4633           dbp = lb.buffer;
4634           if (*dbp == '\0')
4635             continue;
4636           if (!((found_tag && verify_tag)
4637                 || get_tagname))
4638             c = *dbp++;         /* only if don't need *dbp pointing
4639                                    to the beginning of the name of
4640                                    the procedure or function */
4641         }
4642       if (incomment)
4643         {
4644           if (c == '}')         /* within { } comments */
4645             incomment = FALSE;
4646           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4647             {
4648               dbp++;
4649               incomment = FALSE;
4650             }
4651           continue;
4652         }
4653       else if (inquote)
4654         {
4655           if (c == '\'')
4656             inquote = FALSE;
4657           continue;
4658         }
4659       else
4660         switch (c)
4661           {
4662           case '\'':
4663             inquote = TRUE;     /* found first quote */
4664             continue;
4665           case '{':             /* found open { comment */
4666             incomment = TRUE;
4667             continue;
4668           case '(':
4669             if (*dbp == '*')    /* found open (* comment */
4670               {
4671                 incomment = TRUE;
4672                 dbp++;
4673               }
4674             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4675               inparms = TRUE;
4676             continue;
4677           case ')':             /* end of parms list */
4678             if (inparms)
4679               inparms = FALSE;
4680             continue;
4681           case ';':
4682             if (found_tag && !inparms) /* end of proc or fn stmt */
4683               {
4684                 verify_tag = TRUE;
4685                 break;
4686               }
4687             continue;
4688           }
4689       if (found_tag && verify_tag && (*dbp != ' '))
4690         {
4691           /* Check if this is an "extern" declaration. */
4692           if (*dbp == '\0')
4693             continue;
4694           if (lowcase (*dbp == 'e'))
4695             {
4696               if (nocase_tail ("extern")) /* superfluous, really! */
4697                 {
4698                   found_tag = FALSE;
4699                   verify_tag = FALSE;
4700                 }
4701             }
4702           else if (lowcase (*dbp) == 'f')
4703             {
4704               if (nocase_tail ("forward")) /* check for forward reference */
4705                 {
4706                   found_tag = FALSE;
4707                   verify_tag = FALSE;
4708                 }
4709             }
4710           if (found_tag && verify_tag) /* not external proc, so make tag */
4711             {
4712               found_tag = FALSE;
4713               verify_tag = FALSE;
4714               make_tag (name, namelen, TRUE,
4715                         tline.buffer, taglen, save_lineno, save_lcno);
4716               continue;
4717             }
4718         }
4719       if (get_tagname)          /* grab name of proc or fn */
4720         {
4721           char *cp;
4722
4723           if (*dbp == '\0')
4724             continue;
4725
4726           /* Find block name. */
4727           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4728             continue;
4729
4730           /* Save all values for later tagging. */
4731           linebuffer_setlen (&tline, lb.len);
4732           strcpy (tline.buffer, lb.buffer);
4733           save_lineno = lineno;
4734           save_lcno = linecharno;
4735           name = tline.buffer + (dbp - lb.buffer);
4736           namelen = cp - dbp;
4737           taglen = cp - lb.buffer + 1;
4738
4739           dbp = cp;             /* set dbp to e-o-token */
4740           get_tagname = FALSE;
4741           found_tag = TRUE;
4742           continue;
4743
4744           /* And proceed to check for "extern". */
4745         }
4746       else if (!incomment && !inquote && !found_tag)
4747         {
4748           /* Check for proc/fn keywords. */
4749           switch (lowcase (c))
4750             {
4751             case 'p':
4752               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4753                 get_tagname = TRUE;
4754               continue;
4755             case 'f':
4756               if (nocase_tail ("unction"))
4757                 get_tagname = TRUE;
4758               continue;
4759             }
4760         }
4761     } /* while not eof */
4762
4763   free (tline.buffer);
4764 }
4765
4766 \f
4767 /*
4768  * Lisp tag functions
4769  *  look for (def or (DEF, quote or QUOTE
4770  */
4771
4772 static void L_getit (void);
4773
4774 static void
4775 L_getit (void)
4776 {
4777   if (*dbp == '\'')             /* Skip prefix quote */
4778     dbp++;
4779   else if (*dbp == '(')
4780   {
4781     dbp++;
4782     /* Try to skip "(quote " */
4783     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4784       /* Ok, then skip "(" before name in (defstruct (foo)) */
4785       dbp = skip_spaces (dbp);
4786   }
4787   get_tag (dbp, NULL);
4788 }
4789
4790 static void
4791 Lisp_functions (FILE *inf)
4792 {
4793   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4794     {
4795       if (dbp[0] != '(')
4796         continue;
4797
4798       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4799         {
4800           dbp = skip_non_spaces (dbp);
4801           dbp = skip_spaces (dbp);
4802           L_getit ();
4803         }
4804       else
4805         {
4806           /* Check for (foo::defmumble name-defined ... */
4807           do
4808             dbp++;
4809           while (!notinname (*dbp) && *dbp != ':');
4810           if (*dbp == ':')
4811             {
4812               do
4813                 dbp++;
4814               while (*dbp == ':');
4815
4816               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4817                 {
4818                   dbp = skip_non_spaces (dbp);
4819                   dbp = skip_spaces (dbp);
4820                   L_getit ();
4821                 }
4822             }
4823         }
4824     }
4825 }
4826
4827 \f
4828 /*
4829  * Lua script language parsing
4830  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4831  *
4832  *  "function" and "local function" are tags if they start at column 1.
4833  */
4834 static void
4835 Lua_functions (FILE *inf)
4836 {
4837   register char *bp;
4838
4839   LOOP_ON_INPUT_LINES (inf, lb, bp)
4840     {
4841       if (bp[0] != 'f' && bp[0] != 'l')
4842         continue;
4843
4844       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4845
4846       if (LOOKING_AT (bp, "function"))
4847         get_tag (bp, NULL);
4848     }
4849 }
4850
4851 \f
4852 /*
4853  * Postscript tags
4854  * Just look for lines where the first character is '/'
4855  * Also look at "defineps" for PSWrap
4856  * Ideas by:
4857  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4858  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4859  */
4860 static void
4861 PS_functions (FILE *inf)
4862 {
4863   register char *bp, *ep;
4864
4865   LOOP_ON_INPUT_LINES (inf, lb, bp)
4866     {
4867       if (bp[0] == '/')
4868         {
4869           for (ep = bp+1;
4870                *ep != '\0' && *ep != ' ' && *ep != '{';
4871                ep++)
4872             continue;
4873           make_tag (bp, ep - bp, TRUE,
4874                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4875         }
4876       else if (LOOKING_AT (bp, "defineps"))
4877         get_tag (bp, NULL);
4878     }
4879 }
4880
4881 \f
4882 /*
4883  * Forth tags
4884  * Ignore anything after \ followed by space or in ( )
4885  * Look for words defined by :
4886  * Look for constant, code, create, defer, value, and variable
4887  * OBP extensions:  Look for buffer:, field,
4888  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4889  */
4890 static void
4891 Forth_words (FILE *inf)
4892 {
4893   register char *bp;
4894
4895   LOOP_ON_INPUT_LINES (inf, lb, bp)
4896     while ((bp = skip_spaces (bp))[0] != '\0')
4897       if (bp[0] == '\\' && iswhite(bp[1]))
4898         break;                  /* read next line */
4899       else if (bp[0] == '(' && iswhite(bp[1]))
4900         do                      /* skip to ) or eol */
4901           bp++;
4902         while (*bp != ')' && *bp != '\0');
4903       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4904                || LOOKING_AT_NOCASE (bp, "constant")
4905                || LOOKING_AT_NOCASE (bp, "code")
4906                || LOOKING_AT_NOCASE (bp, "create")
4907                || LOOKING_AT_NOCASE (bp, "defer")
4908                || LOOKING_AT_NOCASE (bp, "value")
4909                || LOOKING_AT_NOCASE (bp, "variable")
4910                || LOOKING_AT_NOCASE (bp, "buffer:")
4911                || LOOKING_AT_NOCASE (bp, "field"))
4912         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4913       else
4914         bp = skip_non_spaces (bp);
4915 }
4916
4917 \f
4918 /*
4919  * Scheme tag functions
4920  * look for (def... xyzzy
4921  *          (def... (xyzzy
4922  *          (def ... ((...(xyzzy ....
4923  *          (set! xyzzy
4924  * Original code by Ken Haase (1985?)
4925  */
4926 static void
4927 Scheme_functions (FILE *inf)
4928 {
4929   register char *bp;
4930
4931   LOOP_ON_INPUT_LINES (inf, lb, bp)
4932     {
4933       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4934         {
4935           bp = skip_non_spaces (bp+4);
4936           /* Skip over open parens and white space.  Don't continue past
4937              '\0'. */
4938           while (*bp && notinname (*bp))
4939             bp++;
4940           get_tag (bp, NULL);
4941         }
4942       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4943         get_tag (bp, NULL);
4944     }
4945 }
4946
4947 \f
4948 /* Find tags in TeX and LaTeX input files.  */
4949
4950 /* TEX_toktab is a table of TeX control sequences that define tags.
4951  * Each entry records one such control sequence.
4952  *
4953  * Original code from who knows whom.
4954  * Ideas by:
4955  *   Stefan Monnier (2002)
4956  */
4957
4958 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4959
4960 /* Default set of control sequences to put into TEX_toktab.
4961    The value of environment var TEXTAGS is prepended to this.  */
4962 static const char *TEX_defenv = "\
4963 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4964 :part:appendix:entry:index:def\
4965 :newcommand:renewcommand:newenvironment:renewenvironment";
4966
4967 static void TEX_mode (FILE *);
4968 static void TEX_decode_env (const char *, const char *);
4969
4970 static char TEX_esc = '\\';
4971 static char TEX_opgrp = '{';
4972 static char TEX_clgrp = '}';
4973
4974 /*
4975  * TeX/LaTeX scanning loop.
4976  */
4977 static void
4978 TeX_commands (FILE *inf)
4979 {
4980   char *cp;
4981   linebuffer *key;
4982
4983   /* Select either \ or ! as escape character.  */
4984   TEX_mode (inf);
4985
4986   /* Initialize token table once from environment. */
4987   if (TEX_toktab == NULL)
4988     TEX_decode_env ("TEXTAGS", TEX_defenv);
4989
4990   LOOP_ON_INPUT_LINES (inf, lb, cp)
4991     {
4992       /* Look at each TEX keyword in line. */
4993       for (;;)
4994         {
4995           /* Look for a TEX escape. */
4996           while (*cp++ != TEX_esc)
4997             if (cp[-1] == '\0' || cp[-1] == '%')
4998               goto tex_next_line;
4999
5000           for (key = TEX_toktab; key->buffer != NULL; key++)
5001             if (strneq (cp, key->buffer, key->len))
5002               {
5003                 register char *p;
5004                 int namelen, linelen;
5005                 bool opgrp = FALSE;
5006
5007                 cp = skip_spaces (cp + key->len);
5008                 if (*cp == TEX_opgrp)
5009                   {
5010                     opgrp = TRUE;
5011                     cp++;
5012                   }
5013                 for (p = cp;
5014                      (!iswhite (*p) && *p != '#' &&
5015                       *p != TEX_opgrp && *p != TEX_clgrp);
5016                      p++)
5017                   continue;
5018                 namelen = p - cp;
5019                 linelen = lb.len;
5020                 if (!opgrp || *p == TEX_clgrp)
5021                   {
5022                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5023                       p++;
5024                     linelen = p - lb.buffer + 1;
5025                   }
5026                 make_tag (cp, namelen, TRUE,
5027                           lb.buffer, linelen, lineno, linecharno);
5028                 goto tex_next_line; /* We only tag a line once */
5029               }
5030         }
5031     tex_next_line:
5032       ;
5033     }
5034 }
5035
5036 #define TEX_LESC '\\'
5037 #define TEX_SESC '!'
5038
5039 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5040    chars accordingly. */
5041 static void
5042 TEX_mode (FILE *inf)
5043 {
5044   int c;
5045
5046   while ((c = getc (inf)) != EOF)
5047     {
5048       /* Skip to next line if we hit the TeX comment char. */
5049       if (c == '%')
5050         while (c != '\n' && c != EOF)
5051           c = getc (inf);
5052       else if (c == TEX_LESC || c == TEX_SESC )
5053         break;
5054     }
5055
5056   if (c == TEX_LESC)
5057     {
5058       TEX_esc = TEX_LESC;
5059       TEX_opgrp = '{';
5060       TEX_clgrp = '}';
5061     }
5062   else
5063     {
5064       TEX_esc = TEX_SESC;
5065       TEX_opgrp = '<';
5066       TEX_clgrp = '>';
5067     }
5068   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5069      No attempt is made to correct the situation. */
5070   rewind (inf);
5071 }
5072
5073 /* Read environment and prepend it to the default string.
5074    Build token table. */
5075 static void
5076 TEX_decode_env (const char *evarname, const char *defenv)
5077 {
5078   register const char *env, *p;
5079   int i, len;
5080
5081   /* Append default string to environment. */
5082   env = getenv (evarname);
5083   if (!env)
5084     env = defenv;
5085   else
5086     env = concat (env, defenv, "");
5087
5088   /* Allocate a token table */
5089   for (len = 1, p = env; p;)
5090     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5091       len++;
5092   TEX_toktab = xnew (len, linebuffer);
5093
5094   /* Unpack environment string into token table. Be careful about */
5095   /* zero-length strings (leading ':', "::" and trailing ':') */
5096   for (i = 0; *env != '\0';)
5097     {
5098       p = etags_strchr (env, ':');
5099       if (!p)                   /* End of environment string. */
5100         p = env + strlen (env);
5101       if (p - env > 0)
5102         {                       /* Only non-zero strings. */
5103           TEX_toktab[i].buffer = savenstr (env, p - env);
5104           TEX_toktab[i].len = p - env;
5105           i++;
5106         }
5107       if (*p)
5108         env = p + 1;
5109       else
5110         {
5111           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5112           TEX_toktab[i].len = 0;
5113           break;
5114         }
5115     }
5116 }
5117
5118 \f
5119 /* Texinfo support.  Dave Love, Mar. 2000.  */
5120 static void
5121 Texinfo_nodes (FILE *inf)
5122 {
5123   char *cp, *start;
5124   LOOP_ON_INPUT_LINES (inf, lb, cp)
5125     if (LOOKING_AT (cp, "@node"))
5126       {
5127         start = cp;
5128         while (*cp != '\0' && *cp != ',')
5129           cp++;
5130         make_tag (start, cp - start, TRUE,
5131                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5132       }
5133 }
5134
5135 \f
5136 /*
5137  * HTML support.
5138  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5139  * Contents of <a name=xxx> are tags with name xxx.
5140  *
5141  * Francesco Potortì, 2002.
5142  */
5143 static void
5144 HTML_labels (FILE *inf)
5145 {
5146   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5147   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5148   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5149   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5150   char *end;
5151
5152
5153   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5154
5155   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5156     for (;;)                    /* loop on the same line */
5157       {
5158         if (skiptag)            /* skip HTML tag */
5159           {
5160             while (*dbp != '\0' && *dbp != '>')
5161               dbp++;
5162             if (*dbp == '>')
5163               {
5164                 dbp += 1;
5165                 skiptag = FALSE;
5166                 continue;       /* look on the same line */
5167               }
5168             break;              /* go to next line */
5169           }
5170
5171         else if (intag) /* look for "name=" or "id=" */
5172           {
5173             while (*dbp != '\0' && *dbp != '>'
5174                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5175               dbp++;
5176             if (*dbp == '\0')
5177               break;            /* go to next line */
5178             if (*dbp == '>')
5179               {
5180                 dbp += 1;
5181                 intag = FALSE;
5182                 continue;       /* look on the same line */
5183               }
5184             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5185                 || LOOKING_AT_NOCASE (dbp, "id="))
5186               {
5187                 bool quoted = (dbp[0] == '"');
5188
5189                 if (quoted)
5190                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5191                     continue;
5192                 else
5193                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5194                     continue;
5195                 linebuffer_setlen (&token_name, end - dbp);
5196                 strncpy (token_name.buffer, dbp, end - dbp);
5197                 token_name.buffer[end - dbp] = '\0';
5198
5199                 dbp = end;
5200                 intag = FALSE;  /* we found what we looked for */
5201                 skiptag = TRUE; /* skip to the end of the tag */
5202                 getnext = TRUE; /* then grab the text */
5203                 continue;       /* look on the same line */
5204               }
5205             dbp += 1;
5206           }
5207
5208         else if (getnext)       /* grab next tokens and tag them */
5209           {
5210             dbp = skip_spaces (dbp);
5211             if (*dbp == '\0')
5212               break;            /* go to next line */
5213             if (*dbp == '<')
5214               {
5215                 intag = TRUE;
5216                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5217                 continue;       /* look on the same line */
5218               }
5219
5220             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5221               continue;
5222             make_tag (token_name.buffer, token_name.len, TRUE,
5223                       dbp, end - dbp, lineno, linecharno);
5224             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5225             getnext = FALSE;
5226             break;              /* go to next line */
5227           }
5228
5229         else                    /* look for an interesting HTML tag */
5230           {
5231             while (*dbp != '\0' && *dbp != '<')
5232               dbp++;
5233             if (*dbp == '\0')
5234               break;            /* go to next line */
5235             intag = TRUE;
5236             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5237               {
5238                 inanchor = TRUE;
5239                 continue;       /* look on the same line */
5240               }
5241             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5242                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5243                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5244                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5245               {
5246                 intag = FALSE;
5247                 getnext = TRUE;
5248                 continue;       /* look on the same line */
5249               }
5250             dbp += 1;
5251           }
5252       }
5253 }
5254
5255 \f
5256 /*
5257  * Prolog support
5258  *
5259  * Assumes that the predicate or rule starts at column 0.
5260  * Only the first clause of a predicate or rule is added.
5261  * Original code by Sunichirou Sugou (1989)
5262  * Rewritten by Anders Lindgren (1996)
5263  */
5264 static int prolog_pr (char *, char *);
5265 static void prolog_skip_comment (linebuffer *, FILE *);
5266 static int prolog_atom (char *, int);
5267
5268 static void
5269 Prolog_functions (FILE *inf)
5270 {
5271   char *cp, *last;
5272   int len;
5273   int allocated;
5274
5275   allocated = 0;
5276   len = 0;
5277   last = NULL;
5278
5279   LOOP_ON_INPUT_LINES (inf, lb, cp)
5280     {
5281       if (cp[0] == '\0')        /* Empty line */
5282         continue;
5283       else if (iswhite (cp[0])) /* Not a predicate */
5284         continue;
5285       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5286         prolog_skip_comment (&lb, inf);
5287       else if ((len = prolog_pr (cp, last)) > 0)
5288         {
5289           /* Predicate or rule.  Store the function name so that we
5290              only generate a tag for the first clause.  */
5291           if (last == NULL)
5292             last = xnew(len + 1, char);
5293           else if (len + 1 > allocated)
5294             xrnew (last, len + 1, char);
5295           allocated = len + 1;
5296           strncpy (last, cp, len);
5297           last[len] = '\0';
5298         }
5299     }
5300   free (last);
5301 }
5302
5303
5304 static void
5305 prolog_skip_comment (linebuffer *plb, FILE *inf)
5306 {
5307   char *cp;
5308
5309   do
5310     {
5311       for (cp = plb->buffer; *cp != '\0'; cp++)
5312         if (cp[0] == '*' && cp[1] == '/')
5313           return;
5314       readline (plb, inf);
5315     }
5316   while (!feof(inf));
5317 }
5318
5319 /*
5320  * A predicate or rule definition is added if it matches:
5321  *     <beginning of line><Prolog Atom><whitespace>(
5322  * or  <beginning of line><Prolog Atom><whitespace>:-
5323  *
5324  * It is added to the tags database if it doesn't match the
5325  * name of the previous clause header.
5326  *
5327  * Return the size of the name of the predicate or rule, or 0 if no
5328  * header was found.
5329  */
5330 static int
5331 prolog_pr (char *s, char *last)
5332
5333                                 /* Name of last clause. */
5334 {
5335   int pos;
5336   int len;
5337
5338   pos = prolog_atom (s, 0);
5339   if (pos < 1)
5340     return 0;
5341
5342   len = pos;
5343   pos = skip_spaces (s + pos) - s;
5344
5345   if ((s[pos] == '.'
5346        || (s[pos] == '(' && (pos += 1))
5347        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5348       && (last == NULL          /* save only the first clause */
5349           || len != (int)strlen (last)
5350           || !strneq (s, last, len)))
5351         {
5352           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5353           return len;
5354         }
5355   else
5356     return 0;
5357 }
5358
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  The quotes are part of the length.
 */
static int
prolog_atom (char *s, int pos)
{
  const int origpos = pos;

  if (ISLOWER (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
      return pos - origpos;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  for (pos++;;)
    switch (s[pos])
      {
      case '\'':
        if (s[pos + 1] != '\'')
          return pos + 1 - origpos;     /* closing quote */
        pos += 2;                       /* a doubled quote */
        break;
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (s[pos + 1] == '\0')
          return -1;
        pos += 2;
        break;
      default:
        pos++;
        break;
      }
}
5415
5416 \f
5417 /*
5418  * Support for Erlang
5419  *
5420  * Generates tags for functions, defines, and records.
5421  * Assumes that Erlang functions start at column 0.
5422  * Original code by Anders Lindgren (1996)
5423  */
5424 static int erlang_func (char *, char *);
5425 static void erlang_attribute (char *);
5426 static int erlang_atom (char *);
5427
5428 static void
5429 Erlang_functions (FILE *inf)
5430 {
5431   char *cp, *last;
5432   int len;
5433   int allocated;
5434
5435   allocated = 0;
5436   len = 0;
5437   last = NULL;
5438
5439   LOOP_ON_INPUT_LINES (inf, lb, cp)
5440     {
5441       if (cp[0] == '\0')        /* Empty line */
5442         continue;
5443       else if (iswhite (cp[0])) /* Not function nor attribute */
5444         continue;
5445       else if (cp[0] == '%')    /* comment */
5446         continue;
5447       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5448         continue;
5449       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5450         {
5451           erlang_attribute (cp);
5452           if (last != NULL)
5453             {
5454               free (last);
5455               last = NULL;
5456             }
5457         }
5458       else if ((len = erlang_func (cp, last)) > 0)
5459         {
5460           /*
5461            * Function.  Store the function name so that we only
5462            * generates a tag for the first clause.
5463            */
5464           if (last == NULL)
5465             last = xnew (len + 1, char);
5466           else if (len + 1 > allocated)
5467             xrnew (last, len + 1, char);
5468           allocated = len + 1;
5469           strncpy (last, cp, len);
5470           last[len] = '\0';
5471         }
5472     }
5473   free (last);
5474 }
5475
5476
5477 /*
5478  * A function definition is added if it matches:
5479  *     <beginning of line><Erlang Atom><whitespace>(
5480  *
5481  * It is added to the tags database if it doesn't match the
5482  * name of the previous clause header.
5483  *
5484  * Return the size of the name of the function, or 0 if no function
5485  * was found.
5486  */
5487 static int
5488 erlang_func (char *s, char *last)
5489
5490                                 /* Name of last clause. */
5491 {
5492   int pos;
5493   int len;
5494
5495   pos = erlang_atom (s);
5496   if (pos < 1)
5497     return 0;
5498
5499   len = pos;
5500   pos = skip_spaces (s + pos) - s;
5501
5502   /* Save only the first clause. */
5503   if (s[pos++] == '('
5504       && (last == NULL
5505           || len != (int)strlen (last)
5506           || !strneq (s, last, len)))
5507         {
5508           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5509           return len;
5510         }
5511
5512   return 0;
5513 }
5514
5515
5516 /*
5517  * Handle attributes.  Currently, tags are generated for defines
5518  * and records.
5519  *
5520  * They are on the form:
5521  * -define(foo, bar).
5522  * -define(Foo(M, N), M+N).
5523  * -record(graph, {vtab = notable, cyclic = true}).
5524  */
5525 static void
5526 erlang_attribute (char *s)
5527 {
5528   char *cp = s;
5529
5530   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5531       && *cp++ == '(')
5532     {
5533       int len = erlang_atom (skip_spaces (cp));
5534       if (len > 0)
5535         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5536     }
5537   return;
5538 }
5539
5540
/*
 * Consume an Erlang atom (or variable) at the beginning of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or the atom is a quoted one that is not closed on this line.
 *
 * An unquoted atom starts with a letter or an underscore and goes on
 * with alphanumeric characters and underscores.  A quoted atom is
 * delimited by single quotes, which are counted in the length; inside
 * it a backslash quotes the next character.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\\')
            pos++;              /* look at the quoted character */
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          pos++;
        }
      return pos + 1;
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
    }

  return pos;
}
5568
5569 \f
5570 static char *scan_separators (char *);
5571 static void add_regex (char *, language *);
5572 static char *substitute (char *, char *, struct re_registers *);
5573
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Any other backslash sequence is copied
 * unchanged, backslash included.  Works in place: the result is
 * written starting at NAME[0] and is null terminated.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty NAME has no separator at all and
 * also yields NULL.
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;

  /* Without this check the loop below would start reading past the
     terminating null of an empty string.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* A backslash quotes the next character.  A backslash at
             the very end of the string is dropped, and the regexp
             is then reported as unterminated.  */
          if (*++name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5632
5633 /* Look at the argument of --regex or --no-regex and do the right
5634    thing.  Same for each line of a regexp file. */
5635 static void
5636 analyse_regex (char *regex_arg)
5637 {
5638   if (regex_arg == NULL)
5639     {
5640       free_regexps ();          /* --no-regex: remove existing regexps */
5641       return;
5642     }
5643
5644   /* A real --regexp option or a line in a regexp file. */
5645   switch (regex_arg[0])
5646     {
5647       /* Comments in regexp file or null arg to --regex. */
5648     case '\0':
5649     case ' ':
5650     case '\t':
5651       break;
5652
5653       /* Read a regex file.  This is recursive and may result in a
5654          loop, which will stop when the file descriptors are exhausted. */
5655     case '@':
5656       {
5657         FILE *regexfp;
5658         linebuffer regexbuf;
5659         char *regexfile = regex_arg + 1;
5660
5661         /* regexfile is a file containing regexps, one per line. */
5662         regexfp = fopen (regexfile, "r");
5663         if (regexfp == NULL)
5664           {
5665             pfatal (regexfile);
5666             return;
5667           }
5668         linebuffer_init (&regexbuf);
5669         while (readline_internal (&regexbuf, regexfp) > 0)
5670           analyse_regex (regexbuf.buffer);
5671         free (regexbuf.buffer);
5672         fclose (regexfp);
5673       }
5674       break;
5675
5676       /* Regexp to be used for a specific language only. */
5677     case '{':
5678       {
5679         language *lang;
5680         char *lang_name = regex_arg + 1;
5681         char *cp;
5682
5683         for (cp = lang_name; *cp != '}'; cp++)
5684           if (*cp == '\0')
5685             {
5686               error ("unterminated language name in regex: %s", regex_arg);
5687               return;
5688             }
5689         *cp++ = '\0';
5690         lang = get_language_from_langname (lang_name);
5691         if (lang == NULL)
5692           return;
5693         add_regex (cp, lang);
5694       }
5695       break;
5696
5697       /* Regexp to be used for any language. */
5698     default:
5699       add_regex (regex_arg, NULL);
5700       break;
5701     }
5702 }
5703
5704 /* Separate the regexp pattern, compile it,
5705    and care for optional name and modifiers. */
5706 static void
5707 add_regex (char *regexp_pattern, language *lang)
5708 {
5709   static struct re_pattern_buffer zeropattern;
5710   char sep, *pat, *name, *modifiers;
5711   char empty[] = "";
5712   const char *err;
5713   struct re_pattern_buffer *patbuf;
5714   regexp *rp;
5715   bool
5716     force_explicit_name = TRUE, /* do not use implicit tag names */
5717     ignore_case = FALSE,        /* case is significant */
5718     multi_line = FALSE,         /* matches are done one line at a time */
5719     single_line = FALSE;        /* dot does not match newline */
5720
5721
5722   if (strlen(regexp_pattern) < 3)
5723     {
5724       error ("null regexp", (char *)NULL);
5725       return;
5726     }
5727   sep = regexp_pattern[0];
5728   name = scan_separators (regexp_pattern);
5729   if (name == NULL)
5730     {
5731       error ("%s: unterminated regexp", regexp_pattern);
5732       return;
5733     }
5734   if (name[1] == sep)
5735     {
5736       error ("null name for regexp \"%s\"", regexp_pattern);
5737       return;
5738     }
5739   modifiers = scan_separators (name);
5740   if (modifiers == NULL)        /* no terminating separator --> no name */
5741     {
5742       modifiers = name;
5743       name = empty;
5744     }
5745   else
5746     modifiers += 1;             /* skip separator */
5747
5748   /* Parse regex modifiers. */
5749   for (; modifiers[0] != '\0'; modifiers++)
5750     switch (modifiers[0])
5751       {
5752       case 'N':
5753         if (modifiers == name)
5754           error ("forcing explicit tag name but no name, ignoring", NULL);
5755         force_explicit_name = TRUE;
5756         break;
5757       case 'i':
5758         ignore_case = TRUE;
5759         break;
5760       case 's':
5761         single_line = TRUE;
5762         /* FALLTHRU */
5763       case 'm':
5764         multi_line = TRUE;
5765         need_filebuf = TRUE;
5766         break;
5767       default:
5768         {
5769           char wrongmod [2];
5770           wrongmod[0] = modifiers[0];
5771           wrongmod[1] = '\0';
5772           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5773         }
5774         break;
5775       }
5776
5777   patbuf = xnew (1, struct re_pattern_buffer);
5778   *patbuf = zeropattern;
5779   if (ignore_case)
5780     {
5781       static char lc_trans[CHARS];
5782       int i;
5783       for (i = 0; i < CHARS; i++)
5784         lc_trans[i] = lowcase (i);
5785       patbuf->translate = lc_trans;     /* translation table to fold case  */
5786     }
5787
5788   if (multi_line)
5789     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5790   else
5791     pat = regexp_pattern;
5792
5793   if (single_line)
5794     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5795   else
5796     re_set_syntax (RE_SYNTAX_EMACS);
5797
5798   err = re_compile_pattern (pat, strlen (pat), patbuf);
5799   if (multi_line)
5800     free (pat);
5801   if (err != NULL)
5802     {
5803       error ("%s while compiling pattern", err);
5804       return;
5805     }
5806
5807   rp = p_head;
5808   p_head = xnew (1, regexp);
5809   p_head->pattern = savestr (regexp_pattern);
5810   p_head->p_next = rp;
5811   p_head->lang = lang;
5812   p_head->pat = patbuf;
5813   p_head->name = savestr (name);
5814   p_head->error_signaled = FALSE;
5815   p_head->force_explicit_name = force_explicit_name;
5816   p_head->ignore_case = ignore_case;
5817   p_head->multi_line = multi_line;
5818 }
5819
5820 /*
5821  * Do the substitutions indicated by the regular expression and
5822  * arguments.
5823  */
5824 static char *
5825 substitute (char *in, char *out, struct re_registers *regs)
5826 {
5827   char *result, *t;
5828   int size, dig, diglen;
5829
5830   result = NULL;
5831   size = strlen (out);
5832
5833   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5834   if (out[size - 1] == '\\')
5835     fatal ("pattern error in \"%s\"", out);
5836   for (t = etags_strchr (out, '\\');
5837        t != NULL;
5838        t = etags_strchr (t + 2, '\\'))
5839     if (ISDIGIT (t[1]))
5840       {
5841         dig = t[1] - '0';
5842         diglen = regs->end[dig] - regs->start[dig];
5843         size += diglen - 2;
5844       }
5845     else
5846       size -= 1;
5847
5848   /* Allocate space and do the substitutions. */
5849   assert (size >= 0);
5850   result = xnew (size + 1, char);
5851
5852   for (t = result; *out != '\0'; out++)
5853     if (*out == '\\' && ISDIGIT (*++out))
5854       {
5855         dig = *out - '0';
5856         diglen = regs->end[dig] - regs->start[dig];
5857         strncpy (t, in + regs->start[dig], diglen);
5858         t += diglen;
5859       }
5860     else
5861       *t++ = *out;
5862   *t = '\0';
5863
5864   assert (t <= result + size);
5865   assert (t - result == (int)strlen (result));
5866
5867   return result;
5868 }
5869
5870 /* Deallocate all regexps. */
5871 static void
5872 free_regexps (void)
5873 {
5874   regexp *rp;
5875   while (p_head != NULL)
5876     {
5877       rp = p_head->p_next;
5878       free (p_head->pattern);
5879       free (p_head->name);
5880       free (p_head);
5881       p_head = rp;
5882     }
5883   return;
5884 }
5885
5886 /*
5887  * Reads the whole file as a single string from `filebuf' and looks for
5888  * multi-line regular expressions, creating tags on matches.
5889  * readline already dealt with normal regexps.
5890  *
5891  * Idea by Ben Wing <ben@666.com> (2002).
5892  */
5893 static void
5894 regex_tag_multiline (void)
5895 {
5896   char *buffer = filebuf.buffer;
5897   regexp *rp;
5898   char *name;
5899
5900   for (rp = p_head; rp != NULL; rp = rp->p_next)
5901     {
5902       int match = 0;
5903
5904       if (!rp->multi_line)
5905         continue;               /* skip normal regexps */
5906
5907       /* Generic initialisations before parsing file from memory. */
5908       lineno = 1;               /* reset global line number */
5909       charno = 0;               /* reset global char number */
5910       linecharno = 0;           /* reset global char number of line start */
5911
5912       /* Only use generic regexps or those for the current language. */
5913       if (rp->lang != NULL && rp->lang != curfdp->lang)
5914         continue;
5915
5916       while (match >= 0 && match < filebuf.len)
5917         {
5918           match = re_search (rp->pat, buffer, filebuf.len, charno,
5919                              filebuf.len - match, &rp->regs);
5920           switch (match)
5921             {
5922             case -2:
5923               /* Some error. */
5924               if (!rp->error_signaled)
5925                 {
5926                   error ("regexp stack overflow while matching \"%s\"",
5927                          rp->pattern);
5928                   rp->error_signaled = TRUE;
5929                 }
5930               break;
5931             case -1:
5932               /* No match. */
5933               break;
5934             default:
5935               if (match == rp->regs.end[0])
5936                 {
5937                   if (!rp->error_signaled)
5938                     {
5939                       error ("regexp matches the empty string: \"%s\"",
5940                              rp->pattern);
5941                       rp->error_signaled = TRUE;
5942                     }
5943                   match = -3;   /* exit from while loop */
5944                   break;
5945                 }
5946
5947               /* Match occurred.  Construct a tag. */
5948               while (charno < rp->regs.end[0])
5949                 if (buffer[charno++] == '\n')
5950                   lineno++, linecharno = charno;
5951               name = rp->name;
5952               if (name[0] == '\0')
5953                 name = NULL;
5954               else /* make a named tag */
5955                 name = substitute (buffer, rp->name, &rp->regs);
5956               if (rp->force_explicit_name)
5957                 /* Force explicit tag name, if a name is there. */
5958                 pfnote (name, TRUE, buffer + linecharno,
5959                         charno - linecharno + 1, lineno, linecharno);
5960               else
5961                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5962                           charno - linecharno + 1, lineno, linecharno);
5963               break;
5964             }
5965         }
5966     }
5967 }
5968
5969 \f
5970 static bool
5971 nocase_tail (const char *cp)
5972 {
5973   register int len = 0;
5974
5975   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5976     cp++, len++;
5977   if (*cp == '\0' && !intoken (dbp[len]))
5978     {
5979       dbp += len;
5980       return TRUE;
5981     }
5982   return FALSE;
5983 }
5984
5985 static void
5986 get_tag (register char *bp, char **namepp)
5987 {
5988   register char *cp = bp;
5989
5990   if (*bp != '\0')
5991     {
5992       /* Go till you get to white space or a syntactic break */
5993       for (cp = bp + 1; !notinname (*cp); cp++)
5994         continue;
5995       make_tag (bp, cp - bp, TRUE,
5996                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5997     }
5998
5999   if (namepp != NULL)
6000     *namepp = savenstr (bp, cp - bp);
6001 }
6002
6003 /*
6004  * Read a line of text from `stream' into `lbp', excluding the
6005  * newline or CR-NL, if any.  Return the number of characters read from
6006  * `stream', which is the length of the line including the newline.
6007  *
6008  * On DOS or Windows we do not count the CR character, if any before the
6009  * NL, in the returned length; this mirrors the behavior of Emacs on those
6010  * platforms (for text files, it translates CR-NL to NL as it reads in the
6011  * file).
6012  *
6013  * If multi-line regular expressions are requested, each line read is
6014  * appended to `filebuf'.
6015  */
6016 static long
6017 readline_internal (linebuffer *lbp, register FILE *stream)
6018 {
6019   char *buffer = lbp->buffer;
6020   register char *p = lbp->buffer;
6021   register char *pend;
6022   int chars_deleted;
6023
6024   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6025
6026   for (;;)
6027     {
6028       register int c = getc (stream);
6029       if (p == pend)
6030         {
6031           /* We're at the end of linebuffer: expand it. */
6032           lbp->size *= 2;
6033           xrnew (buffer, lbp->size, char);
6034           p += buffer - lbp->buffer;
6035           pend = buffer + lbp->size;
6036           lbp->buffer = buffer;
6037         }
6038       if (c == EOF)
6039         {
6040           *p = '\0';
6041           chars_deleted = 0;
6042           break;
6043         }
6044       if (c == '\n')
6045         {
6046           if (p > buffer && p[-1] == '\r')
6047             {
6048               p -= 1;
6049 #ifdef DOS_NT
6050              /* Assume CRLF->LF translation will be performed by Emacs
6051                 when loading this file, so CRs won't appear in the buffer.
6052                 It would be cleaner to compensate within Emacs;
6053                 however, Emacs does not know how many CRs were deleted
6054                 before any given point in the file.  */
6055               chars_deleted = 1;
6056 #else
6057               chars_deleted = 2;
6058 #endif
6059             }
6060           else
6061             {
6062               chars_deleted = 1;
6063             }
6064           *p = '\0';
6065           break;
6066         }
6067       *p++ = c;
6068     }
6069   lbp->len = p - buffer;
6070
6071   if (need_filebuf              /* we need filebuf for multi-line regexps */
6072       && chars_deleted > 0)     /* not at EOF */
6073     {
6074       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6075         {
6076           /* Expand filebuf. */
6077           filebuf.size *= 2;
6078           xrnew (filebuf.buffer, filebuf.size, char);
6079         }
6080       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6081       filebuf.len += lbp->len;
6082       filebuf.buffer[filebuf.len++] = '\n';
6083       filebuf.buffer[filebuf.len] = '\0';
6084     }
6085
6086   return lbp->len + chars_deleted;
6087 }
6088
6089 /*
6090  * Like readline_internal, above, but in addition try to match the
6091  * input line against relevant regular expressions and manage #line
6092  * directives.
6093  */
6094 static void
6095 readline (linebuffer *lbp, FILE *stream)
6096 {
6097   long result;
6098
6099   linecharno = charno;          /* update global char number of line start */
6100   result = readline_internal (lbp, stream); /* read line */
6101   lineno += 1;                  /* increment global line number */
6102   charno += result;             /* increment global char number */
6103
6104   /* Honour #line directives. */
6105   if (!no_line_directive)
6106     {
6107       static bool discard_until_line_directive;
6108
6109       /* Check whether this is a #line directive. */
6110       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6111         {
6112           unsigned int lno;
6113           int start = 0;
6114
6115           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6116               && start > 0)     /* double quote character found */
6117             {
6118               char *endp = lbp->buffer + start;
6119
6120               while ((endp = etags_strchr (endp, '"')) != NULL
6121                      && endp[-1] == '\\')
6122                 endp++;
6123               if (endp != NULL)
6124                 /* Ok, this is a real #line directive.  Let's deal with it. */
6125                 {
6126                   char *taggedabsname;  /* absolute name of original file */
6127                   char *taggedfname;    /* name of original file as given */
6128                   char *name;           /* temp var */
6129
6130                   discard_until_line_directive = FALSE; /* found it */
6131                   name = lbp->buffer + start;
6132                   *endp = '\0';
6133                   canonicalize_filename (name);
6134                   taggedabsname = absolute_filename (name, tagfiledir);
6135                   if (filename_is_absolute (name)
6136                       || filename_is_absolute (curfdp->infname))
6137                     taggedfname = savestr (taggedabsname);
6138                   else
6139                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6140
6141                   if (streq (curfdp->taggedfname, taggedfname))
6142                     /* The #line directive is only a line number change.  We
6143                        deal with this afterwards. */
6144                     free (taggedfname);
6145                   else
6146                     /* The tags following this #line directive should be
6147                        attributed to taggedfname.  In order to do this, set
6148                        curfdp accordingly. */
6149                     {
6150                       fdesc *fdp; /* file description pointer */
6151
6152                       /* Go look for a file description already set up for the
6153                          file indicated in the #line directive.  If there is
6154                          one, use it from now until the next #line
6155                          directive. */
6156                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6157                         if (streq (fdp->infname, curfdp->infname)
6158                             && streq (fdp->taggedfname, taggedfname))
6159                           /* If we remove the second test above (after the &&)
6160                              then all entries pertaining to the same file are
6161                              coalesced in the tags file.  If we use it, then
6162                              entries pertaining to the same file but generated
6163                              from different files (via #line directives) will
6164                              go into separate sections in the tags file.  These
6165                              alternatives look equivalent.  The first one
6166                              destroys some apparently useless information. */
6167                           {
6168                             curfdp = fdp;
6169                             free (taggedfname);
6170                             break;
6171                           }
6172                       /* Else, if we already tagged the real file, skip all
6173                          input lines until the next #line directive. */
6174                       if (fdp == NULL) /* not found */
6175                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6176                           if (streq (fdp->infabsname, taggedabsname))
6177                             {
6178                               discard_until_line_directive = TRUE;
6179                               free (taggedfname);
6180                               break;
6181                             }
6182                       /* Else create a new file description and use that from
6183                          now on, until the next #line directive. */
6184                       if (fdp == NULL) /* not found */
6185                         {
6186                           fdp = fdhead;
6187                           fdhead = xnew (1, fdesc);
6188                           *fdhead = *curfdp; /* copy curr. file description */
6189                           fdhead->next = fdp;
6190                           fdhead->infname = savestr (curfdp->infname);
6191                           fdhead->infabsname = savestr (curfdp->infabsname);
6192                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6193                           fdhead->taggedfname = taggedfname;
6194                           fdhead->usecharno = FALSE;
6195                           fdhead->prop = NULL;
6196                           fdhead->written = FALSE;
6197                           curfdp = fdhead;
6198                         }
6199                     }
6200                   free (taggedabsname);
6201                   lineno = lno - 1;
6202                   readline (lbp, stream);
6203                   return;
6204                 } /* if a real #line directive */
6205             } /* if #line is followed by a number */
6206         } /* if line begins with "#line " */
6207
6208       /* If we are here, no #line directive was found. */
6209       if (discard_until_line_directive)
6210         {
6211           if (result > 0)
6212             {
6213               /* Do a tail recursion on ourselves, thus discarding the contents
6214                  of the line buffer. */
6215               readline (lbp, stream);
6216               return;
6217             }
6218           /* End of file. */
6219           discard_until_line_directive = FALSE;
6220           return;
6221         }
6222     } /* if #line directives should be considered */
6223
6224   {
6225     int match;
6226     regexp *rp;
6227     char *name;
6228
6229     /* Match against relevant regexps. */
6230     if (lbp->len > 0)
6231       for (rp = p_head; rp != NULL; rp = rp->p_next)
6232         {
6233           /* Only use generic regexps or those for the current language.
6234              Also do not use multiline regexps, which is the job of
6235              regex_tag_multiline. */
6236           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6237               || rp->multi_line)
6238             continue;
6239
6240           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6241           switch (match)
6242             {
6243             case -2:
6244               /* Some error. */
6245               if (!rp->error_signaled)
6246                 {
6247                   error ("regexp stack overflow while matching \"%s\"",
6248                          rp->pattern);
6249                   rp->error_signaled = TRUE;
6250                 }
6251               break;
6252             case -1:
6253               /* No match. */
6254               break;
6255             case 0:
6256               /* Empty string matched. */
6257               if (!rp->error_signaled)
6258                 {
6259                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6260                   rp->error_signaled = TRUE;
6261                 }
6262               break;
6263             default:
6264               /* Match occurred.  Construct a tag. */
6265               name = rp->name;
6266               if (name[0] == '\0')
6267                 name = NULL;
6268               else /* make a named tag */
6269                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6270               if (rp->force_explicit_name)
6271                 /* Force explicit tag name, if a name is there. */
6272                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6273               else
6274                 make_tag (name, strlen (name), TRUE,
6275                           lbp->buffer, match, lineno, linecharno);
6276               break;
6277             }
6278         }
6279   }
6280 }
6281
6282 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The copy is produced by savenstr, asked for exactly strlen (CP)
 * characters, and is returned to the caller.
 */
static char *
savestr (const char *cp)
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6292
6293 /*
6294  * Return a pointer to a space of size LEN+1 allocated with xnew where
6295  * the string CP has been copied for at most the first LEN characters.
6296  */
6297 static char *
6298 savenstr (const char *cp, int len)
6299 {
6300   register char *dp;
6301
6302   dp = xnew (len + 1, char);
6303   strncpy (dp, cp, len);
6304   dp[len] = '\0';
6305   return dp;
6306 }
6307
/*
 * Find the last occurrence of the character C in the string SP.
 * The terminating NUL is part of the search, so C == '\0' yields a
 * pointer to the terminator.  Return NULL when C does not occur.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent match */
      if (*sp == '\0')
        break;                  /* terminator examined: scan is over */
    }
  return (char *) last;
}
6327
/*
 * Find the first occurrence of the character C in the string SP.
 * The terminating NUL is part of the search, so C == '\0' yields a
 * pointer to the terminator.  Return NULL when C does not occur.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a match */
    }
}
6344
/*
 * Compare the strings S1 and S2, treating upper and lower case
 * letters as equal.  Two characters are compared through lowcase
 * only when both are alphabetic (ISALPHA); otherwise they are
 * compared as they are.  The result is the difference between the
 * first pair of characters that do not match, or 0 if there is none.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  /* S1 and S2 now point at the first mismatch or at the end of S1. */
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6363
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * treating upper and lower case letters as equal.  Two characters are
 * compared through lowcase only when both are alphabetic (ISALPHA);
 * otherwise they are compared as they are.
 *
 * Return 0 when the first N characters match (always the case for
 * N <= 0), otherwise the difference between the first pair of
 * characters that do not match.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  /* The remaining count is tested before S1 is looked at, and is only
     decremented for characters that really matched.  That way, running
     out of characters is never confused with reaching the end of S1:
     comparing "ab" with "abc" for 2 characters yields 0. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;                   /* all the requested characters matched */

  return (ISALPHA (*s1) && ISALPHA (*s2)
          ? lowcase (*s1) - lowcase (*s2)
          : *s1 - *s2);
}
6386
/* Advance CP over every character for which iswhite is true and return
   the resulting pointer.  The end of the string does not count as a
   space, so the scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6395
/* Advance CP up to the first character for which iswhite is true, or
   up to the end of the string if there is none, and return the
   resulting pointer. */
static char *
skip_non_spaces (char *cp)
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6404
/* Report an error and terminate the program.  S1 and S2 are handed
   unchanged to error; the process then exits with EXIT_FAILURE, so
   this function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6412
/* Report the failure of a system call and terminate the program.
   S1 is the prefix passed to perror, which describes the current
   errno; the process then exits with EXIT_FAILURE.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6419
6420 static void
6421 suggest_asking_for_help (void)
6422 {
6423   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6424            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6425   exit (EXIT_FAILURE);
6426 }
6427
6428 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6429 static void
6430 error (const char *s1, const char *s2)
6431 {
6432   fprintf (stderr, "%s: ", progname);
6433   fprintf (stderr, s1, s2);
6434   fprintf (stderr, "\n");
6435 }
6436
6437 /* Return a newly-allocated string whose contents
6438    concatenate those of s1, s2, s3.  */
6439 static char *
6440 concat (const char *s1, const char *s2, const char *s3)
6441 {
6442   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6443   char *result = xnew (len1 + len2 + len3 + 1, char);
6444
6445   strcpy (result, s1);
6446   strcpy (result + len1, s2);
6447   strcpy (result + len1 + len2, s3);
6448   result[len1 + len2 + len3] = '\0';
6449
6450   return result;
6451 }
6452
6453 \f
6454 /* Does the same work as the system V getcwd, but does not need to
6455    guess the buffer size in advance. */
6456 static char *
6457 etags_getcwd (void)
6458 {
6459 #ifdef HAVE_GETCWD
6460   int bufsize = 200;
6461   char *path = xnew (bufsize, char);
6462
6463   while (getcwd (path, bufsize) == NULL)
6464     {
6465       if (errno != ERANGE)
6466         pfatal ("getcwd");
6467       bufsize *= 2;
6468       free (path);
6469       path = xnew (bufsize, char);
6470     }
6471
6472   canonicalize_filename (path);
6473   return path;
6474
6475 #else /* not HAVE_GETCWD */
6476 #if MSDOS
6477
6478   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6479
6480   getwd (path);
6481
6482   for (p = path; *p != '\0'; p++)
6483     if (*p == '\\')
6484       *p = '/';
6485     else
6486       *p = lowcase (*p);
6487
6488   return strdup (path);
6489 #else /* not MSDOS */
6490   linebuffer path;
6491   FILE *pipe;
6492
6493   linebuffer_init (&path);
6494   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6495   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6496     pfatal ("pwd");
6497   pclose (pipe);
6498
6499   return path.buffer;
6500 #endif /* not MSDOS */
6501 #endif /* not HAVE_GETCWD */
6502 }
6503
6504 /* Return a newly allocated string containing the file name of FILE
6505    relative to the absolute directory DIR (which should end with a slash). */
6506 static char *
6507 relative_filename (char *file, char *dir)
6508 {
6509   char *fp, *dp, *afn, *res;
6510   int i;
6511
6512   /* Find the common root of file and dir (with a trailing slash). */
6513   afn = absolute_filename (file, cwd);
6514   fp = afn;
6515   dp = dir;
6516   while (*fp++ == *dp++)
6517     continue;
6518   fp--, dp--;                   /* back to the first differing char */
6519 #ifdef DOS_NT
6520   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6521     return afn;
6522 #endif
6523   do                            /* look at the equal chars until '/' */
6524     fp--, dp--;
6525   while (*fp != '/');
6526
6527   /* Build a sequence of "../" strings for the resulting relative file name. */
6528   i = 0;
6529   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6530     i += 1;
6531   res = xnew (3*i + strlen (fp + 1) + 1, char);
6532   res[0] = '\0';
6533   while (i-- > 0)
6534     strcat (res, "../");
6535
6536   /* Add the file name relative to the common root of file and dir. */
6537   strcat (res, fp + 1);
6538   free (afn);
6539
6540   return res;
6541 }
6542
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is simply copied, otherwise it is
   appended to DIR.  The copy is then simplified in place: every
   "/dirname/.." and every "/." component is removed.  Should nothing
   be left, "/" is returned. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  SLASHP always
     points to a slash of RES; the loop also stops when a removal has
     moved the end of the string under SLASHP. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* A "/.." component.  Walk back from SLASHP to the
                 beginning of the previous component, that is to the
                 nearest position where the string reads as an
                 absolute file name. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the user could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Move what follows the "/.." over the removed part.
                 strlen (slashp + 2) is one more than the length of
                 the tail at slashp + 3, so the terminator moves too. */
#ifdef HAVE_MEMMOVE
              memmove (cp, slashp + 3, strlen (slashp + 2));
#else
              /* Overlapping copy isn't really okay */
              strcpy (cp, slashp + 3);
#endif
              /* Look again at the same place: another "/.." or "/."
                 may now begin there. */
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* A "/." component: drop these two characters.  The
                 count includes the terminator, as above. */
#ifdef HAVE_MEMMOVE
              memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
              strcpy (slashp, slashp + 2);
#endif
              continue;
            }
        }

      /* Nothing to remove here: go on with the next slash. */
      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6614
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  When FILE contains no slash the result is a copy of DIR.

   FILE is modified while the function runs, but it is restored before
   returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the time needed to make
     its directory part absolute. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6634
/* Whether the argument string is an absolute file name, that is
   whether it begins with a slash or, under DOS_NT, with a drive
   letter followed by ":/".  The argument string must have been
   canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
#ifdef DOS_NT
  if (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return fn[0] == '/';
}
6646
/* Upcase DOS drive letter and collapse separators into single slashes.
   Works in place: FN is overwritten with the canonical name, which is
   never longer than the original.

   A separator is a slash; under DOS_NT a backslash is a separator too,
   so that any run of slashes and backslashes, even mixed, becomes one
   slash. */
static void
canonicalize_filename (register char *fn)
{
  register char *cp;
  char sep = '/';               /* separator on every system */
  char altsep = '/';            /* additional separator, if any */

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  altsep = '\\';
#endif

  /* Collapse multiple separators into a single slash.  CP reads the
     name while FN writes it; FN never gets ahead of CP. */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == sep || *cp == altsep)
      {
        *fn = '/';
        while (cp[1] == sep || cp[1] == altsep)
          cp++;
      }
    else
      *fn = *cp;
  *fn = '\0';
}
6675
6676 \f
6677 /* Initialize a linebuffer for use. */
6678 static void
6679 linebuffer_init (linebuffer *lbp)
6680 {
6681   lbp->size = (DEBUG) ? 3 : 200;
6682   lbp->buffer = xnew (lbp->size, char);
6683   lbp->buffer[0] = '\0';
6684   lbp->len = 0;
6685 }
6686
6687 /* Set the minimum size of a string contained in a linebuffer. */
6688 static void
6689 linebuffer_setlen (linebuffer *lbp, int toksize)
6690 {
6691   while (lbp->size <= toksize)
6692     {
6693       lbp->size *= 2;
6694       xrnew (lbp->buffer, lbp->size, char);
6695     }
6696   lbp->len = toksize;
6697 }
6698
6699 /* Like malloc but get fatal error if memory is exhausted. */
6700 static PTR
6701 xmalloc (unsigned int size)
6702 {
6703   PTR result = (PTR) malloc (size);
6704   if (result == NULL)
6705     fatal ("virtual memory exhausted", (char *)NULL);
6706   return result;
6707 }
6708
6709 static PTR
6710 xrealloc (char *ptr, unsigned int size)
6711 {
6712   PTR result = (PTR) realloc (ptr, size);
6713   if (result == NULL)
6714     fatal ("virtual memory exhausted", (char *)NULL);
6715   return result;
6716 }
6717
6718 /*
6719  * Local Variables:
6720  * indent-tabs-mode: t
6721  * tab-width: 8
6722  * fill-column: 79
6723  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6724  * c-file-style: "gnu"
6725  * End:
6726  */
6727
6728 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6729    (do not change this comment) */
6730
6731 /* etags.c ends here */