lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #define TRUE    1
  84 #define FALSE   0
  85
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* disable assert */
  92 #endif
  93
  94 #ifdef HAVE_CONFIG_H
  95 # include <config.h>
  96   /* This is probably not necessary any more.  On some systems, config.h
  97      used to define static as nothing for the sake of unexec.  We don't
  98      want that here since we don't use unexec.  None of these systems
  99      are supported any more, but the idea is still mentioned in
 100      etc/PROBLEMS.  */
 101 # undef static
 102 # ifndef PTR                    /* for XEmacs */
 103 #   define PTR void *
 104 # endif
 105 #else  /* no config.h */
 106 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 107 #   define PTR void *           /* for generic pointers */
 108 # else /* not standard C */
 109 #   define const                /* remove const for old compilers' sake */
 110 #   define PTR long *           /* don't use void* */
 111 # endif
 112 #endif /* !HAVE_CONFIG_H */
 113
 114 #ifndef _GNU_SOURCE
 115 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 116 #endif
 117
 118 /* WIN32_NATIVE is for XEmacs.
 119    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 120 #ifdef WIN32_NATIVE
 121 # undef MSDOS
 122 # undef  WINDOWSNT
 123 # define WINDOWSNT
 124 #endif /* WIN32_NATIVE */
 125
 126 #ifdef MSDOS
 127 # undef MSDOS
 128 # define MSDOS TRUE
 129 # include <fcntl.h>
 130 # include <sys/param.h>
 131 # include <io.h>
 132 # ifndef HAVE_CONFIG_H
 133 #   define DOS_NT
 134 #   include <sys/config.h>
 135 # endif
 136 #else
 137 # define MSDOS FALSE
 138 #endif /* MSDOS */
 139
 140 #ifdef WINDOWSNT
 141 # include <fcntl.h>
 142 # include <direct.h>
 143 # include <io.h>
 144 # define MAXPATHLEN _MAX_PATH
 145 # undef HAVE_NTGUI
 146 # undef  DOS_NT
 147 # define DOS_NT
 148 # ifndef HAVE_GETCWD
 149 #   define HAVE_GETCWD
 150 # endif /* undef HAVE_GETCWD */
 151 #else /* not WINDOWSNT */
 152 #endif /* !WINDOWSNT */
 153
 154 #include <unistd.h>
 155 #ifndef HAVE_UNISTD_H
 156 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 157     extern char *getcwd (char *buf, size_t size);
 158 # endif
 159 #endif /* HAVE_UNISTD_H */
 160
 161 #include <stdlib.h>
 162 #include <string.h>
 163 #include <stdio.h>
 164 #include <ctype.h>
 165 #include <errno.h>
 166 #include <sys/types.h>
 167 #include <sys/stat.h>
 168
 169 #include <assert.h>
 170 #ifdef NDEBUG
 171 # undef  assert                 /* some systems have a buggy assert.h */
 172 # define assert(x) ((void) 0)
 173 #endif
 174
 175 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 176 # define NO_LONG_OPTIONS TRUE
 177 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 178   extern char *optarg;
 179   extern int optind, opterr;
 180 #else
 181 # define NO_LONG_OPTIONS FALSE
 182 # include <getopt.h>
 183 #endif /* NO_LONG_OPTIONS */
 184
 185 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 186 # ifdef __CYGWIN__              /* compiling on Cygwin */
 187                              !!! NOTICE !!!
 188  the regex.h distributed with Cygwin is not compatible with etags, alas!
 189 If you want regular expression support, you should delete this notice and
 190               arrange to use the GNU regex.h and regex.c.
 191 # endif
 192 #endif
 193 #include <regex.h>
 194
 195 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 196  Leave it undefined to make the program "etags", which makes emacs-style
 197  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 198 #ifdef CTAGS
 199 # undef  CTAGS
 200 # define CTAGS TRUE
 201 #else
 202 # define CTAGS FALSE
 203 #endif
 204
 205 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 206 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 207 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 208 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 209
     /* Character-class predicates.  Each macro looks its argument up in a
        CHARS-entry boolean table; the parenthesized name in each comment is
        the string listing the characters of that class.  */
 210 #define CHARS 256               /* number of values of an 8-bit char (2^8) */
     /* Reduce X to a table index in 0..CHARS-1, so that a negative plain
        char still indexes inside the tables.  */
 211 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 212 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 213 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 214 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 215 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 216 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 217
     /* Wrappers around the <ctype.h> classification and case functions.
        The argument is first passed through CHAR, which masks it to
        0..CHARS-1, so a negative plain char is never handed to <ctype.h>.  */
 218 #define ISALNUM(c)      isalnum (CHAR(c))
 219 #define ISALPHA(c)      isalpha (CHAR(c))
 220 #define ISDIGIT(c)      isdigit (CHAR(c))
 221 #define ISLOWER(c)      islower (CHAR(c))
 222
 223 #define lowcase(c)      tolower (CHAR(c))
 224
 225
 226 /*
 227  *      xnew, xrnew -- allocate, reallocate storage
 228  *
 229  * SYNOPSIS:    Type *xnew (int n, Type);
 230  *              void xrnew (OldPointer, int n, Type);
      *
      * xnew yields a pointer to storage for N objects of type Type.
      * xrnew resizes the storage OldPointer points to so that it holds N
      * objects of type Type, and assigns the result back to OldPointer;
      * OldPointer must therefore be an lvalue.  Although the synopsis shows
      * xrnew as void, the macro is an assignment expression.
      *
      * The byte count is computed as (n) * sizeof (Type) with no overflow
      * check.  In DEBUG builds the requests go to trace_malloc and
      * trace_realloc together with the caller's file and line; otherwise
      * they go to xmalloc and xrealloc.
 231  */
 232 #if DEBUG
 233 # include "chkmalloc.h"
 234 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 235                                                   (n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #else
 239 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 240 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 241                                         (char *) (op), (n) * sizeof (Type)))
 242 #endif
 243
 244 #define bool int
 245
 246 typedef void Lang_function (FILE *);
 247
     /* A compression format: the file name suffix that identifies it and
        the command used to decompress a file of that format.  */
 248 typedef struct
 249 {
 250   const char *suffix;           /* file name suffix for this compressor */
 251   const char *command;          /* takes one arg and decompresses to stdout */
 252 } compressor;
 253
     /* Description of one language: its name and help text, the function
        that parses its files, and the ways a file can be recognized as
        being written in it (suffix, whole file name, or interpreter).
        NOTE(review): the three string lists are presumably NULL-terminated,
        like the *_suffixes tables in this file -- confirm against users.  */
 254 typedef struct
 255 {
 256   const char *name;             /* language name */
 257   const char *help;             /* detailed help for the language */
 258   Lang_function *function;      /* parse function */
 259   const char **suffixes;        /* name suffixes of this language's files */
 260   const char **filenames;       /* names of this language's files */
 261   const char **interpreters;    /* interpreters for this language */
 262   bool metasource;              /* source used to generate other sources */
 263 } language;
 264
     /* Description of one input file: its names in several forms, its
        language, and what has been or will be written about it in the tags
        file.  Descriptions are chained into a singly linked list via NEXT.  */
 265 typedef struct fdesc
 266 {
 267   struct fdesc *next;           /* for the linked list */
 268   char *infname;                /* uncompressed input file name */
 269   char *infabsname;             /* absolute uncompressed input file name */
 270   char *infabsdir;              /* absolute dir of input file */
 271   char *taggedfname;            /* file name to write in tagfile */
 272   language *lang;               /* language of file */
 273   char *prop;                   /* file properties to write in tagfile */
 274   bool usecharno;               /* etags tags shall contain char number */
 275   bool written;                 /* entry written in the tags file */
 276 } fdesc;
 277
     /* One tag.  Tags are kept in a binary tree linked through LEFT and
        RIGHT; each one records the file it came from, its name, the text
        used to search for it, and its position in that file.  */
 278 typedef struct node_st
 279 {                               /* sorting structure */
 280   struct node_st *left, *right; /* left and right sons */
 281   fdesc *fdp;                   /* description of file to which tag belongs */
 282   char *name;                   /* tag name */
 283   char *regex;                  /* search regexp */
 284   bool valid;                   /* write this tag on the tag file */
 285   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 286   bool been_warned;             /* warning already given for duplicated tag */
 287   int lno;                      /* line number tag is on */
 288   long cno;                     /* character number line starts on */
 289 } node;
 290
 291 /*
 292  * A `linebuffer' is a structure which holds a line of text.
 293  * `readline_internal' reads a line from a stream into a linebuffer
 294  * and works regardless of the length of the line.
 295  * SIZE is the size of BUFFER, LEN is the length of the string in
 296  * BUFFER after readline reads it.
      * Note that SIZE is a long while LEN is an int.
 297  */
 298 typedef struct
 299 {
 300   long size;                    /* size of BUFFER */
 301   int len;                      /* length of the string held in BUFFER */
 302   char *buffer;                 /* storage for the text of the line */
 303 } linebuffer;
 304
 305 /* Used to support mixing of --lang and file names.
        Each instance records one argument: what kind of argument it is,
        the language associated with it, and the argument text itself.
        The at_end type marks where parsing of a list of these stops.  */
 306 typedef struct
 307 {
 308   enum {
 309     at_language,                /* a language specification */
 310     at_regexp,                  /* a regular expression */
 311     at_filename,                /* a file name */
 312     at_stdin,                   /* read from stdin here */
 313     at_end                      /* stop parsing the list */
 314   } arg_type;                   /* argument type */
 315   language *lang;               /* language associated with the argument */
 316   char *what;                   /* the argument itself */
 317 } argument;
 318
 319 /* Structure defining a regular expression.
        It holds the pattern both as text and in compiled form, the name to
        give the resulting tag, and flags controlling how matching is done.
        Instances are chained into a list through P_NEXT.  */
 320 typedef struct regexp
 321 {
 322   struct regexp *p_next;        /* pointer to next in list */
 323   language *lang;               /* if set, use only for this language */
 324   char *pattern;                /* the regexp pattern */
 325   char *name;                   /* tag name */
 326   struct re_pattern_buffer *pat; /* the compiled pattern */
 327   struct re_registers regs;     /* re registers */
 328   bool error_signaled;          /* already signaled for this regexp */
 329   bool force_explicit_name;     /* do not allow implicit tag name */
 330   bool ignore_case;             /* ignore case when matching */
 331   bool multi_line;              /* do a multi-line match on the whole file */
 332 } regexp;
 333
 334
 335 /* Many compilers barf on this:
 336         Lang_function Ada_funcs;
 337    so let's write it this way */
 338 static void Ada_funcs (FILE *);
 339 static void Asm_labels (FILE *);
 340 static void C_entries (int c_ext, FILE *);
 341 static void default_C_entries (FILE *);
 342 static void plain_C_entries (FILE *);
 343 static void Cjava_entries (FILE *);
 344 static void Cobol_paragraphs (FILE *);
 345 static void Cplusplus_entries (FILE *);
 346 static void Cstar_entries (FILE *);
 347 static void Erlang_functions (FILE *);
 348 static void Forth_words (FILE *);
 349 static void Fortran_functions (FILE *);
 350 static void HTML_labels (FILE *);
 351 static void Lisp_functions (FILE *);
 352 static void Lua_functions (FILE *);
 353 static void Makefile_targets (FILE *);
 354 static void Pascal_functions (FILE *);
 355 static void Perl_functions (FILE *);
 356 static void PHP_functions (FILE *);
 357 static void PS_functions (FILE *);
 358 static void Prolog_functions (FILE *);
 359 static void Python_functions (FILE *);
 360 static void Scheme_functions (FILE *);
 361 static void TeX_commands (FILE *);
 362 static void Texinfo_nodes (FILE *);
 363 static void Yacc_entries (FILE *);
 364 static void just_read_file (FILE *);
 365
 366 static void print_language_names (void);
 367 static void print_version (void);
 368 static void print_help (argument *);
 369 int main (int, char **);
 370
 371 static compressor *get_compressor_from_suffix (char *, char **);
 372 static language *get_language_from_langname (const char *);
 373 static language *get_language_from_interpreter (char *);
 374 static language *get_language_from_filename (char *, bool);
 375 static void readline (linebuffer *, FILE *);
 376 static long readline_internal (linebuffer *, FILE *);
 377 static bool nocase_tail (const char *);
 378 static void get_tag (char *, char **);
 379
 380 static void analyse_regex (char *);
 381 static void free_regexps (void);
 382 static void regex_tag_multiline (void);
 383 static void error (const char *, const char *);
 384 static void suggest_asking_for_help (void) NO_RETURN;
 385 void fatal (const char *, const char *) NO_RETURN;
 386 static void pfatal (const char *) NO_RETURN;
 387 static void add_node (node *, node **);
 388
 389 static void init (void);
 390 static void process_file_name (char *, language *);
 391 static void process_file (FILE *, char *, language *);
 392 static void find_entries (FILE *);
 393 static void free_tree (node *);
 394 static void free_fdesc (fdesc *);
 395 static void pfnote (char *, bool, char *, int, int, long);
 396 static void make_tag (const char *, int, bool, char *, int, int, long);
 397 static void invalidate_nodes (fdesc *, node **);
 398 static void put_entries (node *);
 399
 400 static char *concat (const char *, const char *, const char *);
 401 static char *skip_spaces (char *);
 402 static char *skip_non_spaces (char *);
 403 static char *savenstr (const char *, int);
 404 static char *savestr (const char *);
 405 static char *etags_strchr (const char *, int);
 406 static char *etags_strrchr (const char *, int);
 407 static int etags_strcasecmp (const char *, const char *);
 408 static int etags_strncasecmp (const char *, const char *, int);
 409 static char *etags_getcwd (void);
 410 static char *relative_filename (char *, char *);
 411 static char *absolute_filename (char *, char *);
 412 static char *absolute_dirname (char *, char *);
 413 static bool filename_is_absolute (char *f);
 414 static void canonicalize_filename (char *);
 415 static void linebuffer_init (linebuffer *);
 416 static void linebuffer_setlen (linebuffer *, int);
 417 static PTR xmalloc (size_t);
 418 static PTR xrealloc (char *, size_t);
 419
 420 \f
 421 static char searchar = '/';     /* use /.../ searches */
 422
 423 static char *tagfile;           /* output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr for tags file */
 428 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 429
 430 static fdesc *fdhead;           /* head of file description list */
 431 static fdesc *curfdp;           /* current file description */
 432 static int lineno;              /* line number of current line */
 433 static long charno;             /* current character number */
 434 static long linecharno;         /* charno of start of current line */
 435 static char *dbp;               /* pointer to start of current tag */
 436
 437 static const int invalidcharno = -1;
 438
 439 static node *nodehead;          /* the head of the binary tree of tags */
 440 static node *last_node;         /* the last node created */
 441
 442 static linebuffer lb;           /* the current line */
 443 static linebuffer filebuf;      /* a buffer containing the whole file */
 444 static linebuffer token_name;   /* a buffer containing a tag name */
 445
 446 /* boolean "functions" (see init)       */
 447 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 448 static const char
 449   /* white chars */
 450   *white = " \f\t\n\r\v",
 451   /* not in a name */
 452   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 453   /* token ending chars */
 454   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 455   /* token starting chars */
 456   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 457   /* valid in-token chars */
 458   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 459
 460 static bool append_to_tagfile;  /* -a: append to tags */
 461 /* The next five default to TRUE in C and derived languages.  */
 462 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 463 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 464                                 /* 0 struct/enum/union decls, and C++ */
 465                                 /* member functions. */
 466 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 467                                 /* constants and variables. */
 468                                 /* -D: opposite of -d.  Default under ctags. */
 469 static bool globals;            /* create tags for global variables */
 470 static bool members;            /* create tags for C member variables */
 471 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 472 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 473 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 474 static bool update;             /* -u: update tags */
 475 static bool vgrind_style;       /* -v: create vgrind style index output */
 476 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 477 static bool cxref_style;        /* -x: create cxref style output */
 478 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 479 static bool ignoreindent;       /* -I: ignore indentation in C */
 480 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 481
 482 /* STDIN is defined in LynxOS system headers */
 483 #ifdef STDIN
 484 # undef STDIN
 485 #endif
 486
 487 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 488 static bool parsing_stdin;      /* --parse-stdin used */
 489
 490 static regexp *p_head;          /* list of all regexps */
 491 static bool need_filebuf;       /* some regexes are multi-line */
 492
     /* Long option table passed to getopt_long.  Each entry gives the option
        name, whether it takes an argument, a flag pointer and a value.
        With a NULL flag pointer getopt_long returns the value (mostly the
        equivalent short option letter); with a non-NULL one it stores the
        value into the pointed-to variable instead.  The first group is
        common to both programs; the rest depends on CTAGS.  The table ends
        with an entry whose name is NULL.  */
 493 static struct option longopts[] =
 494 {
 495   { "append",             no_argument,       NULL,               'a'   },
 496   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 497   { "c++",                no_argument,       NULL,               'C'   },
 498   { "declarations",       no_argument,       &declarations,      TRUE  },
 499   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 500   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 501   { "help",               no_argument,       NULL,               'h'   },
     /* NOTE(review): same name as the entry above; an exact `--help' is
        presumably resolved to the first match, which would leave this 'H'
        entry unreachable as a long option -- confirm before relying on it.  */
 502   { "help",               no_argument,       NULL,               'H'   },
 503   { "ignore-indentation", no_argument,       NULL,               'I'   },
 504   { "language",           required_argument, NULL,               'l'   },
 505   { "members",            no_argument,       &members,           TRUE  },
 506   { "no-members",         no_argument,       &members,           FALSE },
 507   { "output",             required_argument, NULL,               'o'   },
 508   { "regex",              required_argument, NULL,               'r'   },
 509   { "no-regex",           no_argument,       NULL,               'R'   },
 510   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 511   { "parse-stdin",        required_argument, NULL,               STDIN },
 512   { "version",            no_argument,       NULL,               'V'   },
 513
 514 #if CTAGS /* Ctags options */
 515   { "backward-search",    no_argument,       NULL,               'B'   },
 516   { "cxref",              no_argument,       NULL,               'x'   },
 517   { "defines",            no_argument,       NULL,               'd'   },
 518   { "globals",            no_argument,       &globals,           TRUE  },
 519   { "typedefs",           no_argument,       NULL,               't'   },
 520   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 521   { "update",             no_argument,       NULL,               'u'   },
 522   { "vgrind",             no_argument,       NULL,               'v'   },
 523   { "no-warn",            no_argument,       NULL,               'w'   },
 524
 525 #else /* Etags options */
 526   { "no-defines",         no_argument,       NULL,               'D'   },
 527   { "no-globals",         no_argument,       &globals,           FALSE },
 528   { "include",            required_argument, NULL,               'i'   },
 529 #endif
 530   { NULL }
 531 };
 532
     /* Known compression formats: each entry pairs a file name suffix with
        the command that decompresses such a file to standard output.
        Suffixes are listed in both lower and upper case where needed.  The
        table ends with an entry whose suffix is NULL.  */
 533 static compressor compressors[] =
 534 {
 535   { "z", "gzip -d -c"},
 536   { "Z", "gzip -d -c"},
 537   { "gz", "gzip -d -c"},
 538   { "GZ", "gzip -d -c"},
 539   { "bz2", "bzip2 -d -c" },
 540   { "xz", "xz -d -c" },
 541   { NULL }
 542 };
 543
 544 /*
 545  * Language stuff.
 546  */
 547
 548 /* Ada code */
 549 static const char *Ada_suffixes [] =
 550   { "ads", "adb", "ada", NULL };
 551 static const char Ada_help [] =
 552 "In Ada code, functions, procedures, packages, tasks and types are\n\
 553 tags.  Use the `--packages-only' option to create tags for\n\
 554 packages only.\n\
 555 Ada tag names have suffixes indicating the type of entity:\n\
 556         Entity type:    Qualifier:\n\
 557         ------------    ----------\n\
 558         function        /f\n\
 559         procedure       /p\n\
 560         package spec    /s\n\
 561         package body    /b\n\
 562         type            /t\n\
 563         task            /k\n\
 564 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 565 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 566 will just search for any tag `bidule'.";
 567
 568 /* Assembly code */
 569 static const char *Asm_suffixes [] =
 570   { "a",        /* Unix assembler */
 571     "asm", /* Microcontroller assembly */
 572     "def", /* BSO/Tasking definition includes  */
 573     "inc", /* Microcontroller include files */
 574     "ins", /* Microcontroller include files */
 575     "s", "sa", /* Unix assembler */
 576     "S",   /* cpp-processed Unix assembler */
 577     "src", /* BSO/Tasking C compiler output */
 578     NULL
 579   };
 580 static const char Asm_help [] =
 581 "In assembler code, labels appearing at the beginning of a line,\n\
 582 followed by a colon, are tags.";
 583
 584
 585 /* Note that .c and .h can be considered C++, if the --c++ flag was
 586    given, or if the `class' or `template' keywords are met inside the file.
 587    That is why default_C_entries is called for these. */
 588 static const char *default_C_suffixes [] =
 589   { "c", "h", NULL };
 590 #if CTAGS                               /* C help for Ctags */
 591 static const char default_C_help [] =
 592 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 593 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 594 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 595 Use --globals to tag global variables.\n\
 596 You can tag function declarations and external variables by\n\
 597 using `--declarations', and struct members by using `--members'.";
 598 #else                                   /* C help for Etags */
 599 static const char default_C_help [] =
 600 "In C code, any C function or typedef is a tag, and so are\n\
 601 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 602 definitions and `enum' constants are tags unless you specify\n\
 603 `--no-defines'.  Global variables are tags unless you specify\n\
 604 `--no-globals' and so are struct members unless you specify\n\
 605 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 606 `--no-members' can make the tags table file much smaller.\n\
 607 You can tag function declarations and external variables by\n\
 608 using `--declarations'.";
 609 #endif  /* C help for Ctags and Etags */
 610
 611 static const char *Cplusplus_suffixes [] =
 612   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 613     "M",                        /* Objective C++ */
 614     "pdb",                      /* Postscript with C syntax */
 615     NULL };
 616 static const char Cplusplus_help [] =
 617 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 618 --help --lang=c --lang=c++ for full help.)\n\
 619 In addition to C tags, member functions are also recognized.  Member\n\
 620 variables are recognized unless you use the `--no-members' option.\n\
 621 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 622 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 623 `operator+'.";
 624
 625 static const char *Cjava_suffixes [] =
 626   { "java", NULL };
 627 static char Cjava_help [] =
 628 "In Java code, all the tags constructs of C and C++ code are\n\
 629 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 630
 631
 632 static const char *Cobol_suffixes [] =
 633   { "COB", "cob", NULL };
 634 static char Cobol_help [] =
 635 "In Cobol code, tags are paragraph names; that is, any word\n\
 636 starting in column 8 and followed by a period.";
 637
 638 static const char *Cstar_suffixes [] =
 639   { "cs", "hs", NULL };
 640
 641 static const char *Erlang_suffixes [] =
 642   { "erl", "hrl", NULL };
 643 static const char Erlang_help [] =
 644 "In Erlang code, the tags are the functions, records and macros\n\
 645 defined in the file.";
 646
 647 const char *Forth_suffixes [] =
 648   { "fth", "tok", NULL };
 649 static const char Forth_help [] =
 650 "In Forth code, tags are words defined by `:',\n\
 651 constant, code, create, defer, value, variable, buffer:, field.";
 652
 653 static const char *Fortran_suffixes [] =
 654   { "F", "f", "f90", "for", NULL };
 655 static const char Fortran_help [] =
 656 "In Fortran code, functions, subroutines and block data are tags.";
 657
 658 static const char *HTML_suffixes [] =
 659   { "htm", "html", "shtml", NULL };
 660 static const char HTML_help [] =
 661 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 662 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 663 occurrences of `id='.";
 664
 665 static const char *Lisp_suffixes [] =
 666   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 667 static const char Lisp_help [] =
 668 "In Lisp code, any function defined with `defun', any variable\n\
 669 defined with `defvar' or `defconst', and in general the first\n\
 670 argument of any expression that starts with `(def' in column zero\n\
 671 is a tag.";
 672
 673 static const char *Lua_suffixes [] =
 674   { "lua", "LUA", NULL };
 675 static const char Lua_help [] =
 676 "In Lua scripts, all functions are tags.";
 677
 678 static const char *Makefile_filenames [] =
 679   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 680 static const char Makefile_help [] =
 681 "In makefiles, targets are tags; additionally, variables are tags\n\
 682 unless you specify `--no-globals'.";
 683
 684 static const char *Objc_suffixes [] =
 685   { "lm",                       /* Objective lex file */
 686     "m",                        /* Objective C file */
 687      NULL };
 688 static const char Objc_help [] =
 689 "In Objective C code, tags include Objective C definitions for classes,\n\
 690 class categories, methods and protocols.  Tags for variables and\n\
 691 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 692 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 693
 694 static const char *Pascal_suffixes [] =
 695   { "p", "pas", NULL };
 696 static const char Pascal_help [] =
 697 "In Pascal code, the tags are the functions and procedures defined\n\
 698 in the file.";
 699 /* " // this is for working around an Emacs highlighting bug... */
 700
 701 static const char *Perl_suffixes [] =
 702   { "pl", "pm", NULL };
 703 static const char *Perl_interpreters [] =
 704   { "perl", "@PERL@", NULL };
 705 static const char Perl_help [] =
 706 "In Perl code, the tags are the packages, subroutines and variables\n\
 707 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 708 `--globals' if you want to tag global variables.  Tags for\n\
 709 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 710 defined in the default package is `main::SUB'.";
 711
 712 static const char *PHP_suffixes [] =
 713   { "php", "php3", "php4", NULL };
 714 static const char PHP_help [] =
 715 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 716 the `--no-members' option, vars are tags too.";
 717
 718 static const char *plain_C_suffixes [] =
 719   { "pc",                       /* Pro*C file */
 720      NULL };
 721
 722 static const char *PS_suffixes [] =
 723   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 724 static const char PS_help [] =
 725 "In PostScript code, the tags are the functions.";
 726
 727 static const char *Prolog_suffixes [] =
 728   { "prolog", NULL };
 729 static const char Prolog_help [] =
 730 "In Prolog code, tags are predicates and rules at the beginning of\n\
 731 line.";
 732
 733 static const char *Python_suffixes [] =
 734   { "py", NULL };
 735 static const char Python_help [] =
 736 "In Python code, `def' or `class' at the beginning of a line\n\
 737 generate a tag.";
 738
 739 /* Can't do the `SCM' or `scm' prefix with a version number. */
 740 static const char *Scheme_suffixes [] =
 741   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 742 static const char Scheme_help [] =
 743 "In Scheme code, tags include anything defined with `def' or with a\n\
 744 construct whose name starts with `def'.  They also include\n\
 745 variables set with `set!' at top level in the file.";
 746
 747 static const char *TeX_suffixes [] =
 748   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 749 static const char TeX_help [] =
 750 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 751 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 752 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 753 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 754 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 755 \n\
 756 Other commands can be specified by setting the environment variable\n\
 757 `TEXTAGS' to a colon-separated list like, for example,\n\
 758      TEXTAGS=\"mycommand:myothercommand\".";
 759
 760
 761 static const char *Texinfo_suffixes [] =
 762   { "texi", "texinfo", "txi", NULL };
 763 static const char Texinfo_help [] =
 764 "for texinfo files, lines starting with @node are tagged.";
 765
 766 static const char *Yacc_suffixes [] =
 767   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 768 static const char Yacc_help [] =
 769 "In Bison or Yacc input files, each rule defines as a tag the\n\
 770 nonterminal it constructs.  The portions of the file that contain\n\
 771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 772 for full help).";
 773
 774 static const char auto_help [] =
 775 "`auto' is not a real language, it indicates to use\n\
 776 a default language for files base on file name suffix and file contents.";
 777
 778 static const char none_help [] =
 779 "`none' is not a real language, it indicates to only do\n\
 780 regexp processing on files.";
 781
 782 static const char no_lang_help [] =
 783 "No detailed help available for this language.";
 784
 785
 786 /*
 787  * Table of languages.
 788  *
 789  * It is ok for a given function to be listed under more than one
 790  * name.  I just didn't.
 791  */
 792
 793 static language lang_names [] =
 794 {
 795   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 796   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 797   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 798   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 799   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 800   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 801   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 802   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 803   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 804   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 805   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 806   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 807   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 808   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 809   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 810   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 811   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 812   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 813   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 814   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 815   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 816   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 817   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 818   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 819   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 820   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 821   { "auto",      auto_help },                      /* default guessing scheme */
 822   { "none",      none_help,      just_read_file }, /* regexp matching only */
 823   { NULL }                /* end of list */
 824 };
 825
 826 \f
 827 static void
 828 print_language_names (void)
 829 {
 830   language *lang;
 831   const char **name, **ext;
 832
 833   puts ("\nThese are the currently supported languages, along with the\n\
 834 default file names and dot suffixes:");
 835   for (lang = lang_names; lang->name != NULL; lang++)
 836     {
 837       printf ("  %-*s", 10, lang->name);
 838       if (lang->filenames != NULL)
 839         for (name = lang->filenames; *name != NULL; name++)
 840           printf (" %s", *name);
 841       if (lang->suffixes != NULL)
 842         for (ext = lang->suffixes; *ext != NULL; ext++)
 843           printf (" .%s", *ext);
 844       puts ("");
 845     }
 846   puts ("where `auto' means use default language for files based on file\n\
 847 name suffix, and `none' means only do regexp processing on files.\n\
 848 If no language is specified and no matching suffix is found,\n\
 849 the first line of the file is read for a sharp-bang (#!) sequence\n\
 850 followed by the name of an interpreter.  If no such sequence is found,\n\
 851 Fortran is tried first; if no tags are found, C is tried next.\n\
 852 When parsing any C file, a \"class\" or \"template\" keyword\n\
 853 switches to C++.");
 854   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 855 \n\
 856 For detailed help on a given language use, for example,\n\
 857 etags --help --lang=ada.");
 858 }
 859
 860 #ifndef EMACS_NAME
 861 # define EMACS_NAME "standalone"
 862 #endif
 863 #ifndef VERSION
 864 # define VERSION "17.38.1.4"
 865 #endif
 866 static void
 867 print_version (void)
 868 {
 869   /* Makes it easier to update automatically. */
 870   char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
 871
 872   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 873   puts (emacs_copyright);
 874   puts ("This program is distributed under the terms in ETAGS.README");
 875
 876   exit (EXIT_SUCCESS);
 877 }
 878
 879 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 880 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 881 #endif
 882
 883 static void
 884 print_help (argument *argbuffer)
 885 {
 886   bool help_for_lang = FALSE;
 887
 888   for (; argbuffer->arg_type != at_end; argbuffer++)
 889     if (argbuffer->arg_type == at_language)
 890       {
 891         if (help_for_lang)
 892           puts ("");
 893         puts (argbuffer->lang->help);
 894         help_for_lang = TRUE;
 895       }
 896
 897   if (help_for_lang)
 898     exit (EXIT_SUCCESS);
 899
 900   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 901 \n\
 902 These are the options accepted by %s.\n", progname, progname);
 903   if (NO_LONG_OPTIONS)
 904     puts ("WARNING: long option names do not work with this executable,\n\
 905 as it is not linked with GNU getopt.");
 906   else
 907     puts ("You may use unambiguous abbreviations for the long option names.");
 908   puts ("  A - as file name means read names from stdin (one per line).\n\
 909 Absolute names are stored in the output file as they are.\n\
 910 Relative ones are stored relative to the output file's directory.\n");
 911
 912   puts ("-a, --append\n\
 913         Append tag entries to existing tags file.");
 914
 915   puts ("--packages-only\n\
 916         For Ada files, only generate tags for packages.");
 917
 918   if (CTAGS)
 919     puts ("-B, --backward-search\n\
 920         Write the search commands for the tag entries using '?', the\n\
 921         backward-search command instead of '/', the forward-search command.");
 922
 923   /* This option is mostly obsolete, because etags can now automatically
 924      detect C++.  Retained for backward compatibility and for debugging and
 925      experimentation.  In principle, we could want to tag as C++ even
 926      before any "class" or "template" keyword.
 927   puts ("-C, --c++\n\
 928         Treat files whose name suffix defaults to C language as C++ files.");
 929   */
 930
 931   puts ("--declarations\n\
 932         In C and derived languages, create tags for function declarations,");
 933   if (CTAGS)
 934     puts ("\tand create tags for extern variables if --globals is used.");
 935   else
 936     puts
 937       ("\tand create tags for extern variables unless --no-globals is used.");
 938
 939   if (CTAGS)
 940     puts ("-d, --defines\n\
 941         Create tag entries for C #define constants and enum constants, too.");
 942   else
 943     puts ("-D, --no-defines\n\
 944         Don't create tag entries for C #define constants and enum constants.\n\
 945         This makes the tags file smaller.");
 946
 947   if (!CTAGS)
 948     puts ("-i FILE, --include=FILE\n\
 949         Include a note in tag file indicating that, when searching for\n\
 950         a tag, one should also consult the tags file FILE after\n\
 951         checking the current file.");
 952
 953   puts ("-l LANG, --language=LANG\n\
 954         Force the following files to be considered as written in the\n\
 955         named language up to the next --language=LANG option.");
 956
 957   if (CTAGS)
 958     puts ("--globals\n\
 959         Create tag entries for global variables in some languages.");
 960   else
 961     puts ("--no-globals\n\
 962         Do not create tag entries for global variables in some\n\
 963         languages.  This makes the tags file smaller.");
 964
 965   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 966     puts ("--no-line-directive\n\
 967         Ignore #line preprocessor directives in C and derived languages.");
 968
 969   if (CTAGS)
 970     puts ("--members\n\
 971         Create tag entries for members of structures in some languages.");
 972   else
 973     puts ("--no-members\n\
 974         Do not create tag entries for members of structures\n\
 975         in some languages.");
 976
 977   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 978         Make a tag for each line matching a regular expression pattern\n\
 979         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 980         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 981         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 982         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 983   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 984         For example Tcl named tags can be created with:\n\
 985           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 986         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 987         `m' means to allow multi-line matches, `s' implies `m' and\n\
 988         causes dot to match any character, including newline.");
 989
 990   puts ("-R, --no-regex\n\
 991         Don't create tags from regexps for the following files.");
 992
 993   puts ("-I, --ignore-indentation\n\
 994         In C and C++ do not assume that a closing brace in the first\n\
 995         column is the final brace of a function or structure definition.");
 996
 997   puts ("-o FILE, --output=FILE\n\
 998         Write the tags to FILE.");
 999
1000   puts ("--parse-stdin=NAME\n\
1001         Read from standard input and record tags as belonging to file NAME.");
1002
1003   if (CTAGS)
1004     {
1005       puts ("-t, --typedefs\n\
1006         Generate tag entries for C and Ada typedefs.");
1007       puts ("-T, --typedefs-and-c++\n\
1008         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1009         and C++ member functions.");
1010     }
1011
1012   if (CTAGS)
1013     puts ("-u, --update\n\
1014         Update the tag entries for the given files, leaving tag\n\
1015         entries for other files in place.  Currently, this is\n\
1016         implemented by deleting the existing entries for the given\n\
1017         files and then rewriting the new entries at the end of the\n\
1018         tags file.  It is often faster to simply rebuild the entire\n\
1019         tag file than to use this.");
1020
1021   if (CTAGS)
1022     {
1023       puts ("-v, --vgrind\n\
1024         Print on the standard output an index of items intended for\n\
1025         human consumption, similar to the output of vgrind.  The index\n\
1026         is sorted, and gives the page number of each item.");
1027
1028       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1029         puts ("-w, --no-duplicates\n\
1030         Do not create duplicate tag entries, for compatibility with\n\
1031         traditional ctags.");
1032
1033       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1034         puts ("-w, --no-warn\n\
1035         Suppress warning messages about duplicate tag entries.");
1036
1037       puts ("-x, --cxref\n\
1038         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1039         The output uses line numbers instead of page numbers, but\n\
1040         beyond that the differences are cosmetic; try both to see\n\
1041         which you like.");
1042     }
1043
1044   puts ("-V, --version\n\
1045         Print the version of the program.\n\
1046 -h, --help\n\
1047         Print this help message.\n\
1048         Followed by one or more `--language' options prints detailed\n\
1049         help about tag generation for the specified languages.");
1050
1051   print_language_names ();
1052
1053   puts ("");
1054   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1055
1056   exit (EXIT_SUCCESS);
1057 }
1058
1059 \f
1060 int
1061 main (int argc, char **argv)
1062 {
1063   int i;
1064   unsigned int nincluded_files;
1065   char **included_files;
1066   argument *argbuffer;
1067   int current_arg, file_count;
1068   linebuffer filename_lb;
1069   bool help_asked = FALSE;
1070   ptrdiff_t len;
1071  char *optstring;
1072  int opt;
1073
1074
1075 #ifdef DOS_NT
1076   _fmode = O_BINARY;   /* all of files are treated as binary files */
1077 #endif /* DOS_NT */
1078
1079   progname = argv[0];
1080   nincluded_files = 0;
1081   included_files = xnew (argc, char *);
1082   current_arg = 0;
1083   file_count = 0;
1084
1085   /* Allocate enough no matter what happens.  Overkill, but each one
1086      is small. */
1087   argbuffer = xnew (argc, argument);
1088
1089   /*
1090    * Always find typedefs and structure tags.
1091    * Also default to find macro constants, enum constants, struct
1092    * members and global variables.  Do it for both etags and ctags.
1093    */
1094   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1095   globals = members = TRUE;
1096
1097   /* When the optstring begins with a '-' getopt_long does not rearrange the
1098      non-options arguments to be at the end, but leaves them alone. */
1099   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1100                       "ac:Cf:Il:o:r:RSVhH",
1101                       (CTAGS) ? "BxdtTuvw" : "Di:");
1102
1103   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1104     switch (opt)
1105       {
1106       case 0:
1107         /* If getopt returns 0, then it has already processed a
1108            long-named option.  We should do nothing.  */
1109         break;
1110
1111       case 1:
1112         /* This means that a file name has been seen.  Record it. */
1113         argbuffer[current_arg].arg_type = at_filename;
1114         argbuffer[current_arg].what     = optarg;
1115         len = strlen (optarg);
1116         if (whatlen_max < len)
1117           whatlen_max = len;
1118         ++current_arg;
1119         ++file_count;
1120         break;
1121
1122       case STDIN:
1123         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1124         argbuffer[current_arg].arg_type = at_stdin;
1125         argbuffer[current_arg].what     = optarg;
1126         len = strlen (optarg);
1127         if (whatlen_max < len)
1128           whatlen_max = len;
1129         ++current_arg;
1130         ++file_count;
1131         if (parsing_stdin)
1132           fatal ("cannot parse standard input more than once", (char *)NULL);
1133         parsing_stdin = TRUE;
1134         break;
1135
1136         /* Common options. */
1137       case 'a': append_to_tagfile = TRUE;       break;
1138       case 'C': cplusplus = TRUE;               break;
1139       case 'f':         /* for compatibility with old makefiles */
1140       case 'o':
1141         if (tagfile)
1142           {
1143             error ("-o option may only be given once.", (char *)NULL);
1144             suggest_asking_for_help ();
1145             /* NOTREACHED */
1146           }
1147         tagfile = optarg;
1148         break;
1149       case 'I':
1150       case 'S':         /* for backward compatibility */
1151         ignoreindent = TRUE;
1152         break;
1153       case 'l':
1154         {
1155           language *lang = get_language_from_langname (optarg);
1156           if (lang != NULL)
1157             {
1158               argbuffer[current_arg].lang = lang;
1159               argbuffer[current_arg].arg_type = at_language;
1160               ++current_arg;
1161             }
1162         }
1163         break;
1164       case 'c':
1165         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1166         optarg = concat (optarg, "i", ""); /* memory leak here */
1167         /* FALLTHRU */
1168       case 'r':
1169         argbuffer[current_arg].arg_type = at_regexp;
1170         argbuffer[current_arg].what = optarg;
1171         len = strlen (optarg);
1172         if (whatlen_max < len)
1173           whatlen_max = len;
1174         ++current_arg;
1175         break;
1176       case 'R':
1177         argbuffer[current_arg].arg_type = at_regexp;
1178         argbuffer[current_arg].what = NULL;
1179         ++current_arg;
1180         break;
1181       case 'V':
1182         print_version ();
1183         break;
1184       case 'h':
1185       case 'H':
1186         help_asked = TRUE;
1187         break;
1188
1189         /* Etags options */
1190       case 'D': constantypedefs = FALSE;                        break;
1191       case 'i': included_files[nincluded_files++] = optarg;     break;
1192
1193         /* Ctags options. */
1194       case 'B': searchar = '?';                                 break;
1195       case 'd': constantypedefs = TRUE;                         break;
1196       case 't': typedefs = TRUE;                                break;
1197       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1198       case 'u': update = TRUE;                                  break;
1199       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1200       case 'x': cxref_style = TRUE;                             break;
1201       case 'w': no_warnings = TRUE;                             break;
1202       default:
1203         suggest_asking_for_help ();
1204         /* NOTREACHED */
1205       }
1206
1207   /* No more options.  Store the rest of arguments. */
1208   for (; optind < argc; optind++)
1209     {
1210       argbuffer[current_arg].arg_type = at_filename;
1211       argbuffer[current_arg].what = argv[optind];
1212       len = strlen (argv[optind]);
1213       if (whatlen_max < len)
1214         whatlen_max = len;
1215       ++current_arg;
1216       ++file_count;
1217     }
1218
1219   argbuffer[current_arg].arg_type = at_end;
1220
1221   if (help_asked)
1222     print_help (argbuffer);
1223     /* NOTREACHED */
1224
1225   if (nincluded_files == 0 && file_count == 0)
1226     {
1227       error ("no input files specified.", (char *)NULL);
1228       suggest_asking_for_help ();
1229       /* NOTREACHED */
1230     }
1231
1232   if (tagfile == NULL)
1233     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1234   cwd = etags_getcwd ();        /* the current working directory */
1235   if (cwd[strlen (cwd) - 1] != '/')
1236     {
1237       char *oldcwd = cwd;
1238       cwd = concat (oldcwd, "/", "");
1239       free (oldcwd);
1240     }
1241
1242   /* Compute base directory for relative file names. */
1243   if (streq (tagfile, "-")
1244       || strneq (tagfile, "/dev/", 5))
1245     tagfiledir = cwd;            /* relative file names are relative to cwd */
1246   else
1247     {
1248       canonicalize_filename (tagfile);
1249       tagfiledir = absolute_dirname (tagfile, cwd);
1250     }
1251
1252   init ();                      /* set up boolean "functions" */
1253
1254   linebuffer_init (&lb);
1255   linebuffer_init (&filename_lb);
1256   linebuffer_init (&filebuf);
1257   linebuffer_init (&token_name);
1258
1259   if (!CTAGS)
1260     {
1261       if (streq (tagfile, "-"))
1262         {
1263           tagf = stdout;
1264 #ifdef DOS_NT
1265           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1266              doesn't take effect until after `stdout' is already open). */
1267           if (!isatty (fileno (stdout)))
1268             setmode (fileno (stdout), O_BINARY);
1269 #endif /* DOS_NT */
1270         }
1271       else
1272         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1273       if (tagf == NULL)
1274         pfatal (tagfile);
1275     }
1276
1277   /*
1278    * Loop through files finding functions.
1279    */
1280   for (i = 0; i < current_arg; i++)
1281     {
1282       static language *lang;    /* non-NULL if language is forced */
1283       char *this_file;
1284
1285       switch (argbuffer[i].arg_type)
1286         {
1287         case at_language:
1288           lang = argbuffer[i].lang;
1289           break;
1290         case at_regexp:
1291           analyse_regex (argbuffer[i].what);
1292           break;
1293         case at_filename:
1294               this_file = argbuffer[i].what;
1295               /* Input file named "-" means read file names from stdin
1296                  (one per line) and use them. */
1297               if (streq (this_file, "-"))
1298                 {
1299                   if (parsing_stdin)
1300                     fatal ("cannot parse standard input AND read file names from it",
1301                            (char *)NULL);
1302                   while (readline_internal (&filename_lb, stdin) > 0)
1303                     process_file_name (filename_lb.buffer, lang);
1304                 }
1305               else
1306                 process_file_name (this_file, lang);
1307           break;
1308         case at_stdin:
1309           this_file = argbuffer[i].what;
1310           process_file (stdin, this_file, lang);
1311           break;
1312         }
1313     }
1314
1315   free_regexps ();
1316   free (lb.buffer);
1317   free (filebuf.buffer);
1318   free (token_name.buffer);
1319
1320   if (!CTAGS || cxref_style)
1321     {
1322       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1323       put_entries (nodehead);
1324       free_tree (nodehead);
1325       nodehead = NULL;
1326       if (!CTAGS)
1327         {
1328           fdesc *fdp;
1329
1330           /* Output file entries that have no tags. */
1331           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1332             if (!fdp->written)
1333               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1334
1335           while (nincluded_files-- > 0)
1336             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1337
1338           if (fclose (tagf) == EOF)
1339             pfatal (tagfile);
1340         }
1341
1342       exit (EXIT_SUCCESS);
1343     }
1344
1345   /* From here on, we are in (CTAGS && !cxref_style) */
1346   if (update)
1347     {
1348       char *cmd =
1349         xmalloc (strlen (tagfile) + whatlen_max +
1350                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1351       for (i = 0; i < current_arg; ++i)
1352         {
1353           switch (argbuffer[i].arg_type)
1354             {
1355             case at_filename:
1356             case at_stdin:
1357               break;
1358             default:
1359               continue;         /* the for loop */
1360             }
1361           strcpy (cmd, "mv ");
1362           strcat (cmd, tagfile);
1363           strcat (cmd, " OTAGS;fgrep -v '\t");
1364           strcat (cmd, argbuffer[i].what);
1365           strcat (cmd, "\t' OTAGS >");
1366           strcat (cmd, tagfile);
1367           strcat (cmd, ";rm OTAGS");
1368           if (system (cmd) != EXIT_SUCCESS)
1369             fatal ("failed to execute shell command", (char *)NULL);
1370         }
1371       free (cmd);
1372       append_to_tagfile = TRUE;
1373     }
1374
1375   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1376   if (tagf == NULL)
1377     pfatal (tagfile);
1378   put_entries (nodehead);       /* write all the tags (CTAGS) */
1379   free_tree (nodehead);
1380   nodehead = NULL;
1381   if (fclose (tagf) == EOF)
1382     pfatal (tagfile);
1383
1384   if (CTAGS)
1385     if (append_to_tagfile || update)
1386       {
1387         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1388         /* Maybe these should be used:
1389            setenv ("LC_COLLATE", "C", 1);
1390            setenv ("LC_ALL", "C", 1); */
1391         strcpy (cmd, "sort -u -o ");
1392         strcat (cmd, tagfile);
1393         strcat (cmd, " ");
1394         strcat (cmd, tagfile);
1395         exit (system (cmd));
1396       }
1397   return EXIT_SUCCESS;
1398 }
1399
1400
1401 /*
1402  * Return a compressor given the file name.  If EXTPTR is non-zero,
1403  * return a pointer into FILE where the compressor-specific
1404  * extension begins.  If no compressor is found, NULL is returned
1405  * and EXTPTR is not significant.
1406  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1407  */
1408 static compressor *
1409 get_compressor_from_suffix (char *file, char **extptr)
1410 {
1411   compressor *compr;
1412   char *slash, *suffix;
1413
1414   /* File has been processed by canonicalize_filename,
1415      so we don't need to consider backslashes on DOS_NT.  */
1416   slash = etags_strrchr (file, '/');
1417   suffix = etags_strrchr (file, '.');
1418   if (suffix == NULL || suffix < slash)
1419     return NULL;
1420   if (extptr != NULL)
1421     *extptr = suffix;
1422   suffix += 1;
1423   /* Let those poor souls who live with DOS 8+3 file name limits get
1424      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1425      Only the first do loop is run if not MSDOS */
1426   do
1427     {
1428       for (compr = compressors; compr->suffix != NULL; compr++)
1429         if (streq (compr->suffix, suffix))
1430           return compr;
1431       if (!MSDOS)
1432         break;                  /* do it only once: not really a loop */
1433       if (extptr != NULL)
1434         *extptr = ++suffix;
1435     } while (*suffix != '\0');
1436   return NULL;
1437 }
1438
1439
1440
1441 /*
1442  * Return a language given the name.
1443  */
1444 static language *
1445 get_language_from_langname (const char *name)
1446 {
1447   language *lang;
1448
1449   if (name == NULL)
1450     error ("empty language name", (char *)NULL);
1451   else
1452     {
1453       for (lang = lang_names; lang->name != NULL; lang++)
1454         if (streq (name, lang->name))
1455           return lang;
1456       error ("unknown language \"%s\"", name);
1457     }
1458
1459   return NULL;
1460 }
1461
1462
1463 /*
1464  * Return a language given the interpreter name.
1465  */
1466 static language *
1467 get_language_from_interpreter (char *interpreter)
1468 {
1469   language *lang;
1470   const char **iname;
1471
1472   if (interpreter == NULL)
1473     return NULL;
1474   for (lang = lang_names; lang->name != NULL; lang++)
1475     if (lang->interpreters != NULL)
1476       for (iname = lang->interpreters; *iname != NULL; iname++)
1477         if (streq (*iname, interpreter))
1478             return lang;
1479
1480   return NULL;
1481 }
1482
1483
1484
1485 /*
1486  * Return a language given the file name.
1487  */
1488 static language *
1489 get_language_from_filename (char *file, int case_sensitive)
1490 {
1491   language *lang;
1492   const char **name, **ext, *suffix;
1493
1494   /* Try whole file name first. */
1495   for (lang = lang_names; lang->name != NULL; lang++)
1496     if (lang->filenames != NULL)
1497       for (name = lang->filenames; *name != NULL; name++)
1498         if ((case_sensitive)
1499             ? streq (*name, file)
1500             : strcaseeq (*name, file))
1501           return lang;
1502
1503   /* If not found, try suffix after last dot. */
1504   suffix = etags_strrchr (file, '.');
1505   if (suffix == NULL)
1506     return NULL;
1507   suffix += 1;
1508   for (lang = lang_names; lang->name != NULL; lang++)
1509     if (lang->suffixes != NULL)
1510       for (ext = lang->suffixes; *ext != NULL; ext++)
1511         if ((case_sensitive)
1512             ? streq (*ext, suffix)
1513             : strcaseeq (*ext, suffix))
1514           return lang;
1515   return NULL;
1516 }
1517
1518 \f
1519 /*
1520  * This routine is called on each file argument.
1521  */
1522 static void
1523 process_file_name (char *file, language *lang)
1524 {
1525   struct stat stat_buf;
1526   FILE *inf;
1527   fdesc *fdp;
1528   compressor *compr;
1529   char *compressed_name, *uncompressed_name;
1530   char *ext, *real_name;
1531   int retval;
1532
1533   canonicalize_filename (file);
1534   if (streq (file, tagfile) && !streq (tagfile, "-"))
1535     {
1536       error ("skipping inclusion of %s in self.", file);
1537       return;
1538     }
1539   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1540     {
1541       compressed_name = NULL;
1542       real_name = uncompressed_name = savestr (file);
1543     }
1544   else
1545     {
1546       real_name = compressed_name = savestr (file);
1547       uncompressed_name = savenstr (file, ext - file);
1548     }
1549
1550   /* If the canonicalized uncompressed name
1551      has already been dealt with, skip it silently. */
1552   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1553     {
1554       assert (fdp->infname != NULL);
1555       if (streq (uncompressed_name, fdp->infname))
1556         goto cleanup;
1557     }
1558
1559   if (stat (real_name, &stat_buf) != 0)
1560     {
1561       /* Reset real_name and try with a different name. */
1562       real_name = NULL;
1563       if (compressed_name != NULL) /* try with the given suffix */
1564         {
1565           if (stat (uncompressed_name, &stat_buf) == 0)
1566             real_name = uncompressed_name;
1567         }
1568       else                      /* try all possible suffixes */
1569         {
1570           for (compr = compressors; compr->suffix != NULL; compr++)
1571             {
1572               compressed_name = concat (file, ".", compr->suffix);
1573               if (stat (compressed_name, &stat_buf) != 0)
1574                 {
1575                   if (MSDOS)
1576                     {
1577                       char *suf = compressed_name + strlen (file);
1578                       size_t suflen = strlen (compr->suffix) + 1;
1579                       for ( ; suf[1]; suf++, suflen--)
1580                         {
1581                           memmove (suf, suf + 1, suflen);
1582                           if (stat (compressed_name, &stat_buf) == 0)
1583                             {
1584                               real_name = compressed_name;
1585                               break;
1586                             }
1587                         }
1588                       if (real_name != NULL)
1589                         break;
1590                     } /* MSDOS */
1591                   free (compressed_name);
1592                   compressed_name = NULL;
1593                 }
1594               else
1595                 {
1596                   real_name = compressed_name;
1597                   break;
1598                 }
1599             }
1600         }
1601       if (real_name == NULL)
1602         {
1603           perror (file);
1604           goto cleanup;
1605         }
1606     } /* try with a different name */
1607
1608   if (!S_ISREG (stat_buf.st_mode))
1609     {
1610       error ("skipping %s: it is not a regular file.", real_name);
1611       goto cleanup;
1612     }
1613   if (real_name == compressed_name)
1614     {
1615       char *cmd = concat (compr->command, " ", real_name);
1616       inf = (FILE *) popen (cmd, "r");
1617       free (cmd);
1618     }
1619   else
1620     inf = fopen (real_name, "r");
1621   if (inf == NULL)
1622     {
1623       perror (real_name);
1624       goto cleanup;
1625     }
1626
1627   process_file (inf, uncompressed_name, lang);
1628
1629   if (real_name == compressed_name)
1630     retval = pclose (inf);
1631   else
1632     retval = fclose (inf);
1633   if (retval < 0)
1634     pfatal (file);
1635
1636  cleanup:
1637   free (compressed_name);
1638   free (uncompressed_name);
1639   last_node = NULL;
1640   curfdp = NULL;
1641   return;
1642 }
1643
1644 static void
1645 process_file (FILE *fh, char *fn, language *lang)
1646 {
1647   static const fdesc emptyfdesc;
1648   fdesc *fdp;
1649
1650   /* Create a new input file description entry. */
1651   fdp = xnew (1, fdesc);
1652   *fdp = emptyfdesc;
1653   fdp->next = fdhead;
1654   fdp->infname = savestr (fn);
1655   fdp->lang = lang;
1656   fdp->infabsname = absolute_filename (fn, cwd);
1657   fdp->infabsdir = absolute_dirname (fn, cwd);
1658   if (filename_is_absolute (fn))
1659     {
1660       /* An absolute file name.  Canonicalize it. */
1661       fdp->taggedfname = absolute_filename (fn, NULL);
1662     }
1663   else
1664     {
1665       /* A file name relative to cwd.  Make it relative
1666          to the directory of the tags file. */
1667       fdp->taggedfname = relative_filename (fn, tagfiledir);
1668     }
1669   fdp->usecharno = TRUE;        /* use char position when making tags */
1670   fdp->prop = NULL;
1671   fdp->written = FALSE;         /* not written on tags file yet */
1672
1673   fdhead = fdp;
1674   curfdp = fdhead;              /* the current file description */
1675
1676   find_entries (fh);
1677
1678   /* If not Ctags, and if this is not metasource and if it contained no #line
1679      directives, we can write the tags and free all nodes pointing to
1680      curfdp. */
1681   if (!CTAGS
1682       && curfdp->usecharno      /* no #line directives in this file */
1683       && !curfdp->lang->metasource)
1684     {
1685       node *np, *prev;
1686
1687       /* Look for the head of the sublist relative to this file.  See add_node
1688          for the structure of the node tree. */
1689       prev = NULL;
1690       for (np = nodehead; np != NULL; prev = np, np = np->left)
1691         if (np->fdp == curfdp)
1692           break;
1693
1694       /* If we generated tags for this file, write and delete them. */
1695       if (np != NULL)
1696         {
1697           /* This is the head of the last sublist, if any.  The following
1698              instructions depend on this being true. */
1699           assert (np->left == NULL);
1700
1701           assert (fdhead == curfdp);
1702           assert (last_node->fdp == curfdp);
1703           put_entries (np);     /* write tags for file curfdp->taggedfname */
1704           free_tree (np);       /* remove the written nodes */
1705           if (prev == NULL)
1706             nodehead = NULL;    /* no nodes left */
1707           else
1708             prev->left = NULL;  /* delete the pointer to the sublist */
1709         }
1710     }
1711 }
1712
1713 /*
1714  * This routine sets up the boolean pseudo-functions which work
1715  * by setting boolean flags dependent upon the corresponding character.
1716  * Every char which is NOT in that string is not a white char.  Therefore,
1717  * all of the array "_wht" is set to FALSE, and then the elements
1718  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1719  * of a char is TRUE if it is the string "white", else FALSE.
1720  */
1721 static void
1722 init (void)
1723 {
1724   register const char *sp;
1725   register int i;
1726
1727   for (i = 0; i < CHARS; i++)
1728     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1729   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1730   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1731   notinname('\0') = notinname('\n');
1732   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1733   begtoken('\0') = begtoken('\n');
1734   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1735   intoken('\0') = intoken('\n');
1736   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1737   endtoken('\0') = endtoken('\n');
1738 }
1739
1740 /*
1741  * This routine opens the specified file and calls the function
1742  * which finds the function and type definitions.
1743  */
1744 static void
1745 find_entries (FILE *inf)
1746 {
1747   char *cp;
1748   language *lang = curfdp->lang;
1749   Lang_function *parser = NULL;
1750
1751   /* If user specified a language, use it. */
1752   if (lang != NULL && lang->function != NULL)
1753     {
1754       parser = lang->function;
1755     }
1756
1757   /* Else try to guess the language given the file name. */
1758   if (parser == NULL)
1759     {
1760       lang = get_language_from_filename (curfdp->infname, TRUE);
1761       if (lang != NULL && lang->function != NULL)
1762         {
1763           curfdp->lang = lang;
1764           parser = lang->function;
1765         }
1766     }
1767
1768   /* Else look for sharp-bang as the first two characters. */
1769   if (parser == NULL
1770       && readline_internal (&lb, inf) > 0
1771       && lb.len >= 2
1772       && lb.buffer[0] == '#'
1773       && lb.buffer[1] == '!')
1774     {
1775       char *lp;
1776
1777       /* Set lp to point at the first char after the last slash in the
1778          line or, if no slashes, at the first nonblank.  Then set cp to
1779          the first successive blank and terminate the string. */
1780       lp = etags_strrchr (lb.buffer+2, '/');
1781       if (lp != NULL)
1782         lp += 1;
1783       else
1784         lp = skip_spaces (lb.buffer + 2);
1785       cp = skip_non_spaces (lp);
1786       *cp = '\0';
1787
1788       if (strlen (lp) > 0)
1789         {
1790           lang = get_language_from_interpreter (lp);
1791           if (lang != NULL && lang->function != NULL)
1792             {
1793               curfdp->lang = lang;
1794               parser = lang->function;
1795             }
1796         }
1797     }
1798
1799   /* We rewind here, even if inf may be a pipe.  We fail if the
1800      length of the first line is longer than the pipe block size,
1801      which is unlikely. */
1802   rewind (inf);
1803
1804   /* Else try to guess the language given the case insensitive file name. */
1805   if (parser == NULL)
1806     {
1807       lang = get_language_from_filename (curfdp->infname, FALSE);
1808       if (lang != NULL && lang->function != NULL)
1809         {
1810           curfdp->lang = lang;
1811           parser = lang->function;
1812         }
1813     }
1814
1815   /* Else try Fortran or C. */
1816   if (parser == NULL)
1817     {
1818       node *old_last_node = last_node;
1819
1820       curfdp->lang = get_language_from_langname ("fortran");
1821       find_entries (inf);
1822
1823       if (old_last_node == last_node)
1824         /* No Fortran entries found.  Try C. */
1825         {
1826           /* We do not tag if rewind fails.
1827              Only the file name will be recorded in the tags file. */
1828           rewind (inf);
1829           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1830           find_entries (inf);
1831         }
1832       return;
1833     }
1834
1835   if (!no_line_directive
1836       && curfdp->lang != NULL && curfdp->lang->metasource)
1837     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1838        file, or anyway we parsed a file that is automatically generated from
1839        this one.  If this is the case, the bingo.c file contained #line
1840        directives that generated tags pointing to this file.  Let's delete
1841        them all before parsing this file, which is the real source. */
1842     {
1843       fdesc **fdpp = &fdhead;
1844       while (*fdpp != NULL)
1845         if (*fdpp != curfdp
1846             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1847           /* We found one of those!  We must delete both the file description
1848              and all tags referring to it. */
1849           {
1850             fdesc *badfdp = *fdpp;
1851
1852             /* Delete the tags referring to badfdp->taggedfname
1853                that were obtained from badfdp->infname. */
1854             invalidate_nodes (badfdp, &nodehead);
1855
1856             *fdpp = badfdp->next; /* remove the bad description from the list */
1857             free_fdesc (badfdp);
1858           }
1859         else
1860           fdpp = &(*fdpp)->next; /* advance the list pointer */
1861     }
1862
1863   assert (parser != NULL);
1864
1865   /* Generic initialisations before reading from file. */
1866   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1867
1868   /* Generic initialisations before parsing file with readline. */
1869   lineno = 0;                  /* reset global line number */
1870   charno = 0;                  /* reset global char number */
1871   linecharno = 0;              /* reset global char number of line start */
1872
1873   parser (inf);
1874
1875   regex_tag_multiline ();
1876 }
1877
1878 \f
1879 /*
1880  * Check whether an implicitly named tag should be created,
1881  * then call `pfnote'.
1882  * NAME is a string that is internally copied by this function.
1883  *
1884  * TAGS format specification
1885  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1886  * The following is explained in some more detail in etc/ETAGS.EBNF.
1887  *
1888  * make_tag creates tags with "implicit tag names" (unnamed tags)
1889  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1890  *  1. NAME does not contain any of the characters in NONAM;
1891  *  2. LINESTART contains name as either a rightmost, or rightmost but
1892  *     one character, substring;
1893  *  3. the character, if any, immediately before NAME in LINESTART must
1894  *     be a character in NONAM;
1895  *  4. the character, if any, immediately after NAME in LINESTART must
1896  *     also be a character in NONAM.
1897  *
1898  * The implementation uses the notinname() macro, which recognises the
1899  * characters stored in the string `nonam'.
1900  * etags.el needs to use the same characters that are in NONAM.
1901  */
1902 static void
1903 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1904           int namelen,          /* tag length */
1905           int is_func,          /* tag is a function */
1906           char *linestart,      /* start of the line where tag is */
1907           int linelen,          /* length of the line where tag is */
1908           int lno,              /* line number */
1909           long int cno)         /* character number */
1910 {
1911   bool named = (name != NULL && namelen > 0);
1912   char *nname = NULL;
1913
1914   if (!CTAGS && named)          /* maybe set named to false */
1915     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1916        such that etags.el can guess a name from it. */
1917     {
1918       int i;
1919       register const char *cp = name;
1920
1921       for (i = 0; i < namelen; i++)
1922         if (notinname (*cp++))
1923           break;
1924       if (i == namelen)                         /* rule #1 */
1925         {
1926           cp = linestart + linelen - namelen;
1927           if (notinname (linestart[linelen-1]))
1928             cp -= 1;                            /* rule #4 */
1929           if (cp >= linestart                   /* rule #2 */
1930               && (cp == linestart
1931                   || notinname (cp[-1]))        /* rule #3 */
1932               && strneq (name, cp, namelen))    /* rule #2 */
1933             named = FALSE;      /* use implicit tag name */
1934         }
1935     }
1936
1937   if (named)
1938     nname = savenstr (name, namelen);
1939
1940   pfnote (nname, is_func, linestart, linelen, lno, cno);
1941 }
1942
1943 /* Record a tag. */
1944 static void
1945 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1946                                 /* tag name, or NULL if unnamed */
1947                                 /* tag is a function */
1948                                 /* start of the line where tag is */
1949                                 /* length of the line where tag is */
1950                                 /* line number */
1951                                 /* character number */
1952 {
1953   register node *np;
1954
1955   assert (name == NULL || name[0] != '\0');
1956   if (CTAGS && name == NULL)
1957     return;
1958
1959   np = xnew (1, node);
1960
1961   /* If ctags mode, change name "main" to M<thisfilename>. */
1962   if (CTAGS && !cxref_style && streq (name, "main"))
1963     {
1964       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1965       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1966       fp = etags_strrchr (np->name, '.');
1967       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1968         fp[0] = '\0';
1969     }
1970   else
1971     np->name = name;
1972   np->valid = TRUE;
1973   np->been_warned = FALSE;
1974   np->fdp = curfdp;
1975   np->is_func = is_func;
1976   np->lno = lno;
1977   if (np->fdp->usecharno)
1978     /* Our char numbers are 0-base, because of C language tradition?
1979        ctags compatibility?  old versions compatibility?   I don't know.
1980        Anyway, since emacs's are 1-base we expect etags.el to take care
1981        of the difference.  If we wanted to have 1-based numbers, we would
1982        uncomment the +1 below. */
1983     np->cno = cno /* + 1 */ ;
1984   else
1985     np->cno = invalidcharno;
1986   np->left = np->right = NULL;
1987   if (CTAGS && !cxref_style)
1988     {
1989       if (strlen (linestart) < 50)
1990         np->regex = concat (linestart, "$", "");
1991       else
1992         np->regex = savenstr (linestart, 50);
1993     }
1994   else
1995     np->regex = savenstr (linestart, linelen);
1996
1997   add_node (np, &nodehead);
1998 }
1999
2000 /*
2001  * free_tree ()
2002  *      recurse on left children, iterate on right children.
2003  */
2004 static void
2005 free_tree (register node *np)
2006 {
2007   while (np)
2008     {
2009       register node *node_right = np->right;
2010       free_tree (np->left);
2011       free (np->name);
2012       free (np->regex);
2013       free (np);
2014       np = node_right;
2015     }
2016 }
2017
2018 /*
2019  * free_fdesc ()
2020  *      delete a file description
2021  */
2022 static void
2023 free_fdesc (register fdesc *fdp)
2024 {
2025   free (fdp->infname);
2026   free (fdp->infabsname);
2027   free (fdp->infabsdir);
2028   free (fdp->taggedfname);
2029   free (fdp->prop);
2030   free (fdp);
2031 }
2032
2033 /*
2034  * add_node ()
2035  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2036  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2037  *      balancing.
2038  *
2039  *      add_node is the only function allowed to add nodes, so it can
2040  *      maintain state.
2041  */
2042 static void
2043 add_node (node *np, node **cur_node_p)
2044 {
2045   register int dif;
2046   register node *cur_node = *cur_node_p;
2047
2048   if (cur_node == NULL)
2049     {
2050       *cur_node_p = np;
2051       last_node = np;
2052       return;
2053     }
2054
2055   if (!CTAGS)
2056     /* Etags Mode */
2057     {
2058       /* For each file name, tags are in a linked sublist on the right
2059          pointer.  The first tags of different files are a linked list
2060          on the left pointer.  last_node points to the end of the last
2061          used sublist. */
2062       if (last_node != NULL && last_node->fdp == np->fdp)
2063         {
2064           /* Let's use the same sublist as the last added node. */
2065           assert (last_node->right == NULL);
2066           last_node->right = np;
2067           last_node = np;
2068         }
2069       else if (cur_node->fdp == np->fdp)
2070         {
2071           /* Scanning the list we found the head of a sublist which is
2072              good for us.  Let's scan this sublist. */
2073           add_node (np, &cur_node->right);
2074         }
2075       else
2076         /* The head of this sublist is not good for us.  Let's try the
2077            next one. */
2078         add_node (np, &cur_node->left);
2079     } /* if ETAGS mode */
2080
2081   else
2082     {
2083       /* Ctags Mode */
2084       dif = strcmp (np->name, cur_node->name);
2085
2086       /*
2087        * If this tag name matches an existing one, then
2088        * do not add the node, but maybe print a warning.
2089        */
2090       if (no_duplicates && !dif)
2091         {
2092           if (np->fdp == cur_node->fdp)
2093             {
2094               if (!no_warnings)
2095                 {
2096                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2097                            np->fdp->infname, lineno, np->name);
2098                   fprintf (stderr, "Second entry ignored\n");
2099                 }
2100             }
2101           else if (!cur_node->been_warned && !no_warnings)
2102             {
2103               fprintf
2104                 (stderr,
2105                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2106                  np->fdp->infname, cur_node->fdp->infname, np->name);
2107               cur_node->been_warned = TRUE;
2108             }
2109           return;
2110         }
2111
2112       /* Actually add the node */
2113       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2114     } /* if CTAGS mode */
2115 }
2116
2117 /*
2118  * invalidate_nodes ()
2119  *      Scan the node tree and invalidate all nodes pointing to the
2120  *      given file description (CTAGS case) or free them (ETAGS case).
2121  */
2122 static void
2123 invalidate_nodes (fdesc *badfdp, node **npp)
2124 {
2125   node *np = *npp;
2126
2127   if (np == NULL)
2128     return;
2129
2130   if (CTAGS)
2131     {
2132       if (np->left != NULL)
2133         invalidate_nodes (badfdp, &np->left);
2134       if (np->fdp == badfdp)
2135         np->valid = FALSE;
2136       if (np->right != NULL)
2137         invalidate_nodes (badfdp, &np->right);
2138     }
2139   else
2140     {
2141       assert (np->fdp != NULL);
2142       if (np->fdp == badfdp)
2143         {
2144           *npp = np->left;      /* detach the sublist from the list */
2145           np->left = NULL;      /* isolate it */
2146           free_tree (np);       /* free it */
2147           invalidate_nodes (badfdp, npp);
2148         }
2149       else
2150         invalidate_nodes (badfdp, &np->left);
2151     }
2152 }
2153
2154 \f
2155 static int total_size_of_entries (node *);
2156 static int number_len (long);
2157
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is at least 1. */
static int
number_len (long int num)
{
  int digits;

  /* One digit to begin with, one more for each division by ten that
     still leaves something to print. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2167
2168 /*
2169  * Return total number of characters that put_entries will output for
2170  * the nodes in the linked list at the right of the specified node.
2171  * This count is irrelevant with etags.el since emacs 19.34 at least,
2172  * but is still supplied for backward compatibility.
2173  */
2174 static int
2175 total_size_of_entries (register node *np)
2176 {
2177   register int total = 0;
2178
2179   for (; np != NULL; np = np->right)
2180     if (np->valid)
2181       {
2182         total += strlen (np->regex) + 1;                /* pat\177 */
2183         if (np->name != NULL)
2184           total += strlen (np->name) + 1;               /* name\001 */
2185         total += number_len ((long) np->lno) + 1;       /* lno, */
2186         if (np->cno != invalidcharno)                   /* cno */
2187           total += number_len (np->cno);
2188         total += 1;                                     /* newline */
2189       }
2190
2191   return total;
2192 }
2193
2194 static void
2195 put_entries (register node *np)
2196 {
2197   register char *sp;
2198   static fdesc *fdp = NULL;
2199
2200   if (np == NULL)
2201     return;
2202
2203   /* Output subentries that precede this one */
2204   if (CTAGS)
2205     put_entries (np->left);
2206
2207   /* Output this entry */
2208   if (np->valid)
2209     {
2210       if (!CTAGS)
2211         {
2212           /* Etags mode */
2213           if (fdp != np->fdp)
2214             {
2215               fdp = np->fdp;
2216               fprintf (tagf, "\f\n%s,%d\n",
2217                        fdp->taggedfname, total_size_of_entries (np));
2218               fdp->written = TRUE;
2219             }
2220           fputs (np->regex, tagf);
2221           fputc ('\177', tagf);
2222           if (np->name != NULL)
2223             {
2224               fputs (np->name, tagf);
2225               fputc ('\001', tagf);
2226             }
2227           fprintf (tagf, "%d,", np->lno);
2228           if (np->cno != invalidcharno)
2229             fprintf (tagf, "%ld", np->cno);
2230           fputs ("\n", tagf);
2231         }
2232       else
2233         {
2234           /* Ctags mode */
2235           if (np->name == NULL)
2236             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2237
2238           if (cxref_style)
2239             {
2240               if (vgrind_style)
2241                 fprintf (stdout, "%s %s %d\n",
2242                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2243               else
2244                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2245                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2246             }
2247           else
2248             {
2249               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2250
2251               if (np->is_func)
2252                 {               /* function or #define macro with args */
2253                   putc (searchar, tagf);
2254                   putc ('^', tagf);
2255
2256                   for (sp = np->regex; *sp; sp++)
2257                     {
2258                       if (*sp == '\\' || *sp == searchar)
2259                         putc ('\\', tagf);
2260                       putc (*sp, tagf);
2261                     }
2262                   putc (searchar, tagf);
2263                 }
2264               else
2265                 {               /* anything else; text pattern inadequate */
2266                   fprintf (tagf, "%d", np->lno);
2267                 }
2268               putc ('\n', tagf);
2269             }
2270         }
2271     } /* if this node contains a valid tag */
2272
2273   /* Output subentries that follow this one */
2274   put_entries (np->right);
2275   if (!CTAGS)
2276     put_entries (np->left);
2277 }
2278
2279 \f
/* C extensions.  The values of C_STAR and C_JAVA both contain the
   C_PLPL bit; the keyword table masks it out with (C_JAVA & ~C_PLPL)
   for the entries that are Java-only. */
#define C_EXT   0x00fff                 /* C extensions */
#define C_PLAIN 0x00000                 /* C */
#define C_PLPL  0x00001                 /* C++ */
#define C_STAR  (C_PLPL | 0x00002)      /* C*, i.e. 0x00003 */
#define C_JAVA  (C_PLPL | 0x00004)      /* JAVA, i.e. 0x00005 */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000                 /* yacc file */
2288
/*
 * The C symbol tables.
 *
 * Kinds of keyword known to the C family parser.  The words quoted
 * below are the ones the keyword table maps to each kind.
 */
enum sym_type
{
  st_none,              /* first value; no keyword maps to it */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, import,
                           package, friend */
  st_C_attribute,       /* __attribute__, GTY */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* struct, union, namespace, domain, interface */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2303
/* Keyword lookup: hash and in_word_set come from gperf, C_symtype is
   the entry point that filters the result by C dialect. */
static unsigned int hash (const char *str, unsigned int len);
static struct C_stab_entry * in_word_set (const char *str, unsigned int len);
static enum sym_type C_symtype (char *str, int len, int c_ext);
2307
2308 /* Feed stuff between (but not including) %[ and %] lines to:
2309      gperf -m 5
2310 %[
2311 %compare-strncmp
2312 %enum
2313 %struct-type
2314 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2315 %%
2316 if,             0,                      st_C_ignore
2317 for,            0,                      st_C_ignore
2318 while,          0,                      st_C_ignore
2319 switch,         0,                      st_C_ignore
2320 return,         0,                      st_C_ignore
2321 __attribute__,  0,                      st_C_attribute
2322 GTY,            0,                      st_C_attribute
2323 @interface,     0,                      st_C_objprot
2324 @protocol,      0,                      st_C_objprot
2325 @implementation,0,                      st_C_objimpl
2326 @end,           0,                      st_C_objend
2327 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2328 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2329 friend,         C_PLPL,                 st_C_ignore
2330 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2331 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2332 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2333 class,          0,                      st_C_class
2334 namespace,      C_PLPL,                 st_C_struct
2335 domain,         C_STAR,                 st_C_struct
2336 union,          0,                      st_C_struct
2337 struct,         0,                      st_C_struct
2338 extern,         0,                      st_C_extern
2339 enum,           0,                      st_C_enum
2340 typedef,        0,                      st_C_typedef
2341 define,         0,                      st_C_define
2342 undef,          0,                      st_C_define
2343 operator,       C_PLPL,                 st_C_operator
2344 template,       0,                      st_C_template
2345 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2346 DEFUN,          0,                      st_C_gnumacro
2347 SYSCALL,        0,                      st_C_gnumacro
2348 ENTRY,          0,                      st_C_gnumacro
2349 PSEUDO,         0,                      st_C_gnumacro
2350 # These are defined inside C functions, so currently they are not met.
2351 # EXFUN used in glibc, DEFVAR_* in emacs.
2352 #EXFUN,         0,                      st_C_gnumacro
2353 #DEFVAR_,       0,                      st_C_gnumacro
2354 %]
2355 and replace lines between %< and %> with its output, then:
2356  - remove the #if characterset check
2357  - make in_word_set static and not inline. */
2358 /*%<*/
2359 /* C code produced by gperf version 3.0.1 */
2360 /* Command-line: gperf -m 5  */
2361 /* Computed positions: -k'2-3' */
2362
2363 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2364 /* maximum key range = 33, duplicates = 0 */
2365
/* Hash function of the keyword table: LEN plus the table values of the
   second and, unless LEN is 2, third character of STR.  STR must hold
   at least three characters when LEN is not 2, and two otherwise. */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int key = len;

  /* The third character takes part for every length other than 2. */
  if (key != 2)
    key += asso_values[(unsigned char)str[2]];
  key += asso_values[(unsigned char)str[1]];

  return key;
}
2411
2412 static struct C_stab_entry *
2413 in_word_set (register const char *str, register unsigned int len)
2414 {
2415   enum
2416     {
2417       TOTAL_KEYWORDS = 33,
2418       MIN_WORD_LENGTH = 2,
2419       MAX_WORD_LENGTH = 15,
2420       MIN_HASH_VALUE = 2,
2421       MAX_HASH_VALUE = 34
2422     };
2423
2424   static struct C_stab_entry wordlist[] =
2425     {
2426       {""}, {""},
2427       {"if",            0,                      st_C_ignore},
2428       {"GTY",           0,                      st_C_attribute},
2429       {"@end",          0,                      st_C_objend},
2430       {"union",         0,                      st_C_struct},
2431       {"define",                0,                      st_C_define},
2432       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2433       {"template",      0,                      st_C_template},
2434       {"operator",      C_PLPL,                 st_C_operator},
2435       {"@interface",    0,                      st_C_objprot},
2436       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2437       {"friend",                C_PLPL,                 st_C_ignore},
2438       {"typedef",       0,                      st_C_typedef},
2439       {"return",                0,                      st_C_ignore},
2440       {"@implementation",0,                     st_C_objimpl},
2441       {"@protocol",     0,                      st_C_objprot},
2442       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2443       {"extern",                0,                      st_C_extern},
2444       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2445       {"struct",                0,                      st_C_struct},
2446       {"domain",                C_STAR,                 st_C_struct},
2447       {"switch",                0,                      st_C_ignore},
2448       {"enum",          0,                      st_C_enum},
2449       {"for",           0,                      st_C_ignore},
2450       {"namespace",     C_PLPL,                 st_C_struct},
2451       {"class",         0,                      st_C_class},
2452       {"while",         0,                      st_C_ignore},
2453       {"undef",         0,                      st_C_define},
2454       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2455       {"__attribute__", 0,                      st_C_attribute},
2456       {"SYSCALL",       0,                      st_C_gnumacro},
2457       {"ENTRY",         0,                      st_C_gnumacro},
2458       {"PSEUDO",                0,                      st_C_gnumacro},
2459       {"DEFUN",         0,                      st_C_gnumacro}
2460     };
2461
2462   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2463     {
2464       register int key = hash (str, len);
2465
2466       if (key <= MAX_HASH_VALUE && key >= 0)
2467         {
2468           register const char *s = wordlist[key].name;
2469
2470           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2471             return &wordlist[key];
2472         }
2473     }
2474   return 0;
2475 }
2476 /*%>*/
2477
2478 static enum sym_type
2479 C_symtype (char *str, int len, int c_ext)
2480 {
2481   register struct C_stab_entry *se = in_word_set (str, len);
2482
2483   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2484     return st_none;
2485   return se->type;
2486 }
2487
2488 \f
2489 /*
2490  * Ignoring __attribute__ ((list))
2491  */
/* Set to TRUE by consider_token when it recognizes an attribute keyword
   (`__attribute__' or `GTY' in the keyword table).  While it is set,
   C_entries does not hand tokens to consider_token. */
2492 static bool inattribute;        /* looking at an __attribute__ construct */
2493
2494 /*
2495  * C functions and variables are recognized using a simple
2496  * finite automaton.  fvdef is its state variable.
2497  */
2498 static enum
2499 {
2500   fvnone,                       /* nothing seen */
2501   fdefunkey,                    /* GNU macro keyword seen (DEFUN, ENTRY,
                                   SYSCALL or PSEUDO) */
2502   fdefunname,                   /* name following the GNU macro keyword seen */
2503   foperator,                    /* func: operator keyword seen (cplpl) */
2504   fvnameseen,                   /* function or variable name seen */
2505   fstartlist,                   /* func: just after open parenthesis */
2506   finlist,                      /* func: in parameter list */
2507   flistseen,                    /* func: after parameter list */
2508   fignore,                      /* func: before open brace */
2509   vignore                       /* var-like: ignore until ';' */
2510 } fvdef;
2511
2512 static bool fvextern;           /* func or var: extern keyword seen */
2513
2514 /*
2515  * typedefs are recognized using a simple finite automaton.
2516  * typdef is its state variable.
 * consider_token moves it from tnone to tkeyseen only when the
 * `typedefs' flag is set; while it is tignore, C_entries does not
 * consider tokens at all.
2517  */
2518 static enum
2519 {
2520   tnone,                        /* nothing seen */
2521   tkeyseen,                     /* typedef keyword seen */
2522   ttypeseen,                    /* defined type seen */
2523   tinbody,                      /* inside typedef body */
2524   tend,                         /* just before typedef tag */
2525   tignore                       /* junk after typedef tag */
2526 } typdef;
2527
2528 /*
2529  * struct-like structures (enum, struct and union) are recognized
2530  * using another simple finite automaton.  `structdef' is its state
2531  * variable.
2532  */
2533 static enum
2534 {
2535   snone,                        /* nothing seen yet,
2536                                    or in struct body if bracelev > 0 */
2537   skeyseen,                     /* struct-like keyword seen */
2538   stagseen,                     /* struct-like tag seen */
2539   scolonseen                    /* colon seen after struct-like tag; also
                                   entered by consider_token when a Java
                                   `extends' or `implements' follows the tag */
2540 } structdef;
2541
2542 /*
2543  * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores a fresh savenstr copy here for each class name
 * it sees; the previous copy is deliberately never freed.
2544  */
2545 static const char *objtag = "<uninited>";
2546
2547 /*
2548  * Yet another little state machine to deal with preprocessor lines.
2549  */
2550 static enum
2551 {
2552   dnone,                        /* nothing seen */
2553   dsharpseen,                   /* '#' seen as first char on line */
2554   ddefineseen,                  /* '#' and 'define' seen (`undef' is
                                   classified the same way by the keyword
                                   table) */
2555   dignorerest                   /* ignore rest of line */
2556 } definedef;
2557
2558 /*
2559  * State machine for Objective C protocols and implementations.
2560  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2561  */
2562 static enum
2563 {
2564   onone,                        /* nothing seen */
2565   oprotocol,                    /* @interface or @protocol seen */
2566   oimplementation,              /* @implementation seen */
2567   otagseen,                     /* class name seen */
2568   oparenseen,                   /* parenthesis before category seen */
2569   ocatseen,                     /* category name seen */
2570   oinbody,                      /* in @implementation body */
2571   omethodsign,                  /* in @implementation body, after +/- */
2572   omethodtag,                   /* after method name */
2573   omethodcolon,                 /* after method colon */
2574   omethodparm,                  /* after method parameter */
2575   oignore                       /* wait for @end */
2576 } objdef;
2577
2578
2579 /*
2580  * Use this structure to keep info about the token read, and how it
2581  * should be tagged.  Used by the make_C_tag function to build a tag.
2582  */
2583 static struct tok
2584 {
2585   char *line;                   /* string containing the token */
2586   int offset;                   /* where the token starts in LINE */
2587   int length;                   /* token length */
2588   /*
2589     The previous members can be used to pass strings around for generic
2590     purposes.  The following ones specifically refer to creating tags.  In this
2591     case the token contained here is the pattern that will be used to create a
2592     tag.
2593   */
2594   bool valid;                   /* make_C_tag creates a tag only when this is
2595                                    TRUE; the token should be invalidated
2596                                    whenever a state machine is reset
                                   prematurely */
2597   bool named;                   /* create a named tag */
2598   int lineno;                   /* source line number of tag */
2599   long linepos;                 /* source char number of tag */
2600 } token;                        /* latest token read */
2601
2602 /*
2603  * Variables and functions for dealing with nested structures.
2604  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2605  */
2606 static void pushclass_above (int, char *, int);
2607 static void popclass_above (int);
2608 static void write_classname (linebuffer *, const char *qualifier);
2609
/* cname and bracelev are parallel arrays: entry I holds the name of the
   I-th enclosing declaration (NULL when it was pushed without a name)
   and the brace level it was pushed at.  pushclass_above asserts that
   the brace levels are strictly increasing.  C_entries allocates the
   arrays on first use and pushclass_above doubles them when full. */
2610 static struct {
2611   char **cname;                 /* nested class names */
2612   int *bracelev;                /* nested class brace level */
2613   int nl;                       /* class nesting level (elements used) */
2614   int size;                     /* length of the array */
2615 } cstack;                       /* stack for nested declaration tags */
2616 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2617 #define nestlev         (cstack.nl)
2618 /* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that reads a variable named `bracelev' from
   the scope where the macro is used. */
2619 #define instruct        (structdef == snone && nestlev > 0                      \
2620                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2621
2622 static void
2623 pushclass_above (int bracelev, char *str, int len)
2624 {
2625   int nl;
2626
2627   popclass_above (bracelev);
2628   nl = cstack.nl;
2629   if (nl >= cstack.size)
2630     {
2631       int size = cstack.size *= 2;
2632       xrnew (cstack.cname, size, char *);
2633       xrnew (cstack.bracelev, size, int);
2634     }
2635   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2636   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2637   cstack.bracelev[nl] = bracelev;
2638   cstack.nl = nl + 1;
2639 }
2640
2641 static void
2642 popclass_above (int bracelev)
2643 {
2644   int nl;
2645
2646   for (nl = cstack.nl - 1;
2647        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2648        nl--)
2649     {
2650       free (cstack.cname[nl]);
2651       cstack.nl = nl;
2652     }
2653 }
2654
2655 static void
2656 write_classname (linebuffer *cn, const char *qualifier)
2657 {
2658   int i, len;
2659   int qlen = strlen (qualifier);
2660
2661   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2662     {
2663       len = 0;
2664       cn->len = 0;
2665       cn->buffer[0] = '\0';
2666     }
2667   else
2668     {
2669       len = strlen (cstack.cname[0]);
2670       linebuffer_setlen (cn, len);
2671       strcpy (cn->buffer, cstack.cname[0]);
2672     }
2673   for (i = 1; i < cstack.nl; i++)
2674     {
2675       char *s;
2676       int slen;
2677
2678       s = cstack.cname[i];
2679       if (s == NULL)
2680         continue;
2681       slen = strlen (s);
2682       len += slen + qlen;
2683       linebuffer_setlen (cn, len);
2684       strncat (cn->buffer, qualifier, qlen);
2685       strncat (cn->buffer, s, slen);
2686     }
2687 }
2688
2689 \f
2690 static bool consider_token (char *, int, int, int *, int, int, bool *);
2691 static void make_C_tag (bool);
2692
2693 /*
2694  * consider_token ()
2695  *      checks to see if the current token is at the start of a
2696  *      function or variable, or corresponds to a typedef, or
2697  *      is a struct/union/enum tag, or #define, or an enum constant.
2698  *
 *      Returns TRUE when the caller should treat the token as a tag
 *      candidate, FALSE when the token only advances the state machines.
 *
2699  *      *IS_FUNC_OR_VAR gets TRUE if the token is a function, variable,
2700  *      operator, Objective C class or category name; for a #define it
 *      is set to whether the macro has args.  On other paths it is left
 *      untouched.  C_EXTP points to the mask of C extensions in effect;
 *      the C_AUTO bit is replaced by C_PLPL when C++ is detected.
2701  *
2702  * Globals
2703  *      fvdef                   IN OUT
2704  *      structdef               IN OUT
2705  *      definedef               IN OUT
2706  *      typdef                  IN OUT
2707  *      objdef                  IN OUT
2708  */
2709
2710 static bool
2711 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2712                                 /* IN: token pointer (str) */
2713                                 /* IN: token length (len) */
2714                                 /* IN: first char after the token (c) */
2715                                 /* IN, OUT: C extensions mask (c_extp) */
2716                                 /* IN: brace level (bracelev) */
2717                                 /* IN: parenthesis level (parlev) */
2718                                 /* OUT: function or variable found
                                   (is_func_or_var) */
2719 {
2720   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2721      structtype is the type of the preceding struct-like keyword, and
2722      structbracelev is the brace level where it has been seen. */
2723   static enum sym_type structtype;
2724   static int structbracelev;
2725   static enum sym_type toktype;
2726
2727
  /* Classify the token; st_none means it is not a keyword that applies
     to the dialects selected in *c_extp. */
2728   toktype = C_symtype (str, len, *c_extp);
2729
2730   /*
2731    * Skip __attribute__
2732    */
2733   if (toktype == st_C_attribute)
2734     {
2735       inattribute = TRUE;
2736       return FALSE;
2737      }
2738
2739    /*
2740     * Advance the definedef state machine.
2741     */
2742    switch (definedef)
2743      {
2744      case dnone:
2745        /* We're not on a preprocessor line. */
2746        if (toktype == st_C_gnumacro)
2747          {
2748            fvdef = fdefunkey;
2749            return FALSE;
2750          }
2751        break;
2752      case dsharpseen:
       /* First token after '#': only define-class keywords are followed
          further, any other directive is ignored to end of line. */
2753        if (toktype == st_C_define)
2754          {
2755            definedef = ddefineseen;
2756          }
2757        else
2758          {
2759            definedef = dignorerest;
2760          }
2761        return FALSE;
2762      case ddefineseen:
2763        /*
2764         * Make a tag for any macro, unless it is a constant
2765         * and constantypedefs is FALSE.
2766         */
2767        definedef = dignorerest;
2768        *is_func_or_var = (c == '(');
2769        if (!*is_func_or_var && !constantypedefs)
2770          return FALSE;
2771        else
2772          return TRUE;
2773      case dignorerest:
2774        return FALSE;
2775      default:
2776        error ("internal error: definedef value.", (char *)NULL);
2777      }
2778
2779    /*
2780     * Now typedefs
2781     */
2782    switch (typdef)
2783      {
2784      case tnone:
2785        if (toktype == st_C_typedef)
2786          {
2787            if (typedefs)
2788              typdef = tkeyseen;
2789            fvextern = FALSE;
2790            fvdef = fvnone;
2791            return FALSE;
2792          }
2793        break;
2794      case tkeyseen:
2795        switch (toktype)
2796          {
2797          case st_none:
2798          case st_C_class:
2799          case st_C_struct:
2800          case st_C_enum:
2801            typdef = ttypeseen;
2802          }
2803        break;
2804      case ttypeseen:
2805        if (structdef == snone && fvdef == fvnone)
2806          {
2807            fvdef = fvnameseen;
2808            return TRUE;
2809          }
2810        break;
2811      case tend:
2812        switch (toktype)
2813          {
2814          case st_C_class:
2815          case st_C_struct:
2816          case st_C_enum:
2817            return FALSE;
2818          }
2819        return TRUE;
2820      }
2821
   /* Struct-like keywords: start the structdef machine and remember
      which keyword was seen and at which brace level. */
2822    switch (toktype)
2823      {
2824      case st_C_javastruct:
2825        if (structdef == stagseen)
2826          structdef = scolonseen;
2827        return FALSE;
2828      case st_C_template:
2829      case st_C_class:
2830        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2831            && bracelev == 0
2832            && definedef == dnone && structdef == snone
2833            && typdef == tnone && fvdef == fvnone)
2834          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2835        if (toktype == st_C_template)
2836          break;
2837        /* FALLTHRU */
2838      case st_C_struct:
2839      case st_C_enum:
2840        if (parlev == 0
2841            && fvdef != vignore
2842            && (typdef == tkeyseen
2843                || (typedefs_or_cplusplus && structdef == snone)))
2844          {
2845            structdef = skeyseen;
2846            structtype = toktype;
2847            structbracelev = bracelev;
2848            if (fvdef == fvnameseen)
2849              fvdef = fvnone;
2850          }
2851        return FALSE;
2852      }
2853
   /* The token right after a struct-like keyword is the tag. */
2854    if (structdef == skeyseen)
2855      {
2856        structdef = stagseen;
2857        return TRUE;
2858      }
2859
2860    if (typdef != tnone)
2861      definedef = dnone;
2862
2863    /* Detect Objective C constructs. */
2864    switch (objdef)
2865      {
2866      case onone:
2867        switch (toktype)
2868          {
2869          case st_C_objprot:
2870            objdef = oprotocol;
2871            return FALSE;
2872          case st_C_objimpl:
2873            objdef = oimplementation;
2874            return FALSE;
2875          }
2876        break;
2877      case oimplementation:
2878        /* Save the class tag for functions or variables defined inside. */
2879        objtag = savenstr (str, len);
2880        objdef = oinbody;
2881        return FALSE;
2882      case oprotocol:
2883        /* Save the class tag for categories. */
2884        objtag = savenstr (str, len);
2885        objdef = otagseen;
2886        *is_func_or_var = TRUE;
2887        return TRUE;
2888      case oparenseen:
2889        objdef = ocatseen;
2890        *is_func_or_var = TRUE;
2891        return TRUE;
2892      case oinbody:
2893        break;
2894      case omethodsign:
2895        if (parlev == 0)
2896          {
2897            fvdef = fvnone;
2898            objdef = omethodtag;
           /* Start the method name in token_name with this token. */
2899            linebuffer_setlen (&token_name, len);
2900            strncpy (token_name.buffer, str, len);
2901            token_name.buffer[len] = '\0';
2902            return TRUE;
2903          }
2904        return FALSE;
2905      case omethodcolon:
2906        if (parlev == 0)
2907          objdef = omethodparm;
2908        return FALSE;
2909      case omethodparm:
2910        if (parlev == 0)
2911          {
2912            fvdef = fvnone;
2913            objdef = omethodtag;
           /* Append this token to the method name accumulated so far. */
2914            linebuffer_setlen (&token_name, token_name.len + len);
2915            strncat (token_name.buffer, str, len);
2916            return TRUE;
2917          }
2918        return FALSE;
2919      case oignore:
2920        if (toktype == st_C_objend)
2921          {
2922            /* Memory leakage here: the string pointed by objtag is
2923               never released, because many tests would be needed to
2924               avoid breaking on incorrect input code.  The amount of
2925               memory leaked here is the sum of the lengths of the
2926               class tags.
2927            free (objtag); */
2928            objdef = onone;
2929          }
2930        return FALSE;
2931      }
2932
2933    /* A function, variable or enum constant? */
2934    switch (toktype)
2935      {
2936      case st_C_extern:
2937        fvextern = TRUE;
2938        switch  (fvdef)
2939          {
2940          case finlist:
2941          case flistseen:
2942          case fignore:
2943          case vignore:
2944            break;
2945          default:
2946            fvdef = fvnone;
2947          }
2948        return FALSE;
2949      case st_C_ignore:
2950        fvextern = FALSE;
2951        fvdef = vignore;
2952        return FALSE;
2953      case st_C_operator:
2954        fvdef = foperator;
2955        *is_func_or_var = TRUE;
2956        return TRUE;
2957      case st_none:
2958        if (constantypedefs
2959            && structdef == snone
2960            && structtype == st_C_enum && bracelev > structbracelev)
2961          return TRUE;           /* enum constant */
2962        switch (fvdef)
2963          {
2964          case fdefunkey:
2965            if (bracelev > 0)
2966              break;
2967            fvdef = fdefunname;  /* GNU macro */
2968            *is_func_or_var = TRUE;
2969            return TRUE;
2970          case fvnone:
2971            switch (typdef)
2972              {
2973              case ttypeseen:
2974                return FALSE;
2975              case tnone:
2976                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2977                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2978                  {
2979                    fvdef = vignore;
2980                    return FALSE;
2981                  }
2982                break;
2983              }
2984           /* FALLTHRU */
2985           case fvnameseen:
           /* A token ending in "::operator" starts a qualified C++
              operator definition. */
2986           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2987             {
2988               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2989                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2990               fvdef = foperator;
2991               *is_func_or_var = TRUE;
2992               return TRUE;
2993             }
           /* Inside braces, names are candidates only directly in a
              struct-like body (see the instruct macro). */
2994           if (bracelev > 0 && !instruct)
2995             break;
2996           fvdef = fvnameseen;   /* function or variable */
2997           *is_func_or_var = TRUE;
2998           return TRUE;
2999         }
3000       break;
3001     }
3002
3003   return FALSE;
3004 }
3005
3006 \f
3007 /*
3008  * C_entries often keeps pointers to tokens or lines which are older than
3009  * the line currently read.  By keeping two line buffers, and switching
3010  * them at end of line, it is possible to use those pointers.
3011  */
3012 static struct
3013 {
3014   long linepos;
3015   linebuffer lb;
3016 } lbs[2];
3017
/* The macros below are not self-contained: they expand to code that
   uses names from the scope where they appear (curndx, newndx, c_ext,
   lp, quotednl, savetoken, inf, charno). */
3018 #define current_lb_is_new (newndx == curndx)
3019 #define switch_line_buffers() (curndx = 1 - curndx)
3020
3021 #define curlb (lbs[curndx].lb)
3022 #define newlb (lbs[newndx].lb)
3023 #define curlinepos (lbs[curndx].linepos)
3024 #define newlinepos (lbs[newndx].linepos)
3025
/* Tests on the C extensions mask c_ext. */
3026 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3027 #define cplpl (c_ext & C_PLPL)
3028 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3029
/* Read the next input line into the current line buffer, point lp at
   its start, clear quotednl and mark the current buffer as the newest
   one.  Unlike CNL, this leaves definedef and the saved token alone. */
3030 #define CNL_SAVE_DEFINEDEF()                                            \
3031 do {                                                                    \
3032   curlinepos = charno;                                                  \
3033   readline (&curlb, inf);                                               \
3034   lp = curlb.buffer;                                                    \
3035   quotednl = FALSE;                                                     \
3036   newndx = curndx;                                                      \
3037 } while (0)
3038
/* As CNL_SAVE_DEFINEDEF, and additionally restore token from savetoken
   when the latter is valid, and reset definedef to dnone. */
3039 #define CNL()                                                           \
3040 do {                                                                    \
3041   CNL_SAVE_DEFINEDEF();                                                 \
3042   if (savetoken.valid)                                                  \
3043     {                                                                   \
3044       token = savetoken;                                                \
3045       savetoken.valid = FALSE;                                          \
3046     }                                                                   \
3047   definedef = dnone;                                                    \
3048 } while (0)
3049
3050
3051 static void
3052 make_C_tag (int isfun)
3053 {
3054   /* This function is never called when token.valid is FALSE, but
3055      we must protect against invalid input or internal errors. */
3056   if (token.valid)
3057     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3058               token.offset+token.length+1, token.lineno, token.linepos);
3059   else if (DEBUG)
3060     {                             /* this branch is optimised away if !DEBUG */
3061       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3062                 token_name.len + 17, isfun, token.line,
3063                 token.offset+token.length+1, token.lineno, token.linepos);
3064       error ("INVALID TOKEN", NULL);
3065     }
3066
3067   token.valid = FALSE;
3068 }
3069
3070
3071 /*
3072  * C_entries ()
3073  *      This routine finds functions, variables, typedefs,
3074  *      #define's, enum constants and struct/union/enum definitions in
3075  *      C syntax and adds them to the list.
3076  */
3077 static void
3078 C_entries (int c_ext, FILE *inf)
3079                                 /* extension of C */
3080                                 /* input file */
3081 {
3082   register char c;              /* latest char read; '\0' for end of line */
3083   register char *lp;            /* pointer one beyond the character `c' */
3084   int curndx, newndx;           /* indices for current and new lb */
3085   register int tokoff;          /* offset in line of start of current token */
3086   register int toklen;          /* length of current token */
3087   const char *qualifier;        /* string used to qualify names */
3088   int qlen;                     /* length of qualifier */
3089   int bracelev;                 /* current brace level */
3090   int bracketlev;               /* current bracket level */
3091   int parlev;                   /* current parenthesis level */
3092   int attrparlev;               /* __attribute__ parenthesis level */
3093   int templatelev;              /* current template level */
3094   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3095   bool incomm, inquote, inchar, quotednl, midtoken;
3096   bool yacc_rules;              /* in the rules part of a yacc file */
3097   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3098
3099
3100   linebuffer_init (&lbs[0].lb);
3101   linebuffer_init (&lbs[1].lb);
3102   if (cstack.size == 0)
3103     {
3104       cstack.size = (DEBUG) ? 1 : 4;
3105       cstack.nl = 0;
3106       cstack.cname = xnew (cstack.size, char *);
3107       cstack.bracelev = xnew (cstack.size, int);
3108     }
3109
3110   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3111   curndx = newndx = 0;
3112   lp = curlb.buffer;
3113   *lp = 0;
3114
3115   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3116   structdef = snone; definedef = dnone; objdef = onone;
3117   yacc_rules = FALSE;
3118   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3119   token.valid = savetoken.valid = FALSE;
3120   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3121   if (cjava)
3122     { qualifier = "."; qlen = 1; }
3123   else
3124     { qualifier = "::"; qlen = 2; }
3125
3126
3127   while (!feof (inf))
3128     {
3129       c = *lp++;
3130       if (c == '\\')
3131         {
3132           /* If we are at the end of the line, the next character is a
3133              '\0'; do not skip it, because it is what tells us
3134              to read the next line.  */
3135           if (*lp == '\0')
3136             {
3137               quotednl = TRUE;
3138               continue;
3139             }
3140           lp++;
3141           c = ' ';
3142         }
3143       else if (incomm)
3144         {
3145           switch (c)
3146             {
3147             case '*':
3148               if (*lp == '/')
3149                 {
3150                   c = *lp++;
3151                   incomm = FALSE;
3152                 }
3153               break;
3154             case '\0':
3155               /* Newlines inside comments do not end macro definitions in
3156                  traditional cpp. */
3157               CNL_SAVE_DEFINEDEF ();
3158               break;
3159             }
3160           continue;
3161         }
3162       else if (inquote)
3163         {
3164           switch (c)
3165             {
3166             case '"':
3167               inquote = FALSE;
3168               break;
3169             case '\0':
3170               /* Newlines inside strings do not end macro definitions
3171                  in traditional cpp, even though compilers don't
3172                  usually accept them. */
3173               CNL_SAVE_DEFINEDEF ();
3174               break;
3175             }
3176           continue;
3177         }
3178       else if (inchar)
3179         {
3180           switch (c)
3181             {
3182             case '\0':
3183               /* Hmmm, something went wrong. */
3184               CNL ();
3185               /* FALLTHRU */
3186             case '\'':
3187               inchar = FALSE;
3188               break;
3189             }
3190           continue;
3191         }
3192       else if (bracketlev > 0)
3193         {
3194           switch (c)
3195             {
3196             case ']':
3197               if (--bracketlev > 0)
3198                 continue;
3199               break;
3200             case '\0':
3201               CNL_SAVE_DEFINEDEF ();
3202               break;
3203             }
3204           continue;
3205         }
3206       else switch (c)
3207         {
3208         case '"':
3209           inquote = TRUE;
3210           if (inattribute)
3211             break;
3212           switch (fvdef)
3213             {
3214             case fdefunkey:
3215             case fstartlist:
3216             case finlist:
3217             case fignore:
3218             case vignore:
3219               break;
3220             default:
3221               fvextern = FALSE;
3222               fvdef = fvnone;
3223             }
3224           continue;
3225         case '\'':
3226           inchar = TRUE;
3227           if (inattribute)
3228             break;
3229           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3230             {
3231               fvextern = FALSE;
3232               fvdef = fvnone;
3233             }
3234           continue;
3235         case '/':
3236           if (*lp == '*')
3237             {
3238               incomm = TRUE;
3239               lp++;
3240               c = ' ';
3241             }
3242           else if (/* cplpl && */ *lp == '/')
3243             {
3244               c = '\0';
3245             }
3246           break;
3247         case '%':
3248           if ((c_ext & YACC) && *lp == '%')
3249             {
3250               /* Entering or exiting rules section in yacc file. */
3251               lp++;
3252               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3253               typdef = tnone; structdef = snone;
3254               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3255               bracelev = 0;
3256               yacc_rules = !yacc_rules;
3257               continue;
3258             }
3259           else
3260             break;
3261         case '#':
3262           if (definedef == dnone)
3263             {
3264               char *cp;
3265               bool cpptoken = TRUE;
3266
3267               /* Look back on this line.  If all blanks, or nonblanks
3268                  followed by an end of comment, this is a preprocessor
3269                  token. */
3270               for (cp = newlb.buffer; cp < lp-1; cp++)
3271                 if (!iswhite (*cp))
3272                   {
3273                     if (*cp == '*' && *(cp+1) == '/')
3274                       {
3275                         cp++;
3276                         cpptoken = TRUE;
3277                       }
3278                     else
3279                       cpptoken = FALSE;
3280                   }
3281               if (cpptoken)
3282                 definedef = dsharpseen;
3283             } /* if (definedef == dnone) */
3284           continue;
3285         case '[':
3286           bracketlev++;
3287             continue;
3288         } /* switch (c) */
3289
3290
3291       /* Consider token only if some involved conditions are satisfied. */
3292       if (typdef != tignore
3293           && definedef != dignorerest
3294           && fvdef != finlist
3295           && templatelev == 0
3296           && (definedef != dnone
3297               || structdef != scolonseen)
3298           && !inattribute)
3299         {
3300           if (midtoken)
3301             {
3302               if (endtoken (c))
3303                 {
3304                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3305                     /* This handles :: in the middle,
3306                        but not at the beginning of an identifier.
3307                        Also, space-separated :: is not recognised. */
3308                     {
3309                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3310                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3311                       lp += 2;
3312                       toklen += 2;
3313                       c = lp[-1];
3314                       goto still_in_token;
3315                     }
3316                   else
3317                     {
3318                       bool funorvar = FALSE;
3319
3320                       if (yacc_rules
3321                           || consider_token (newlb.buffer + tokoff, toklen, c,
3322                                              &c_ext, bracelev, parlev,
3323                                              &funorvar))
3324                         {
3325                           if (fvdef == foperator)
3326                             {
3327                               char *oldlp = lp;
3328                               lp = skip_spaces (lp-1);
3329                               if (*lp != '\0')
3330                                 lp += 1;
3331                               while (*lp != '\0'
3332                                      && !iswhite (*lp) && *lp != '(')
3333                                 lp += 1;
3334                               c = *lp++;
3335                               toklen += lp - oldlp;
3336                             }
3337                           token.named = FALSE;
3338                           if (!plainc
3339                               && nestlev > 0 && definedef == dnone)
3340                             /* in struct body */
3341                             {
3342                               write_classname (&token_name, qualifier);
3343                               linebuffer_setlen (&token_name,
3344                                                  token_name.len+qlen+toklen);
3345                               strcat (token_name.buffer, qualifier);
3346                               strncat (token_name.buffer,
3347                                        newlb.buffer + tokoff, toklen);
3348                               token.named = TRUE;
3349                             }
3350                           else if (objdef == ocatseen)
3351                             /* Objective C category */
3352                             {
3353                               int len = strlen (objtag) + 2 + toklen;
3354                               linebuffer_setlen (&token_name, len);
3355                               strcpy (token_name.buffer, objtag);
3356                               strcat (token_name.buffer, "(");
3357                               strncat (token_name.buffer,
3358                                        newlb.buffer + tokoff, toklen);
3359                               strcat (token_name.buffer, ")");
3360                               token.named = TRUE;
3361                             }
3362                           else if (objdef == omethodtag
3363                                    || objdef == omethodparm)
3364                             /* Objective C method */
3365                             {
3366                               token.named = TRUE;
3367                             }
3368                           else if (fvdef == fdefunname)
3369                             /* GNU DEFUN and similar macros */
3370                             {
3371                               bool defun = (newlb.buffer[tokoff] == 'F');
3372                               int off = tokoff;
3373                               int len = toklen;
3374
3375                               /* Rewrite the tag so that emacs lisp DEFUNs
3376                                  can be found by their elisp name */
3377                               if (defun)
3378                                 {
3379                                   off += 1;
3380                                   len -= 1;
3381                                 }
3382                               linebuffer_setlen (&token_name, len);
3383                               strncpy (token_name.buffer,
3384                                        newlb.buffer + off, len);
3385                               token_name.buffer[len] = '\0';
3386                               if (defun)
3387                                 while (--len >= 0)
3388                                   if (token_name.buffer[len] == '_')
3389                                     token_name.buffer[len] = '-';
3390                               token.named = defun;
3391                             }
3392                           else
3393                             {
3394                               linebuffer_setlen (&token_name, toklen);
3395                               strncpy (token_name.buffer,
3396                                        newlb.buffer + tokoff, toklen);
3397                               token_name.buffer[toklen] = '\0';
3398                               /* Name macros and members. */
3399                               token.named = (structdef == stagseen
3400                                              || typdef == ttypeseen
3401                                              || typdef == tend
3402                                              || (funorvar
3403                                                  && definedef == dignorerest)
3404                                              || (funorvar
3405                                                  && definedef == dnone
3406                                                  && structdef == snone
3407                                                  && bracelev > 0));
3408                             }
3409                           token.lineno = lineno;
3410                           token.offset = tokoff;
3411                           token.length = toklen;
3412                           token.line = newlb.buffer;
3413                           token.linepos = newlinepos;
3414                           token.valid = TRUE;
3415
3416                           if (definedef == dnone
3417                               && (fvdef == fvnameseen
3418                                   || fvdef == foperator
3419                                   || structdef == stagseen
3420                                   || typdef == tend
3421                                   || typdef == ttypeseen
3422                                   || objdef != onone))
3423                             {
3424                               if (current_lb_is_new)
3425                                 switch_line_buffers ();
3426                             }
3427                           else if (definedef != dnone
3428                                    || fvdef == fdefunname
3429                                    || instruct)
3430                             make_C_tag (funorvar);
3431                         }
3432                       else /* not yacc and consider_token failed */
3433                         {
3434                           if (inattribute && fvdef == fignore)
3435                             {
3436                               /* We have just met __attribute__ after a
3437                                  function parameter list: do not tag the
3438                                  function again. */
3439                               fvdef = fvnone;
3440                             }
3441                         }
3442                       midtoken = FALSE;
3443                     }
3444                 } /* if (endtoken (c)) */
3445               else if (intoken (c))
3446                 still_in_token:
3447                 {
3448                   toklen++;
3449                   continue;
3450                 }
3451             } /* if (midtoken) */
3452           else if (begtoken (c))
3453             {
3454               switch (definedef)
3455                 {
3456                 case dnone:
3457                   switch (fvdef)
3458                     {
3459                     case fstartlist:
3460                       /* This prevents tagging fb in
3461                          void (__attribute__((noreturn)) *fb) (void);
3462                          Fixing this is not easy and not very important. */
3463                       fvdef = finlist;
3464                       continue;
3465                     case flistseen:
3466                       if (plainc || declarations)
3467                         {
3468                           make_C_tag (TRUE); /* a function */
3469                           fvdef = fignore;
3470                         }
3471                       break;
3472                     }
3473                   if (structdef == stagseen && !cjava)
3474                     {
3475                       popclass_above (bracelev);
3476                       structdef = snone;
3477                     }
3478                   break;
3479                 case dsharpseen:
3480                   savetoken = token;
3481                   break;
3482                 }
3483               if (!yacc_rules || lp == newlb.buffer + 1)
3484                 {
3485                   tokoff = lp - 1 - newlb.buffer;
3486                   toklen = 1;
3487                   midtoken = TRUE;
3488                 }
3489               continue;
3490             } /* if (begtoken) */
3491         } /* if must look at token */
3492
3493
3494       /* Detect end of line, colon, comma, semicolon and various braces
3495          after having handled a token.*/
3496       switch (c)
3497         {
3498         case ':':
3499           if (inattribute)
3500             break;
3501           if (yacc_rules && token.offset == 0 && token.valid)
3502             {
3503               make_C_tag (FALSE); /* a yacc function */
3504               break;
3505             }
3506           if (definedef != dnone)
3507             break;
3508           switch (objdef)
3509             {
3510             case  otagseen:
3511               objdef = oignore;
3512               make_C_tag (TRUE); /* an Objective C class */
3513               break;
3514             case omethodtag:
3515             case omethodparm:
3516               objdef = omethodcolon;
3517               linebuffer_setlen (&token_name, token_name.len + 1);
3518               strcat (token_name.buffer, ":");
3519               break;
3520             }
3521           if (structdef == stagseen)
3522             {
3523               structdef = scolonseen;
3524               break;
3525             }
3526           /* Should be useless, but may be work as a safety net. */
3527           if (cplpl && fvdef == flistseen)
3528             {
3529               make_C_tag (TRUE); /* a function */
3530               fvdef = fignore;
3531               break;
3532             }
3533           break;
3534         case ';':
3535           if (definedef != dnone || inattribute)
3536             break;
3537           switch (typdef)
3538             {
3539             case tend:
3540             case ttypeseen:
3541               make_C_tag (FALSE); /* a typedef */
3542               typdef = tnone;
3543               fvdef = fvnone;
3544               break;
3545             case tnone:
3546             case tinbody:
3547             case tignore:
3548               switch (fvdef)
3549                 {
3550                 case fignore:
3551                   if (typdef == tignore || cplpl)
3552                     fvdef = fvnone;
3553                   break;
3554                 case fvnameseen:
3555                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3556                       || (members && instruct))
3557                     make_C_tag (FALSE); /* a variable */
3558                   fvextern = FALSE;
3559                   fvdef = fvnone;
3560                   token.valid = FALSE;
3561                   break;
3562                 case flistseen:
3563                   if ((declarations
3564                        && (cplpl || !instruct)
3565                        && (typdef == tnone || (typdef != tignore && instruct)))
3566                       || (members
3567                           && plainc && instruct))
3568                     make_C_tag (TRUE);  /* a function */
3569                   /* FALLTHRU */
3570                 default:
3571                   fvextern = FALSE;
3572                   fvdef = fvnone;
3573                   if (declarations
3574                        && cplpl && structdef == stagseen)
3575                     make_C_tag (FALSE); /* forward declaration */
3576                   else
3577                     token.valid = FALSE;
3578                 } /* switch (fvdef) */
3579               /* FALLTHRU */
3580             default:
3581               if (!instruct)
3582                 typdef = tnone;
3583             }
3584           if (structdef == stagseen)
3585             structdef = snone;
3586           break;
3587         case ',':
3588           if (definedef != dnone || inattribute)
3589             break;
3590           switch (objdef)
3591             {
3592             case omethodtag:
3593             case omethodparm:
3594               make_C_tag (TRUE); /* an Objective C method */
3595               objdef = oinbody;
3596               break;
3597             }
3598           switch (fvdef)
3599             {
3600             case fdefunkey:
3601             case foperator:
3602             case fstartlist:
3603             case finlist:
3604             case fignore:
3605             case vignore:
3606               break;
3607             case fdefunname:
3608               fvdef = fignore;
3609               break;
3610             case fvnameseen:
3611               if (parlev == 0
3612                   && ((globals
3613                        && bracelev == 0
3614                        && templatelev == 0
3615                        && (!fvextern || declarations))
3616                       || (members && instruct)))
3617                   make_C_tag (FALSE); /* a variable */
3618               break;
3619             case flistseen:
3620               if ((declarations && typdef == tnone && !instruct)
3621                   || (members && typdef != tignore && instruct))
3622                 {
3623                   make_C_tag (TRUE); /* a function */
3624                   fvdef = fvnameseen;
3625                 }
3626               else if (!declarations)
3627                 fvdef = fvnone;
3628               token.valid = FALSE;
3629               break;
3630             default:
3631               fvdef = fvnone;
3632             }
3633           if (structdef == stagseen)
3634             structdef = snone;
3635           break;
3636         case ']':
3637           if (definedef != dnone || inattribute)
3638             break;
3639           if (structdef == stagseen)
3640             structdef = snone;
3641           switch (typdef)
3642             {
3643             case ttypeseen:
3644             case tend:
3645               typdef = tignore;
3646               make_C_tag (FALSE);       /* a typedef */
3647               break;
3648             case tnone:
3649             case tinbody:
3650               switch (fvdef)
3651                 {
3652                 case foperator:
3653                 case finlist:
3654                 case fignore:
3655                 case vignore:
3656                   break;
3657                 case fvnameseen:
3658                   if ((members && bracelev == 1)
3659                       || (globals && bracelev == 0
3660                           && (!fvextern || declarations)))
3661                     make_C_tag (FALSE); /* a variable */
3662                   /* FALLTHRU */
3663                 default:
3664                   fvdef = fvnone;
3665                 }
3666               break;
3667             }
3668           break;
3669         case '(':
3670           if (inattribute)
3671             {
3672               attrparlev++;
3673               break;
3674             }
3675           if (definedef != dnone)
3676             break;
3677           if (objdef == otagseen && parlev == 0)
3678             objdef = oparenseen;
3679           switch (fvdef)
3680             {
3681             case fvnameseen:
3682               if (typdef == ttypeseen
3683                   && *lp != '*'
3684                   && !instruct)
3685                 {
3686                   /* This handles constructs like:
3687                      typedef void OperatorFun (int fun); */
3688                   make_C_tag (FALSE);
3689                   typdef = tignore;
3690                   fvdef = fignore;
3691                   break;
3692                 }
3693               /* FALLTHRU */
3694             case foperator:
3695               fvdef = fstartlist;
3696               break;
3697             case flistseen:
3698               fvdef = finlist;
3699               break;
3700             }
3701           parlev++;
3702           break;
3703         case ')':
3704           if (inattribute)
3705             {
3706               if (--attrparlev == 0)
3707                 inattribute = FALSE;
3708               break;
3709             }
3710           if (definedef != dnone)
3711             break;
3712           if (objdef == ocatseen && parlev == 1)
3713             {
3714               make_C_tag (TRUE); /* an Objective C category */
3715               objdef = oignore;
3716             }
3717           if (--parlev == 0)
3718             {
3719               switch (fvdef)
3720                 {
3721                 case fstartlist:
3722                 case finlist:
3723                   fvdef = flistseen;
3724                   break;
3725                 }
3726               if (!instruct
3727                   && (typdef == tend
3728                       || typdef == ttypeseen))
3729                 {
3730                   typdef = tignore;
3731                   make_C_tag (FALSE); /* a typedef */
3732                 }
3733             }
3734           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3735             parlev = 0;
3736           break;
3737         case '{':
3738           if (definedef != dnone)
3739             break;
3740           if (typdef == ttypeseen)
3741             {
3742               /* Whenever typdef is set to tinbody (currently only
3743                  here), typdefbracelev should be set to bracelev. */
3744               typdef = tinbody;
3745               typdefbracelev = bracelev;
3746             }
3747           switch (fvdef)
3748             {
3749             case flistseen:
3750               make_C_tag (TRUE);    /* a function */
3751               /* FALLTHRU */
3752             case fignore:
3753               fvdef = fvnone;
3754               break;
3755             case fvnone:
3756               switch (objdef)
3757                 {
3758                 case otagseen:
3759                   make_C_tag (TRUE); /* an Objective C class */
3760                   objdef = oignore;
3761                   break;
3762                 case omethodtag:
3763                 case omethodparm:
3764                   make_C_tag (TRUE); /* an Objective C method */
3765                   objdef = oinbody;
3766                   break;
3767                 default:
3768                   /* Neutralize `extern "C" {' grot. */
3769                   if (bracelev == 0 && structdef == snone && nestlev == 0
3770                       && typdef == tnone)
3771                     bracelev = -1;
3772                 }
3773               break;
3774             }
3775           switch (structdef)
3776             {
3777             case skeyseen:         /* unnamed struct */
3778               pushclass_above (bracelev, NULL, 0);
3779               structdef = snone;
3780               break;
3781             case stagseen:         /* named struct or enum */
3782             case scolonseen:       /* a class */
3783               pushclass_above (bracelev,token.line+token.offset, token.length);
3784               structdef = snone;
3785               make_C_tag (FALSE);  /* a struct or enum */
3786               break;
3787             }
3788           bracelev += 1;
3789           break;
3790         case '*':
3791           if (definedef != dnone)
3792             break;
3793           if (fvdef == fstartlist)
3794             {
3795               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3796               token.valid = FALSE;
3797             }
3798           break;
3799         case '}':
3800           if (definedef != dnone)
3801             break;
3802           bracelev -= 1;
3803           if (!ignoreindent && lp == newlb.buffer + 1)
3804             {
3805               if (bracelev != 0)
3806                 token.valid = FALSE; /* unexpected value, token unreliable */
3807               bracelev = 0;     /* reset brace level if first column */
3808               parlev = 0;       /* also reset paren level, just in case... */
3809             }
3810           else if (bracelev < 0)
3811             {
3812               token.valid = FALSE; /* something gone amiss, token unreliable */
3813               bracelev = 0;
3814             }
3815           if (bracelev == 0 && fvdef == vignore)
3816             fvdef = fvnone;             /* end of function */
3817           popclass_above (bracelev);
3818           structdef = snone;
3819           /* Only if typdef == tinbody is typdefbracelev significant. */
3820           if (typdef == tinbody && bracelev <= typdefbracelev)
3821             {
3822               assert (bracelev == typdefbracelev);
3823               typdef = tend;
3824             }
3825           break;
3826         case '=':
3827           if (definedef != dnone)
3828             break;
3829           switch (fvdef)
3830             {
3831             case foperator:
3832             case finlist:
3833             case fignore:
3834             case vignore:
3835               break;
3836             case fvnameseen:
3837               if ((members && bracelev == 1)
3838                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3839                 make_C_tag (FALSE); /* a variable */
3840               /* FALLTHRU */
3841             default:
3842               fvdef = vignore;
3843             }
3844           break;
3845         case '<':
3846           if (cplpl
3847               && (structdef == stagseen || fvdef == fvnameseen))
3848             {
3849               templatelev++;
3850               break;
3851             }
3852           goto resetfvdef;
3853         case '>':
3854           if (templatelev > 0)
3855             {
3856               templatelev--;
3857               break;
3858             }
3859           goto resetfvdef;
3860         case '+':
3861         case '-':
3862           if (objdef == oinbody && bracelev == 0)
3863             {
3864               objdef = omethodsign;
3865               break;
3866             }
3867           /* FALLTHRU */
3868         resetfvdef:
3869         case '#': case '~': case '&': case '%': case '/':
3870         case '|': case '^': case '!': case '.': case '?':
3871           if (definedef != dnone)
3872             break;
3873           /* These surely cannot follow a function tag in C. */
3874           switch (fvdef)
3875             {
3876             case foperator:
3877             case finlist:
3878             case fignore:
3879             case vignore:
3880               break;
3881             default:
3882               fvdef = fvnone;
3883             }
3884           break;
3885         case '\0':
3886           if (objdef == otagseen)
3887             {
3888               make_C_tag (TRUE); /* an Objective C class */
3889               objdef = oignore;
3890             }
3891           /* If a macro spans multiple lines don't reset its state. */
3892           if (quotednl)
3893             CNL_SAVE_DEFINEDEF ();
3894           else
3895             CNL ();
3896           break;
3897         } /* switch (c) */
3898
3899     } /* while not eof */
3900
3901   free (lbs[0].lb.buffer);
3902   free (lbs[1].lb.buffer);
3903 }
3904
3905 /*
3906  * Process either a C++ file or a C file depending on the setting
3907  * of a global flag.
3908  */
3909 static void
3910 default_C_entries (FILE *inf)
3911 {
3912   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3913 }
3914
/* Tag a file as plain C: no language-extension flag is passed on. */
static void
plain_C_entries (FILE *inf)
{
  const int c_ext = 0;          /* no extension bits set */

  C_entries (c_ext, inf);
}
3921
3922 /* Always do C++. */
3923 static void
3924 Cplusplus_entries (FILE *inf)
3925 {
3926   C_entries (C_PLPL, inf);
3927 }
3928
3929 /* Always do Java. */
3930 static void
3931 Cjava_entries (FILE *inf)
3932 {
3933   C_entries (C_JAVA, inf);
3934 }
3935
3936 /* Always do C*. */
3937 static void
3938 Cstar_entries (FILE *inf)
3939 {
3940   C_entries (C_STAR, inf);
3941 }
3942
3943 /* Always do Yacc. */
3944 static void
3945 Yacc_entries (FILE *inf)
3946 {
3947   C_entries (YACC, inf);
3948 }
3949
3950 \f
3951 /* Useful macros. */
/* Loop header iterating over the lines of FILE_POINTER.  Before each
   pass, provided end of file has not been reached, the next line is
   read into LINE_BUFFER and CHAR_POINTER is set to the start of its
   text.  The statement following the macro is the loop body.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)                                           \
         && (readline (&line_buffer, file_pointer),                     \
             char_pointer = line_buffer.buffer,                         \
             TRUE))
3960
/* True if CP points at the keyword KW followed by a character that
   cannot be part of a name; in that case CP is moved past the keyword
   and past the spaces that follow it.  KW must be a literal string:
   the `"" kw' concatenation is a syntax error otherwise.  */
#define LOOKING_AT(cp, kw)                                              \
  (assert ("" kw),                                                      \
   (strneq ((cp), kw, sizeof (kw) - 1)                                  \
    && notinname ((cp)[sizeof (kw) - 1])                                \
    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1)) != NULL))
3966
/* Case-insensitive relative of LOOKING_AT.  True if CP points at the
   keyword KW, compared with strncaseeq; in that case CP is moved just
   past the keyword.  Unlike LOOKING_AT it neither checks the character
   after the keyword with notinname nor skips the spaces that follow.
   KW must be a literal string: the `"" kw' concatenation is a syntax
   error otherwise.  */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  (assert ("" kw),                                                      \
   (strncaseeq ((cp), kw, sizeof (kw) - 1)                              \
    && ((cp) += sizeof (kw) - 1) != NULL))
3972
3973 /*
3974  * Read a file, but do no processing.  This is used to do regexp
3975  * matching on files that have no language defined.
3976  */
3977 static void
3978 just_read_file (FILE *inf)
3979 {
3980   while (!feof (inf))
3981     readline (&lb, inf);
3982 }
3983
3984 \f
3985 /* Fortran parsing */
3986
3987 static void F_takeprec (void);
3988 static void F_getit (FILE *);
3989
3990 static void
3991 F_takeprec (void)
3992 {
3993   dbp = skip_spaces (dbp);
3994   if (*dbp != '*')
3995     return;
3996   dbp++;
3997   dbp = skip_spaces (dbp);
3998   if (strneq (dbp, "(*)", 3))
3999     {
4000       dbp += 3;
4001       return;
4002     }
4003   if (!ISDIGIT (*dbp))
4004     {
4005       --dbp;                    /* force failure */
4006       return;
4007     }
4008   do
4009     dbp++;
4010   while (ISDIGIT (*dbp));
4011 }
4012
4013 static void
4014 F_getit (FILE *inf)
4015 {
4016   register char *cp;
4017
4018   dbp = skip_spaces (dbp);
4019   if (*dbp == '\0')
4020     {
4021       readline (&lb, inf);
4022       dbp = lb.buffer;
4023       if (dbp[5] != '&')
4024         return;
4025       dbp += 6;
4026       dbp = skip_spaces (dbp);
4027     }
4028   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4029     return;
4030   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4031     continue;
4032   make_tag (dbp, cp-dbp, TRUE,
4033             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4034 }
4035
4036
4037 static void
4038 Fortran_functions (FILE *inf)
4039 {
4040   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4041     {
4042       if (*dbp == '%')
4043         dbp++;                  /* Ratfor escape to fortran */
4044       dbp = skip_spaces (dbp);
4045       if (*dbp == '\0')
4046         continue;
4047
4048       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4049         dbp = skip_spaces (dbp);
4050
4051       switch (lowcase (*dbp))
4052         {
4053         case 'i':
4054           if (nocase_tail ("integer"))
4055             F_takeprec ();
4056           break;
4057         case 'r':
4058           if (nocase_tail ("real"))
4059             F_takeprec ();
4060           break;
4061         case 'l':
4062           if (nocase_tail ("logical"))
4063             F_takeprec ();
4064           break;
4065         case 'c':
4066           if (nocase_tail ("complex") || nocase_tail ("character"))
4067             F_takeprec ();
4068           break;
4069         case 'd':
4070           if (nocase_tail ("double"))
4071             {
4072               dbp = skip_spaces (dbp);
4073               if (*dbp == '\0')
4074                 continue;
4075               if (nocase_tail ("precision"))
4076                 break;
4077               continue;
4078             }
4079           break;
4080         }
4081       dbp = skip_spaces (dbp);
4082       if (*dbp == '\0')
4083         continue;
4084       switch (lowcase (*dbp))
4085         {
4086         case 'f':
4087           if (nocase_tail ("function"))
4088             F_getit (inf);
4089           continue;
4090         case 's':
4091           if (nocase_tail ("subroutine"))
4092             F_getit (inf);
4093           continue;
4094         case 'e':
4095           if (nocase_tail ("entry"))
4096             F_getit (inf);
4097           continue;
4098         case 'b':
4099           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4100             {
4101               dbp = skip_spaces (dbp);
4102               if (*dbp == '\0') /* assume un-named */
4103                 make_tag ("blockdata", 9, TRUE,
4104                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4105               else
4106                 F_getit (inf);  /* look for name */
4107             }
4108           continue;
4109         }
4110     }
4111 }
4112
4113 \f
4114 /*
4115  * Ada parsing
4116  * Original code by
4117  * Philippe Waroquiers (1998)
4118  */
4119
4120 /* Once we are positioned after an "interesting" keyword, let's get
4121    the real tag value necessary. */
4122 static void
4123 Ada_getit (FILE *inf, const char *name_qualifier)
4124 {
4125   register char *cp;
4126   char *name;
4127   char c;
4128
4129   while (!feof (inf))
4130     {
4131       dbp = skip_spaces (dbp);
4132       if (*dbp == '\0'
4133           || (dbp[0] == '-' && dbp[1] == '-'))
4134         {
4135           readline (&lb, inf);
4136           dbp = lb.buffer;
4137         }
4138       switch (lowcase(*dbp))
4139         {
4140         case 'b':
4141           if (nocase_tail ("body"))
4142             {
4143               /* Skipping body of   procedure body   or   package body or ....
4144                  resetting qualifier to body instead of spec. */
4145               name_qualifier = "/b";
4146               continue;
4147             }
4148           break;
4149         case 't':
4150           /* Skipping type of   task type   or   protected type ... */
4151           if (nocase_tail ("type"))
4152             continue;
4153           break;
4154         }
4155       if (*dbp == '"')
4156         {
4157           dbp += 1;
4158           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4159             continue;
4160         }
4161       else
4162         {
4163           dbp = skip_spaces (dbp);
4164           for (cp = dbp;
4165                (*cp != '\0'
4166                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4167                cp++)
4168             continue;
4169           if (cp == dbp)
4170             return;
4171         }
4172       c = *cp;
4173       *cp = '\0';
4174       name = concat (dbp, name_qualifier, "");
4175       *cp = c;
4176       make_tag (name, strlen (name), TRUE,
4177                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4178       free (name);
4179       if (c == '"')
4180         dbp = cp + 1;
4181       return;
4182     }
4183 }
4184
4185 static void
4186 Ada_funcs (FILE *inf)
4187 {
4188   bool inquote = FALSE;
4189   bool skip_till_semicolumn = FALSE;
4190
4191   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4192     {
4193       while (*dbp != '\0')
4194         {
4195           /* Skip a string i.e. "abcd". */
4196           if (inquote || (*dbp == '"'))
4197             {
4198               dbp = etags_strchr (dbp + !inquote, '"');
4199               if (dbp != NULL)
4200                 {
4201                   inquote = FALSE;
4202                   dbp += 1;
4203                   continue;     /* advance char */
4204                 }
4205               else
4206                 {
4207                   inquote = TRUE;
4208                   break;        /* advance line */
4209                 }
4210             }
4211
4212           /* Skip comments. */
4213           if (dbp[0] == '-' && dbp[1] == '-')
4214             break;              /* advance line */
4215
4216           /* Skip character enclosed in single quote i.e. 'a'
4217              and skip single quote starting an attribute i.e. 'Image. */
4218           if (*dbp == '\'')
4219             {
4220               dbp++ ;
4221               if (*dbp != '\0')
4222                 dbp++;
4223               continue;
4224             }
4225
4226           if (skip_till_semicolumn)
4227             {
4228               if (*dbp == ';')
4229                 skip_till_semicolumn = FALSE;
4230               dbp++;
4231               continue;         /* advance char */
4232             }
4233
4234           /* Search for beginning of a token.  */
4235           if (!begtoken (*dbp))
4236             {
4237               dbp++;
4238               continue;         /* advance char */
4239             }
4240
4241           /* We are at the beginning of a token. */
4242           switch (lowcase(*dbp))
4243             {
4244             case 'f':
4245               if (!packages_only && nocase_tail ("function"))
4246                 Ada_getit (inf, "/f");
4247               else
4248                 break;          /* from switch */
4249               continue;         /* advance char */
4250             case 'p':
4251               if (!packages_only && nocase_tail ("procedure"))
4252                 Ada_getit (inf, "/p");
4253               else if (nocase_tail ("package"))
4254                 Ada_getit (inf, "/s");
4255               else if (nocase_tail ("protected")) /* protected type */
4256                 Ada_getit (inf, "/t");
4257               else
4258                 break;          /* from switch */
4259               continue;         /* advance char */
4260
4261             case 'u':
4262               if (typedefs && !packages_only && nocase_tail ("use"))
4263                 {
4264                   /* when tagging types, avoid tagging  use type Pack.Typename;
4265                      for this, we will skip everything till a ; */
4266                   skip_till_semicolumn = TRUE;
4267                   continue;     /* advance char */
4268                 }
4269
4270             case 't':
4271               if (!packages_only && nocase_tail ("task"))
4272                 Ada_getit (inf, "/k");
4273               else if (typedefs && !packages_only && nocase_tail ("type"))
4274                 {
4275                   Ada_getit (inf, "/t");
4276                   while (*dbp != '\0')
4277                     dbp += 1;
4278                 }
4279               else
4280                 break;          /* from switch */
4281               continue;         /* advance char */
4282             }
4283
4284           /* Look for the end of the token. */
4285           while (!endtoken (*dbp))
4286             dbp++;
4287
4288         } /* advance char */
4289     } /* advance line */
4290 }
4291
4292 \f
4293 /*
4294  * Unix and microcontroller assembly tag handling
4295  * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4296  * Idea by Bob Weiner, Motorola Inc. (1994)
4297  */
4298 static void
4299 Asm_labels (FILE *inf)
4300 {
4301   register char *cp;
4302
4303   LOOP_ON_INPUT_LINES (inf, lb, cp)
4304     {
4305       /* If first char is alphabetic or one of [_.$], test for colon
4306          following identifier. */
4307       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4308         {
4309           /* Read past label. */
4310           cp++;
4311           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4312             cp++;
4313           if (*cp == ':' || iswhite (*cp))
4314             /* Found end of label, so copy it and add it to the table. */
4315             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4316                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4317         }
4318     }
4319 }
4320
4321 \f
4322 /*
4323  * Perl support
4324  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4325  * Perl variable names: /^(my|local).../
4326  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4327  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4328  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4329  */
4330 static void
4331 Perl_functions (FILE *inf)
4332 {
4333   char *package = savestr ("main"); /* current package name */
4334   register char *cp;
4335
4336   LOOP_ON_INPUT_LINES (inf, lb, cp)
4337     {
4338       cp = skip_spaces (cp);
4339
4340       if (LOOKING_AT (cp, "package"))
4341         {
4342           free (package);
4343           get_tag (cp, &package);
4344         }
4345       else if (LOOKING_AT (cp, "sub"))
4346         {
4347           char *pos;
4348           char *sp = cp;
4349
4350           while (!notinname (*cp))
4351             cp++;
4352           if (cp == sp)
4353             continue;           /* nothing found */
4354           if ((pos = etags_strchr (sp, ':')) != NULL
4355               && pos < cp && pos[1] == ':')
4356             /* The name is already qualified. */
4357             make_tag (sp, cp - sp, TRUE,
4358                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4359           else
4360             /* Qualify it. */
4361             {
4362               char savechar, *name;
4363
4364               savechar = *cp;
4365               *cp = '\0';
4366               name = concat (package, "::", sp);
4367               *cp = savechar;
4368               make_tag (name, strlen(name), TRUE,
4369                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4370               free (name);
4371             }
4372         }
4373        else if (globals)        /* only if we are tagging global vars */
4374         {
4375           /* Skip a qualifier, if any. */
4376           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4377           /* After "my" or "local", but before any following paren or space. */
4378           char *varstart = cp;
4379
4380           if (qual              /* should this be removed?  If yes, how? */
4381               && (*cp == '$' || *cp == '@' || *cp == '%'))
4382             {
4383               varstart += 1;
4384               do
4385                 cp++;
4386               while (ISALNUM (*cp) || *cp == '_');
4387             }
4388           else if (qual)
4389             {
4390               /* Should be examining a variable list at this point;
4391                  could insist on seeing an open parenthesis. */
4392               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4393                 cp++;
4394             }
4395           else
4396             continue;
4397
4398           make_tag (varstart, cp - varstart, FALSE,
4399                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4400         }
4401     }
4402   free (package);
4403 }
4404
4405
4406 /*
4407  * Python support
4408  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4409  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4410  * More ideas by seb bacon <seb@jamkit.com> (2002)
4411  */
4412 static void
4413 Python_functions (FILE *inf)
4414 {
4415   register char *cp;
4416
4417   LOOP_ON_INPUT_LINES (inf, lb, cp)
4418     {
4419       cp = skip_spaces (cp);
4420       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4421         {
4422           char *name = cp;
4423           while (!notinname (*cp) && *cp != ':')
4424             cp++;
4425           make_tag (name, cp - name, TRUE,
4426                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4427         }
4428     }
4429 }
4430
4431 \f
4432 /*
4433  * PHP support
4434  * Look for:
4435  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4436  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4437  *  - /^[ \t]*define\(\"[^\"]+/
4438  * Only with --members:
4439  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4440  * Idea by Diez B. Roggisch (2001)
4441  */
4442 static void
4443 PHP_functions (FILE *inf)
4444 {
4445   register char *cp, *name;
4446   bool search_identifier = FALSE;
4447
4448   LOOP_ON_INPUT_LINES (inf, lb, cp)
4449     {
4450       cp = skip_spaces (cp);
4451       name = cp;
4452       if (search_identifier
4453           && *cp != '\0')
4454         {
4455           while (!notinname (*cp))
4456             cp++;
4457           make_tag (name, cp - name, TRUE,
4458                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4459           search_identifier = FALSE;
4460         }
4461       else if (LOOKING_AT (cp, "function"))
4462         {
4463           if(*cp == '&')
4464             cp = skip_spaces (cp+1);
4465           if(*cp != '\0')
4466             {
4467               name = cp;
4468               while (!notinname (*cp))
4469                 cp++;
4470               make_tag (name, cp - name, TRUE,
4471                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4472             }
4473           else
4474             search_identifier = TRUE;
4475         }
4476       else if (LOOKING_AT (cp, "class"))
4477         {
4478           if (*cp != '\0')
4479             {
4480               name = cp;
4481               while (*cp != '\0' && !iswhite (*cp))
4482                 cp++;
4483               make_tag (name, cp - name, FALSE,
4484                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4485             }
4486           else
4487             search_identifier = TRUE;
4488         }
4489       else if (strneq (cp, "define", 6)
4490                && (cp = skip_spaces (cp+6))
4491                && *cp++ == '('
4492                && (*cp == '"' || *cp == '\''))
4493         {
4494           char quote = *cp++;
4495           name = cp;
4496           while (*cp != quote && *cp != '\0')
4497             cp++;
4498           make_tag (name, cp - name, FALSE,
4499                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4500         }
4501       else if (members
4502                && LOOKING_AT (cp, "var")
4503                && *cp == '$')
4504         {
4505           name = cp;
4506           while (!notinname(*cp))
4507             cp++;
4508           make_tag (name, cp - name, FALSE,
4509                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4510         }
4511     }
4512 }
4513
4514 \f
4515 /*
4516  * Cobol tag functions
4517  * We could look for anything that could be a paragraph name.
4518  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4519  * Idea by Corny de Souza (1993)
4520  */
4521 static void
4522 Cobol_paragraphs (FILE *inf)
4523 {
4524   register char *bp, *ep;
4525
4526   LOOP_ON_INPUT_LINES (inf, lb, bp)
4527     {
4528       if (lb.len < 9)
4529         continue;
4530       bp += 8;
4531
4532       /* If eoln, compiler option or comment ignore whole line. */
4533       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4534         continue;
4535
4536       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4537         continue;
4538       if (*ep++ == '.')
4539         make_tag (bp, ep - bp, TRUE,
4540                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4541     }
4542 }
4543
4544 \f
4545 /*
4546  * Makefile support
4547  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4548  */
4549 static void
4550 Makefile_targets (FILE *inf)
4551 {
4552   register char *bp;
4553
4554   LOOP_ON_INPUT_LINES (inf, lb, bp)
4555     {
4556       if (*bp == '\t' || *bp == '#')
4557         continue;
4558       while (*bp != '\0' && *bp != '=' && *bp != ':')
4559         bp++;
4560       if (*bp == ':' || (globals && *bp == '='))
4561         {
4562           /* We should detect if there is more than one tag, but we do not.
4563              We just skip initial and final spaces. */
4564           char * namestart = skip_spaces (lb.buffer);
4565           while (--bp > namestart)
4566             if (!notinname (*bp))
4567               break;
4568           make_tag (namestart, bp - namestart + 1, TRUE,
4569                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4570         }
4571     }
4572 }
4573
4574 \f
4575 /*
4576  * Pascal parsing
4577  * Original code by Mosur K. Mohan (1989)
4578  *
4579  *  Locates tags for procedures & functions.  Doesn't do any type- or
4580  *  var-definitions.  It does look for the keyword "extern" or
4581  *  "forward" immediately following the procedure statement; if found,
4582  *  the tag is skipped.
4583  */
4584 static void
4585 Pascal_functions (FILE *inf)
4586 {
4587   linebuffer tline;             /* mostly copied from C_entries */
4588   long save_lcno;
4589   int save_lineno, namelen, taglen;
4590   char c, *name;
4591
4592   bool                          /* each of these flags is TRUE if: */
4593     incomment,                  /* point is inside a comment */
4594     inquote,                    /* point is inside '..' string */
4595     get_tagname,                /* point is after PROCEDURE/FUNCTION
4596                                    keyword, so next item = potential tag */
4597     found_tag,                  /* point is after a potential tag */
4598     inparms,                    /* point is within parameter-list */
4599     verify_tag;                 /* point has passed the parm-list, so the
4600                                    next token will determine whether this
4601                                    is a FORWARD/EXTERN to be ignored, or
4602                                    whether it is a real tag */
4603
4604   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4605   name = NULL;                  /* keep compiler quiet */
4606   dbp = lb.buffer;
4607   *dbp = '\0';
4608   linebuffer_init (&tline);
4609
4610   incomment = inquote = FALSE;
4611   found_tag = FALSE;            /* have a proc name; check if extern */
4612   get_tagname = FALSE;          /* found "procedure" keyword         */
4613   inparms = FALSE;              /* found '(' after "proc"            */
4614   verify_tag = FALSE;           /* check if "extern" is ahead        */
4615
4616
4617   while (!feof (inf))           /* long main loop to get next char */
4618     {
4619       c = *dbp++;
4620       if (c == '\0')            /* if end of line */
4621         {
4622           readline (&lb, inf);
4623           dbp = lb.buffer;
4624           if (*dbp == '\0')
4625             continue;
4626           if (!((found_tag && verify_tag)
4627                 || get_tagname))
4628             c = *dbp++;         /* only if don't need *dbp pointing
4629                                    to the beginning of the name of
4630                                    the procedure or function */
4631         }
4632       if (incomment)
4633         {
4634           if (c == '}')         /* within { } comments */
4635             incomment = FALSE;
4636           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4637             {
4638               dbp++;
4639               incomment = FALSE;
4640             }
4641           continue;
4642         }
4643       else if (inquote)
4644         {
4645           if (c == '\'')
4646             inquote = FALSE;
4647           continue;
4648         }
4649       else
4650         switch (c)
4651           {
4652           case '\'':
4653             inquote = TRUE;     /* found first quote */
4654             continue;
4655           case '{':             /* found open { comment */
4656             incomment = TRUE;
4657             continue;
4658           case '(':
4659             if (*dbp == '*')    /* found open (* comment */
4660               {
4661                 incomment = TRUE;
4662                 dbp++;
4663               }
4664             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4665               inparms = TRUE;
4666             continue;
4667           case ')':             /* end of parms list */
4668             if (inparms)
4669               inparms = FALSE;
4670             continue;
4671           case ';':
4672             if (found_tag && !inparms) /* end of proc or fn stmt */
4673               {
4674                 verify_tag = TRUE;
4675                 break;
4676               }
4677             continue;
4678           }
4679       if (found_tag && verify_tag && (*dbp != ' '))
4680         {
4681           /* Check if this is an "extern" declaration. */
4682           if (*dbp == '\0')
4683             continue;
4684           if (lowcase (*dbp == 'e'))
4685             {
4686               if (nocase_tail ("extern")) /* superfluous, really! */
4687                 {
4688                   found_tag = FALSE;
4689                   verify_tag = FALSE;
4690                 }
4691             }
4692           else if (lowcase (*dbp) == 'f')
4693             {
4694               if (nocase_tail ("forward")) /* check for forward reference */
4695                 {
4696                   found_tag = FALSE;
4697                   verify_tag = FALSE;
4698                 }
4699             }
4700           if (found_tag && verify_tag) /* not external proc, so make tag */
4701             {
4702               found_tag = FALSE;
4703               verify_tag = FALSE;
4704               make_tag (name, namelen, TRUE,
4705                         tline.buffer, taglen, save_lineno, save_lcno);
4706               continue;
4707             }
4708         }
4709       if (get_tagname)          /* grab name of proc or fn */
4710         {
4711           char *cp;
4712
4713           if (*dbp == '\0')
4714             continue;
4715
4716           /* Find block name. */
4717           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4718             continue;
4719
4720           /* Save all values for later tagging. */
4721           linebuffer_setlen (&tline, lb.len);
4722           strcpy (tline.buffer, lb.buffer);
4723           save_lineno = lineno;
4724           save_lcno = linecharno;
4725           name = tline.buffer + (dbp - lb.buffer);
4726           namelen = cp - dbp;
4727           taglen = cp - lb.buffer + 1;
4728
4729           dbp = cp;             /* set dbp to e-o-token */
4730           get_tagname = FALSE;
4731           found_tag = TRUE;
4732           continue;
4733
4734           /* And proceed to check for "extern". */
4735         }
4736       else if (!incomment && !inquote && !found_tag)
4737         {
4738           /* Check for proc/fn keywords. */
4739           switch (lowcase (c))
4740             {
4741             case 'p':
4742               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4743                 get_tagname = TRUE;
4744               continue;
4745             case 'f':
4746               if (nocase_tail ("unction"))
4747                 get_tagname = TRUE;
4748               continue;
4749             }
4750         }
4751     } /* while not eof */
4752
4753   free (tline.buffer);
4754 }
4755
4756 \f
4757 /*
4758  * Lisp tag functions
4759  *  look for (def or (DEF, quote or QUOTE
4760  */
4761
4762 static void L_getit (void);
4763
4764 static void
4765 L_getit (void)
4766 {
4767   if (*dbp == '\'')             /* Skip prefix quote */
4768     dbp++;
4769   else if (*dbp == '(')
4770   {
4771     dbp++;
4772     /* Try to skip "(quote " */
4773     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4774       /* Ok, then skip "(" before name in (defstruct (foo)) */
4775       dbp = skip_spaces (dbp);
4776   }
4777   get_tag (dbp, NULL);
4778 }
4779
4780 static void
4781 Lisp_functions (FILE *inf)
4782 {
4783   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4784     {
4785       if (dbp[0] != '(')
4786         continue;
4787
4788       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4789         {
4790           dbp = skip_non_spaces (dbp);
4791           dbp = skip_spaces (dbp);
4792           L_getit ();
4793         }
4794       else
4795         {
4796           /* Check for (foo::defmumble name-defined ... */
4797           do
4798             dbp++;
4799           while (!notinname (*dbp) && *dbp != ':');
4800           if (*dbp == ':')
4801             {
4802               do
4803                 dbp++;
4804               while (*dbp == ':');
4805
4806               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4807                 {
4808                   dbp = skip_non_spaces (dbp);
4809                   dbp = skip_spaces (dbp);
4810                   L_getit ();
4811                 }
4812             }
4813         }
4814     }
4815 }
4816
4817 \f
4818 /*
4819  * Lua script language parsing
4820  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4821  *
4822  *  "function" and "local function" are tags if they start at column 1.
4823  */
4824 static void
4825 Lua_functions (FILE *inf)
4826 {
4827   register char *bp;
4828
4829   LOOP_ON_INPUT_LINES (inf, lb, bp)
4830     {
4831       if (bp[0] != 'f' && bp[0] != 'l')
4832         continue;
4833
4834       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4835
4836       if (LOOKING_AT (bp, "function"))
4837         get_tag (bp, NULL);
4838     }
4839 }
4840
4841 \f
4842 /*
4843  * Postscript tags
4844  * Just look for lines where the first character is '/'
4845  * Also look at "defineps" for PSWrap
4846  * Ideas by:
4847  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4848  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4849  */
4850 static void
4851 PS_functions (FILE *inf)
4852 {
4853   register char *bp, *ep;
4854
4855   LOOP_ON_INPUT_LINES (inf, lb, bp)
4856     {
4857       if (bp[0] == '/')
4858         {
4859           for (ep = bp+1;
4860                *ep != '\0' && *ep != ' ' && *ep != '{';
4861                ep++)
4862             continue;
4863           make_tag (bp, ep - bp, TRUE,
4864                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4865         }
4866       else if (LOOKING_AT (bp, "defineps"))
4867         get_tag (bp, NULL);
4868     }
4869 }
4870
4871 \f
4872 /*
4873  * Forth tags
4874  * Ignore anything after \ followed by space or in ( )
4875  * Look for words defined by :
4876  * Look for constant, code, create, defer, value, and variable
4877  * OBP extensions:  Look for buffer:, field,
4878  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4879  */
4880 static void
4881 Forth_words (FILE *inf)
4882 {
4883   register char *bp;
4884
4885   LOOP_ON_INPUT_LINES (inf, lb, bp)
4886     while ((bp = skip_spaces (bp))[0] != '\0')
4887       if (bp[0] == '\\' && iswhite(bp[1]))
4888         break;                  /* read next line */
4889       else if (bp[0] == '(' && iswhite(bp[1]))
4890         do                      /* skip to ) or eol */
4891           bp++;
4892         while (*bp != ')' && *bp != '\0');
4893       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4894                || LOOKING_AT_NOCASE (bp, "constant")
4895                || LOOKING_AT_NOCASE (bp, "code")
4896                || LOOKING_AT_NOCASE (bp, "create")
4897                || LOOKING_AT_NOCASE (bp, "defer")
4898                || LOOKING_AT_NOCASE (bp, "value")
4899                || LOOKING_AT_NOCASE (bp, "variable")
4900                || LOOKING_AT_NOCASE (bp, "buffer:")
4901                || LOOKING_AT_NOCASE (bp, "field"))
4902         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4903       else
4904         bp = skip_non_spaces (bp);
4905 }
4906
4907 \f
4908 /*
4909  * Scheme tag functions
4910  * look for (def... xyzzy
4911  *          (def... (xyzzy
4912  *          (def ... ((...(xyzzy ....
4913  *          (set! xyzzy
4914  * Original code by Ken Haase (1985?)
4915  */
4916 static void
4917 Scheme_functions (FILE *inf)
4918 {
4919   register char *bp;
4920
4921   LOOP_ON_INPUT_LINES (inf, lb, bp)
4922     {
4923       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4924         {
4925           bp = skip_non_spaces (bp+4);
4926           /* Skip over open parens and white space.  Don't continue past
4927              '\0'. */
4928           while (*bp && notinname (*bp))
4929             bp++;
4930           get_tag (bp, NULL);
4931         }
4932       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4933         get_tag (bp, NULL);
4934     }
4935 }
4936
4937 \f
4938 /* Find tags in TeX and LaTeX input files.  */
4939
4940 /* TEX_toktab is a table of TeX control sequences that define tags.
4941  * Each entry records one such control sequence.
4942  *
4943  * Original code from who knows whom.
4944  * Ideas by:
4945  *   Stefan Monnier (2002)
4946  */
4947
4948 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4949
4950 /* Default set of control sequences to put into TEX_toktab.
4951    The value of environment var TEXTAGS is prepended to this.  */
4952 static const char *TEX_defenv = "\
4953 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4954 :part:appendix:entry:index:def\
4955 :newcommand:renewcommand:newenvironment:renewenvironment";
4956
4957 static void TEX_mode (FILE *);
4958 static void TEX_decode_env (const char *, const char *);
4959
4960 static char TEX_esc = '\\';
4961 static char TEX_opgrp = '{';
4962 static char TEX_clgrp = '}';
4963
4964 /*
4965  * TeX/LaTeX scanning loop.
4966  */
4967 static void
4968 TeX_commands (FILE *inf)
4969 {
4970   char *cp;
4971   linebuffer *key;
4972
4973   /* Select either \ or ! as escape character.  */
4974   TEX_mode (inf);
4975
4976   /* Initialize token table once from environment. */
4977   if (TEX_toktab == NULL)
4978     TEX_decode_env ("TEXTAGS", TEX_defenv);
4979
4980   LOOP_ON_INPUT_LINES (inf, lb, cp)
4981     {
4982       /* Look at each TEX keyword in line. */
4983       for (;;)
4984         {
4985           /* Look for a TEX escape. */
4986           while (*cp++ != TEX_esc)
4987             if (cp[-1] == '\0' || cp[-1] == '%')
4988               goto tex_next_line;
4989
4990           for (key = TEX_toktab; key->buffer != NULL; key++)
4991             if (strneq (cp, key->buffer, key->len))
4992               {
4993                 register char *p;
4994                 int namelen, linelen;
4995                 bool opgrp = FALSE;
4996
4997                 cp = skip_spaces (cp + key->len);
4998                 if (*cp == TEX_opgrp)
4999                   {
5000                     opgrp = TRUE;
5001                     cp++;
5002                   }
5003                 for (p = cp;
5004                      (!iswhite (*p) && *p != '#' &&
5005                       *p != TEX_opgrp && *p != TEX_clgrp);
5006                      p++)
5007                   continue;
5008                 namelen = p - cp;
5009                 linelen = lb.len;
5010                 if (!opgrp || *p == TEX_clgrp)
5011                   {
5012                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5013                       p++;
5014                     linelen = p - lb.buffer + 1;
5015                   }
5016                 make_tag (cp, namelen, TRUE,
5017                           lb.buffer, linelen, lineno, linecharno);
5018                 goto tex_next_line; /* We only tag a line once */
5019               }
5020         }
5021     tex_next_line:
5022       ;
5023     }
5024 }
5025
5026 #define TEX_LESC '\\'
5027 #define TEX_SESC '!'
5028
5029 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5030    chars accordingly. */
5031 static void
5032 TEX_mode (FILE *inf)
5033 {
5034   int c;
5035
5036   while ((c = getc (inf)) != EOF)
5037     {
5038       /* Skip to next line if we hit the TeX comment char. */
5039       if (c == '%')
5040         while (c != '\n' && c != EOF)
5041           c = getc (inf);
5042       else if (c == TEX_LESC || c == TEX_SESC )
5043         break;
5044     }
5045
5046   if (c == TEX_LESC)
5047     {
5048       TEX_esc = TEX_LESC;
5049       TEX_opgrp = '{';
5050       TEX_clgrp = '}';
5051     }
5052   else
5053     {
5054       TEX_esc = TEX_SESC;
5055       TEX_opgrp = '<';
5056       TEX_clgrp = '>';
5057     }
5058   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5059      No attempt is made to correct the situation. */
5060   rewind (inf);
5061 }
5062
5063 /* Read environment and prepend it to the default string.
5064    Build token table. */
5065 static void
5066 TEX_decode_env (const char *evarname, const char *defenv)
5067 {
5068   register const char *env, *p;
5069   int i, len;
5070
5071   /* Append default string to environment. */
5072   env = getenv (evarname);
5073   if (!env)
5074     env = defenv;
5075   else
5076     env = concat (env, defenv, "");
5077
5078   /* Allocate a token table */
5079   for (len = 1, p = env; p;)
5080     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5081       len++;
5082   TEX_toktab = xnew (len, linebuffer);
5083
5084   /* Unpack environment string into token table. Be careful about */
5085   /* zero-length strings (leading ':', "::" and trailing ':') */
5086   for (i = 0; *env != '\0';)
5087     {
5088       p = etags_strchr (env, ':');
5089       if (!p)                   /* End of environment string. */
5090         p = env + strlen (env);
5091       if (p - env > 0)
5092         {                       /* Only non-zero strings. */
5093           TEX_toktab[i].buffer = savenstr (env, p - env);
5094           TEX_toktab[i].len = p - env;
5095           i++;
5096         }
5097       if (*p)
5098         env = p + 1;
5099       else
5100         {
5101           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5102           TEX_toktab[i].len = 0;
5103           break;
5104         }
5105     }
5106 }
5107
5108 \f
5109 /* Texinfo support.  Dave Love, Mar. 2000.  */
5110 static void
5111 Texinfo_nodes (FILE *inf)
5112 {
5113   char *cp, *start;
5114   LOOP_ON_INPUT_LINES (inf, lb, cp)
5115     if (LOOKING_AT (cp, "@node"))
5116       {
5117         start = cp;
5118         while (*cp != '\0' && *cp != ',')
5119           cp++;
5120         make_tag (start, cp - start, TRUE,
5121                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5122       }
5123 }
5124
5125 \f
5126 /*
5127  * HTML support.
5128  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5129  * Contents of <a name=xxx> are tags with name xxx.
5130  *
5131  * Francesco Potortì, 2002.
5132  */
5133 static void
5134 HTML_labels (FILE *inf)
5135 {
5136   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5137   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5138   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5139   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5140   char *end;
5141
5142
5143   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5144
5145   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5146     for (;;)                    /* loop on the same line */
5147       {
5148         if (skiptag)            /* skip HTML tag */
5149           {
5150             while (*dbp != '\0' && *dbp != '>')
5151               dbp++;
5152             if (*dbp == '>')
5153               {
5154                 dbp += 1;
5155                 skiptag = FALSE;
5156                 continue;       /* look on the same line */
5157               }
5158             break;              /* go to next line */
5159           }
5160
5161         else if (intag) /* look for "name=" or "id=" */
5162           {
5163             while (*dbp != '\0' && *dbp != '>'
5164                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5165               dbp++;
5166             if (*dbp == '\0')
5167               break;            /* go to next line */
5168             if (*dbp == '>')
5169               {
5170                 dbp += 1;
5171                 intag = FALSE;
5172                 continue;       /* look on the same line */
5173               }
5174             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5175                 || LOOKING_AT_NOCASE (dbp, "id="))
5176               {
5177                 bool quoted = (dbp[0] == '"');
5178
5179                 if (quoted)
5180                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5181                     continue;
5182                 else
5183                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5184                     continue;
5185                 linebuffer_setlen (&token_name, end - dbp);
5186                 strncpy (token_name.buffer, dbp, end - dbp);
5187                 token_name.buffer[end - dbp] = '\0';
5188
5189                 dbp = end;
5190                 intag = FALSE;  /* we found what we looked for */
5191                 skiptag = TRUE; /* skip to the end of the tag */
5192                 getnext = TRUE; /* then grab the text */
5193                 continue;       /* look on the same line */
5194               }
5195             dbp += 1;
5196           }
5197
5198         else if (getnext)       /* grab next tokens and tag them */
5199           {
5200             dbp = skip_spaces (dbp);
5201             if (*dbp == '\0')
5202               break;            /* go to next line */
5203             if (*dbp == '<')
5204               {
5205                 intag = TRUE;
5206                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5207                 continue;       /* look on the same line */
5208               }
5209
5210             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5211               continue;
5212             make_tag (token_name.buffer, token_name.len, TRUE,
5213                       dbp, end - dbp, lineno, linecharno);
5214             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5215             getnext = FALSE;
5216             break;              /* go to next line */
5217           }
5218
5219         else                    /* look for an interesting HTML tag */
5220           {
5221             while (*dbp != '\0' && *dbp != '<')
5222               dbp++;
5223             if (*dbp == '\0')
5224               break;            /* go to next line */
5225             intag = TRUE;
5226             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5227               {
5228                 inanchor = TRUE;
5229                 continue;       /* look on the same line */
5230               }
5231             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5232                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5233                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5234                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5235               {
5236                 intag = FALSE;
5237                 getnext = TRUE;
5238                 continue;       /* look on the same line */
5239               }
5240             dbp += 1;
5241           }
5242       }
5243 }
5244
5245 \f
5246 /*
5247  * Prolog support
5248  *
5249  * Assumes that the predicate or rule starts at column 0.
5250  * Only the first clause of a predicate or rule is added.
5251  * Original code by Sunichirou Sugou (1989)
5252  * Rewritten by Anders Lindgren (1996)
5253  */
5254 static size_t prolog_pr (char *, char *);
5255 static void prolog_skip_comment (linebuffer *, FILE *);
5256 static size_t prolog_atom (char *, size_t);
5257
5258 static void
5259 Prolog_functions (FILE *inf)
5260 {
5261   char *cp, *last;
5262   size_t len;
5263   size_t allocated;
5264
5265   allocated = 0;
5266   len = 0;
5267   last = NULL;
5268
5269   LOOP_ON_INPUT_LINES (inf, lb, cp)
5270     {
5271       if (cp[0] == '\0')        /* Empty line */
5272         continue;
5273       else if (iswhite (cp[0])) /* Not a predicate */
5274         continue;
5275       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5276         prolog_skip_comment (&lb, inf);
5277       else if ((len = prolog_pr (cp, last)) > 0)
5278         {
5279           /* Predicate or rule.  Store the function name so that we
5280              only generate a tag for the first clause.  */
5281           if (last == NULL)
5282             last = xnew(len + 1, char);
5283           else if (len + 1 > allocated)
5284             xrnew (last, len + 1, char);
5285           allocated = len + 1;
5286           strncpy (last, cp, len);
5287           last[len] = '\0';
5288         }
5289     }
5290   free (last);
5291 }
5292
5293
5294 static void
5295 prolog_skip_comment (linebuffer *plb, FILE *inf)
5296 {
5297   char *cp;
5298
5299   do
5300     {
5301       for (cp = plb->buffer; *cp != '\0'; cp++)
5302         if (cp[0] == '*' && cp[1] == '/')
5303           return;
5304       readline (plb, inf);
5305     }
5306   while (!feof(inf));
5307 }
5308
5309 /*
5310  * A predicate or rule definition is added if it matches:
5311  *     <beginning of line><Prolog Atom><whitespace>(
5312  * or  <beginning of line><Prolog Atom><whitespace>:-
5313  *
5314  * It is added to the tags database if it doesn't match the
5315  * name of the previous clause header.
5316  *
5317  * Return the size of the name of the predicate or rule, or 0 if no
5318  * header was found.
5319  */
5320 static size_t
5321 prolog_pr (char *s, char *last)
5322
5323                                 /* Name of last clause. */
5324 {
5325   size_t pos;
5326   size_t len;
5327
5328   pos = prolog_atom (s, 0);
5329   if (! pos)
5330     return 0;
5331
5332   len = pos;
5333   pos = skip_spaces (s + pos) - s;
5334
5335   if ((s[pos] == '.'
5336        || (s[pos] == '(' && (pos += 1))
5337        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5338       && (last == NULL          /* save only the first clause */
5339           || len != strlen (last)
5340           || !strneq (s, last, len)))
5341         {
5342           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5343           return len;
5344         }
5345   else
5346     return 0;
5347 }
5348
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there is no atom there.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores included) starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  A single quote is written inside it by
 *   doubling it; a backslash quotes the character that follows.  The
 *   string must be closed on the same line.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  char *const start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return (size_t) (p - start);
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: walk to the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\'':
        if (p[1] != '\'')
          return (size_t) (p + 1 - start);      /* closing quote */
        p += 2;                 /* doubled quote: a literal quote */
        break;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;                 /* backslash and the quoted character */
        break;

      default:
        p++;
        break;
      }
}
5405
5406 \f
5407 /*
5408  * Support for Erlang
5409  *
5410  * Generates tags for functions, defines, and records.
5411  * Assumes that Erlang functions start at column 0.
5412  * Original code by Anders Lindgren (1996)
5413  */
5414 static int erlang_func (char *, char *);
5415 static void erlang_attribute (char *);
5416 static int erlang_atom (char *);
5417
5418 static void
5419 Erlang_functions (FILE *inf)
5420 {
5421   char *cp, *last;
5422   int len;
5423   int allocated;
5424
5425   allocated = 0;
5426   len = 0;
5427   last = NULL;
5428
5429   LOOP_ON_INPUT_LINES (inf, lb, cp)
5430     {
5431       if (cp[0] == '\0')        /* Empty line */
5432         continue;
5433       else if (iswhite (cp[0])) /* Not function nor attribute */
5434         continue;
5435       else if (cp[0] == '%')    /* comment */
5436         continue;
5437       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5438         continue;
5439       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5440         {
5441           erlang_attribute (cp);
5442           if (last != NULL)
5443             {
5444               free (last);
5445               last = NULL;
5446             }
5447         }
5448       else if ((len = erlang_func (cp, last)) > 0)
5449         {
5450           /*
5451            * Function.  Store the function name so that we only
5452            * generates a tag for the first clause.
5453            */
5454           if (last == NULL)
5455             last = xnew (len + 1, char);
5456           else if (len + 1 > allocated)
5457             xrnew (last, len + 1, char);
5458           allocated = len + 1;
5459           strncpy (last, cp, len);
5460           last[len] = '\0';
5461         }
5462     }
5463   free (last);
5464 }
5465
5466
5467 /*
5468  * A function definition is added if it matches:
5469  *     <beginning of line><Erlang Atom><whitespace>(
5470  *
5471  * It is added to the tags database if it doesn't match the
5472  * name of the previous clause header.
5473  *
5474  * Return the size of the name of the function, or 0 if no function
5475  * was found.
5476  */
5477 static int
5478 erlang_func (char *s, char *last)
5479
5480                                 /* Name of last clause. */
5481 {
5482   int pos;
5483   int len;
5484
5485   pos = erlang_atom (s);
5486   if (pos < 1)
5487     return 0;
5488
5489   len = pos;
5490   pos = skip_spaces (s + pos) - s;
5491
5492   /* Save only the first clause. */
5493   if (s[pos++] == '('
5494       && (last == NULL
5495           || len != (int)strlen (last)
5496           || !strneq (s, last, len)))
5497         {
5498           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5499           return len;
5500         }
5501
5502   return 0;
5503 }
5504
5505
5506 /*
5507  * Handle attributes.  Currently, tags are generated for defines
5508  * and records.
5509  *
5510  * They are on the form:
5511  * -define(foo, bar).
5512  * -define(Foo(M, N), M+N).
5513  * -record(graph, {vtab = notable, cyclic = true}).
5514  */
5515 static void
5516 erlang_attribute (char *s)
5517 {
5518   char *cp = s;
5519
5520   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5521       && *cp++ == '(')
5522     {
5523       int len = erlang_atom (skip_spaces (cp));
5524       if (len > 0)
5525         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5526     }
5527   return;
5528 }
5529
5530
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one.
 *
 * An unquoted name is a letter or underscore followed by letters,
 * digits and underscores.  A quoted atom is enclosed in single quotes,
 * with backslash quoting the following character; it must be closed
 * on the same line.
 */
static int
erlang_atom (char *s)
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - s);
    }

  if (*p != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  p++;
  while (*p != '\'')
    {
      if (*p == '\0')
        return 0;               /* multiline quoted atoms are ignored */
      if (*p == '\\')
        {
          p++;                  /* step onto the quoted character */
          if (*p == '\0')
            return 0;
        }
      p++;
    }
  return (int) (p + 1 - s);     /* include the closing quote */
}
5558
5559 \f
5560 static char *scan_separators (char *);
5561 static void add_regex (char *, language *);
5562 static char *substitute (char *, char *, struct re_registers *);
5563
/*
 * Take a string like "/blah/" and turn it into "blah", in place.
 * The first character of NAME is the separator; scanning stops at its
 * first unquoted occurrence.  A separator quoted with a backslash is
 * copied as a plain character, \t, \n, etc. are turned into the
 * characters they denote, and any other quoted character is copied
 * together with its backslash.  The result is stored starting at NAME
 * and is null terminated.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *src = name + 1;         /* next character to read */
  char *dst = name;             /* next position to write */

  while (*src != '\0')
    {
      if (*src == '\\')
        {
          src++;                /* look at the quoted character */
          if (*src == '\0')
            break;              /* a dangling backslash is dropped */
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break;   /* BEL (bell)            */
            case 'b': *dst++ = '\b'; break;     /* BS (back space)       */
            case 'd': *dst++ = 0177; break;     /* DEL (delete)          */
            case 'e': *dst++ = 033; break;      /* ESC (escape)          */
            case 'f': *dst++ = '\f'; break;     /* FF (form feed)        */
            case 'n': *dst++ = '\n'; break;     /* NL (new line)         */
            case 'r': *dst++ = '\r'; break;     /* CR (carriage return)  */
            case 't': *dst++ = '\t'; break;     /* TAB (horizontal tab)  */
            case 'v': *dst++ = '\v'; break;     /* VT (vertical tab)     */
            default:
              /* A quoted separator loses its backslash; anything else
                 keeps it. */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
          src++;
        }
      else if (*src == sep)
        break;                  /* unquoted separator: end of string */
      else
        *dst++ = *src++;
    }

  if (*src != sep)
    src = NULL;                 /* signal unterminated regexp */

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
5622
5623 /* Look at the argument of --regex or --no-regex and do the right
5624    thing.  Same for each line of a regexp file. */
5625 static void
5626 analyse_regex (char *regex_arg)
5627 {
5628   if (regex_arg == NULL)
5629     {
5630       free_regexps ();          /* --no-regex: remove existing regexps */
5631       return;
5632     }
5633
5634   /* A real --regexp option or a line in a regexp file. */
5635   switch (regex_arg[0])
5636     {
5637       /* Comments in regexp file or null arg to --regex. */
5638     case '\0':
5639     case ' ':
5640     case '\t':
5641       break;
5642
5643       /* Read a regex file.  This is recursive and may result in a
5644          loop, which will stop when the file descriptors are exhausted. */
5645     case '@':
5646       {
5647         FILE *regexfp;
5648         linebuffer regexbuf;
5649         char *regexfile = regex_arg + 1;
5650
5651         /* regexfile is a file containing regexps, one per line. */
5652         regexfp = fopen (regexfile, "r");
5653         if (regexfp == NULL)
5654           {
5655             pfatal (regexfile);
5656             return;
5657           }
5658         linebuffer_init (&regexbuf);
5659         while (readline_internal (&regexbuf, regexfp) > 0)
5660           analyse_regex (regexbuf.buffer);
5661         free (regexbuf.buffer);
5662         fclose (regexfp);
5663       }
5664       break;
5665
5666       /* Regexp to be used for a specific language only. */
5667     case '{':
5668       {
5669         language *lang;
5670         char *lang_name = regex_arg + 1;
5671         char *cp;
5672
5673         for (cp = lang_name; *cp != '}'; cp++)
5674           if (*cp == '\0')
5675             {
5676               error ("unterminated language name in regex: %s", regex_arg);
5677               return;
5678             }
5679         *cp++ = '\0';
5680         lang = get_language_from_langname (lang_name);
5681         if (lang == NULL)
5682           return;
5683         add_regex (cp, lang);
5684       }
5685       break;
5686
5687       /* Regexp to be used for any language. */
5688     default:
5689       add_regex (regex_arg, NULL);
5690       break;
5691     }
5692 }
5693
5694 /* Separate the regexp pattern, compile it,
5695    and care for optional name and modifiers. */
5696 static void
5697 add_regex (char *regexp_pattern, language *lang)
5698 {
5699   static struct re_pattern_buffer zeropattern;
5700   char sep, *pat, *name, *modifiers;
5701   char empty[] = "";
5702   const char *err;
5703   struct re_pattern_buffer *patbuf;
5704   regexp *rp;
5705   bool
5706     force_explicit_name = TRUE, /* do not use implicit tag names */
5707     ignore_case = FALSE,        /* case is significant */
5708     multi_line = FALSE,         /* matches are done one line at a time */
5709     single_line = FALSE;        /* dot does not match newline */
5710
5711
5712   if (strlen(regexp_pattern) < 3)
5713     {
5714       error ("null regexp", (char *)NULL);
5715       return;
5716     }
5717   sep = regexp_pattern[0];
5718   name = scan_separators (regexp_pattern);
5719   if (name == NULL)
5720     {
5721       error ("%s: unterminated regexp", regexp_pattern);
5722       return;
5723     }
5724   if (name[1] == sep)
5725     {
5726       error ("null name for regexp \"%s\"", regexp_pattern);
5727       return;
5728     }
5729   modifiers = scan_separators (name);
5730   if (modifiers == NULL)        /* no terminating separator --> no name */
5731     {
5732       modifiers = name;
5733       name = empty;
5734     }
5735   else
5736     modifiers += 1;             /* skip separator */
5737
5738   /* Parse regex modifiers. */
5739   for (; modifiers[0] != '\0'; modifiers++)
5740     switch (modifiers[0])
5741       {
5742       case 'N':
5743         if (modifiers == name)
5744           error ("forcing explicit tag name but no name, ignoring", NULL);
5745         force_explicit_name = TRUE;
5746         break;
5747       case 'i':
5748         ignore_case = TRUE;
5749         break;
5750       case 's':
5751         single_line = TRUE;
5752         /* FALLTHRU */
5753       case 'm':
5754         multi_line = TRUE;
5755         need_filebuf = TRUE;
5756         break;
5757       default:
5758         {
5759           char wrongmod [2];
5760           wrongmod[0] = modifiers[0];
5761           wrongmod[1] = '\0';
5762           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5763         }
5764         break;
5765       }
5766
5767   patbuf = xnew (1, struct re_pattern_buffer);
5768   *patbuf = zeropattern;
5769   if (ignore_case)
5770     {
5771       static char lc_trans[CHARS];
5772       int i;
5773       for (i = 0; i < CHARS; i++)
5774         lc_trans[i] = lowcase (i);
5775       patbuf->translate = lc_trans;     /* translation table to fold case  */
5776     }
5777
5778   if (multi_line)
5779     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5780   else
5781     pat = regexp_pattern;
5782
5783   if (single_line)
5784     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5785   else
5786     re_set_syntax (RE_SYNTAX_EMACS);
5787
5788   err = re_compile_pattern (pat, strlen (pat), patbuf);
5789   if (multi_line)
5790     free (pat);
5791   if (err != NULL)
5792     {
5793       error ("%s while compiling pattern", err);
5794       return;
5795     }
5796
5797   rp = p_head;
5798   p_head = xnew (1, regexp);
5799   p_head->pattern = savestr (regexp_pattern);
5800   p_head->p_next = rp;
5801   p_head->lang = lang;
5802   p_head->pat = patbuf;
5803   p_head->name = savestr (name);
5804   p_head->error_signaled = FALSE;
5805   p_head->force_explicit_name = force_explicit_name;
5806   p_head->ignore_case = ignore_case;
5807   p_head->multi_line = multi_line;
5808 }
5809
5810 /*
5811  * Do the substitutions indicated by the regular expression and
5812  * arguments.
5813  */
5814 static char *
5815 substitute (char *in, char *out, struct re_registers *regs)
5816 {
5817   char *result, *t;
5818   int size, dig, diglen;
5819
5820   result = NULL;
5821   size = strlen (out);
5822
5823   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5824   if (out[size - 1] == '\\')
5825     fatal ("pattern error in \"%s\"", out);
5826   for (t = etags_strchr (out, '\\');
5827        t != NULL;
5828        t = etags_strchr (t + 2, '\\'))
5829     if (ISDIGIT (t[1]))
5830       {
5831         dig = t[1] - '0';
5832         diglen = regs->end[dig] - regs->start[dig];
5833         size += diglen - 2;
5834       }
5835     else
5836       size -= 1;
5837
5838   /* Allocate space and do the substitutions. */
5839   assert (size >= 0);
5840   result = xnew (size + 1, char);
5841
5842   for (t = result; *out != '\0'; out++)
5843     if (*out == '\\' && ISDIGIT (*++out))
5844       {
5845         dig = *out - '0';
5846         diglen = regs->end[dig] - regs->start[dig];
5847         strncpy (t, in + regs->start[dig], diglen);
5848         t += diglen;
5849       }
5850     else
5851       *t++ = *out;
5852   *t = '\0';
5853
5854   assert (t <= result + size);
5855   assert (t - result == (int)strlen (result));
5856
5857   return result;
5858 }
5859
5860 /* Deallocate all regexps. */
5861 static void
5862 free_regexps (void)
5863 {
5864   regexp *rp;
5865   while (p_head != NULL)
5866     {
5867       rp = p_head->p_next;
5868       free (p_head->pattern);
5869       free (p_head->name);
5870       free (p_head);
5871       p_head = rp;
5872     }
5873   return;
5874 }
5875
5876 /*
5877  * Reads the whole file as a single string from `filebuf' and looks for
5878  * multi-line regular expressions, creating tags on matches.
5879  * readline already dealt with normal regexps.
5880  *
5881  * Idea by Ben Wing <ben@666.com> (2002).
5882  */
5883 static void
5884 regex_tag_multiline (void)
5885 {
5886   char *buffer = filebuf.buffer;
5887   regexp *rp;
5888   char *name;
5889
5890   for (rp = p_head; rp != NULL; rp = rp->p_next)
5891     {
5892       int match = 0;
5893
5894       if (!rp->multi_line)
5895         continue;               /* skip normal regexps */
5896
5897       /* Generic initialisations before parsing file from memory. */
5898       lineno = 1;               /* reset global line number */
5899       charno = 0;               /* reset global char number */
5900       linecharno = 0;           /* reset global char number of line start */
5901
5902       /* Only use generic regexps or those for the current language. */
5903       if (rp->lang != NULL && rp->lang != curfdp->lang)
5904         continue;
5905
5906       while (match >= 0 && match < filebuf.len)
5907         {
5908           match = re_search (rp->pat, buffer, filebuf.len, charno,
5909                              filebuf.len - match, &rp->regs);
5910           switch (match)
5911             {
5912             case -2:
5913               /* Some error. */
5914               if (!rp->error_signaled)
5915                 {
5916                   error ("regexp stack overflow while matching \"%s\"",
5917                          rp->pattern);
5918                   rp->error_signaled = TRUE;
5919                 }
5920               break;
5921             case -1:
5922               /* No match. */
5923               break;
5924             default:
5925               if (match == rp->regs.end[0])
5926                 {
5927                   if (!rp->error_signaled)
5928                     {
5929                       error ("regexp matches the empty string: \"%s\"",
5930                              rp->pattern);
5931                       rp->error_signaled = TRUE;
5932                     }
5933                   match = -3;   /* exit from while loop */
5934                   break;
5935                 }
5936
5937               /* Match occurred.  Construct a tag. */
5938               while (charno < rp->regs.end[0])
5939                 if (buffer[charno++] == '\n')
5940                   lineno++, linecharno = charno;
5941               name = rp->name;
5942               if (name[0] == '\0')
5943                 name = NULL;
5944               else /* make a named tag */
5945                 name = substitute (buffer, rp->name, &rp->regs);
5946               if (rp->force_explicit_name)
5947                 /* Force explicit tag name, if a name is there. */
5948                 pfnote (name, TRUE, buffer + linecharno,
5949                         charno - linecharno + 1, lineno, linecharno);
5950               else
5951                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5952                           charno - linecharno + 1, lineno, linecharno);
5953               break;
5954             }
5955         }
5956     }
5957 }
5958
5959 \f
5960 static bool
5961 nocase_tail (const char *cp)
5962 {
5963   register int len = 0;
5964
5965   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5966     cp++, len++;
5967   if (*cp == '\0' && !intoken (dbp[len]))
5968     {
5969       dbp += len;
5970       return TRUE;
5971     }
5972   return FALSE;
5973 }
5974
5975 static void
5976 get_tag (register char *bp, char **namepp)
5977 {
5978   register char *cp = bp;
5979
5980   if (*bp != '\0')
5981     {
5982       /* Go till you get to white space or a syntactic break */
5983       for (cp = bp + 1; !notinname (*cp); cp++)
5984         continue;
5985       make_tag (bp, cp - bp, TRUE,
5986                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5987     }
5988
5989   if (namepp != NULL)
5990     *namepp = savenstr (bp, cp - bp);
5991 }
5992
5993 /*
5994  * Read a line of text from `stream' into `lbp', excluding the
5995  * newline or CR-NL, if any.  Return the number of characters read from
5996  * `stream', which is the length of the line including the newline.
5997  *
5998  * On DOS or Windows we do not count the CR character, if any before the
5999  * NL, in the returned length; this mirrors the behavior of Emacs on those
6000  * platforms (for text files, it translates CR-NL to NL as it reads in the
6001  * file).
6002  *
6003  * If multi-line regular expressions are requested, each line read is
6004  * appended to `filebuf'.
6005  */
6006 static long
6007 readline_internal (linebuffer *lbp, register FILE *stream)
6008 {
6009   char *buffer = lbp->buffer;
6010   register char *p = lbp->buffer;
6011   register char *pend;
6012   int chars_deleted;
6013
6014   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6015
6016   for (;;)
6017     {
6018       register int c = getc (stream);
6019       if (p == pend)
6020         {
6021           /* We're at the end of linebuffer: expand it. */
6022           lbp->size *= 2;
6023           xrnew (buffer, lbp->size, char);
6024           p += buffer - lbp->buffer;
6025           pend = buffer + lbp->size;
6026           lbp->buffer = buffer;
6027         }
6028       if (c == EOF)
6029         {
6030           *p = '\0';
6031           chars_deleted = 0;
6032           break;
6033         }
6034       if (c == '\n')
6035         {
6036           if (p > buffer && p[-1] == '\r')
6037             {
6038               p -= 1;
6039 #ifdef DOS_NT
6040              /* Assume CRLF->LF translation will be performed by Emacs
6041                 when loading this file, so CRs won't appear in the buffer.
6042                 It would be cleaner to compensate within Emacs;
6043                 however, Emacs does not know how many CRs were deleted
6044                 before any given point in the file.  */
6045               chars_deleted = 1;
6046 #else
6047               chars_deleted = 2;
6048 #endif
6049             }
6050           else
6051             {
6052               chars_deleted = 1;
6053             }
6054           *p = '\0';
6055           break;
6056         }
6057       *p++ = c;
6058     }
6059   lbp->len = p - buffer;
6060
6061   if (need_filebuf              /* we need filebuf for multi-line regexps */
6062       && chars_deleted > 0)     /* not at EOF */
6063     {
6064       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6065         {
6066           /* Expand filebuf. */
6067           filebuf.size *= 2;
6068           xrnew (filebuf.buffer, filebuf.size, char);
6069         }
6070       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6071       filebuf.len += lbp->len;
6072       filebuf.buffer[filebuf.len++] = '\n';
6073       filebuf.buffer[filebuf.len] = '\0';
6074     }
6075
6076   return lbp->len + chars_deleted;
6077 }
6078
6079 /*
6080  * Like readline_internal, above, but in addition try to match the
6081  * input line against relevant regular expressions and manage #line
6082  * directives.
6083  */
6084 static void
6085 readline (linebuffer *lbp, FILE *stream)
6086 {
6087   long result;
6088
6089   linecharno = charno;          /* update global char number of line start */
6090   result = readline_internal (lbp, stream); /* read line */
6091   lineno += 1;                  /* increment global line number */
6092   charno += result;             /* increment global char number */
6093
6094   /* Honour #line directives. */
6095   if (!no_line_directive)
6096     {
6097       static bool discard_until_line_directive;
6098
6099       /* Check whether this is a #line directive. */
6100       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6101         {
6102           unsigned int lno;
6103           int start = 0;
6104
6105           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6106               && start > 0)     /* double quote character found */
6107             {
6108               char *endp = lbp->buffer + start;
6109
6110               while ((endp = etags_strchr (endp, '"')) != NULL
6111                      && endp[-1] == '\\')
6112                 endp++;
6113               if (endp != NULL)
6114                 /* Ok, this is a real #line directive.  Let's deal with it. */
6115                 {
6116                   char *taggedabsname;  /* absolute name of original file */
6117                   char *taggedfname;    /* name of original file as given */
6118                   char *name;           /* temp var */
6119
6120                   discard_until_line_directive = FALSE; /* found it */
6121                   name = lbp->buffer + start;
6122                   *endp = '\0';
6123                   canonicalize_filename (name);
6124                   taggedabsname = absolute_filename (name, tagfiledir);
6125                   if (filename_is_absolute (name)
6126                       || filename_is_absolute (curfdp->infname))
6127                     taggedfname = savestr (taggedabsname);
6128                   else
6129                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6130
6131                   if (streq (curfdp->taggedfname, taggedfname))
6132                     /* The #line directive is only a line number change.  We
6133                        deal with this afterwards. */
6134                     free (taggedfname);
6135                   else
6136                     /* The tags following this #line directive should be
6137                        attributed to taggedfname.  In order to do this, set
6138                        curfdp accordingly. */
6139                     {
6140                       fdesc *fdp; /* file description pointer */
6141
6142                       /* Go look for a file description already set up for the
6143                          file indicated in the #line directive.  If there is
6144                          one, use it from now until the next #line
6145                          directive. */
6146                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6147                         if (streq (fdp->infname, curfdp->infname)
6148                             && streq (fdp->taggedfname, taggedfname))
6149                           /* If we remove the second test above (after the &&)
6150                              then all entries pertaining to the same file are
6151                              coalesced in the tags file.  If we use it, then
6152                              entries pertaining to the same file but generated
6153                              from different files (via #line directives) will
6154                              go into separate sections in the tags file.  These
6155                              alternatives look equivalent.  The first one
6156                              destroys some apparently useless information. */
6157                           {
6158                             curfdp = fdp;
6159                             free (taggedfname);
6160                             break;
6161                           }
6162                       /* Else, if we already tagged the real file, skip all
6163                          input lines until the next #line directive. */
6164                       if (fdp == NULL) /* not found */
6165                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6166                           if (streq (fdp->infabsname, taggedabsname))
6167                             {
6168                               discard_until_line_directive = TRUE;
6169                               free (taggedfname);
6170                               break;
6171                             }
6172                       /* Else create a new file description and use that from
6173                          now on, until the next #line directive. */
6174                       if (fdp == NULL) /* not found */
6175                         {
6176                           fdp = fdhead;
6177                           fdhead = xnew (1, fdesc);
6178                           *fdhead = *curfdp; /* copy curr. file description */
6179                           fdhead->next = fdp;
6180                           fdhead->infname = savestr (curfdp->infname);
6181                           fdhead->infabsname = savestr (curfdp->infabsname);
6182                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6183                           fdhead->taggedfname = taggedfname;
6184                           fdhead->usecharno = FALSE;
6185                           fdhead->prop = NULL;
6186                           fdhead->written = FALSE;
6187                           curfdp = fdhead;
6188                         }
6189                     }
6190                   free (taggedabsname);
6191                   lineno = lno - 1;
6192                   readline (lbp, stream);
6193                   return;
6194                 } /* if a real #line directive */
6195             } /* if #line is followed by a number */
6196         } /* if line begins with "#line " */
6197
6198       /* If we are here, no #line directive was found. */
6199       if (discard_until_line_directive)
6200         {
6201           if (result > 0)
6202             {
6203               /* Do a tail recursion on ourselves, thus discarding the contents
6204                  of the line buffer. */
6205               readline (lbp, stream);
6206               return;
6207             }
6208           /* End of file. */
6209           discard_until_line_directive = FALSE;
6210           return;
6211         }
6212     } /* if #line directives should be considered */
6213
6214   {
6215     int match;
6216     regexp *rp;
6217     char *name;
6218
6219     /* Match against relevant regexps. */
6220     if (lbp->len > 0)
6221       for (rp = p_head; rp != NULL; rp = rp->p_next)
6222         {
6223           /* Only use generic regexps or those for the current language.
6224              Also do not use multiline regexps, which is the job of
6225              regex_tag_multiline. */
6226           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6227               || rp->multi_line)
6228             continue;
6229
6230           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6231           switch (match)
6232             {
6233             case -2:
6234               /* Some error. */
6235               if (!rp->error_signaled)
6236                 {
6237                   error ("regexp stack overflow while matching \"%s\"",
6238                          rp->pattern);
6239                   rp->error_signaled = TRUE;
6240                 }
6241               break;
6242             case -1:
6243               /* No match. */
6244               break;
6245             case 0:
6246               /* Empty string matched. */
6247               if (!rp->error_signaled)
6248                 {
6249                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6250                   rp->error_signaled = TRUE;
6251                 }
6252               break;
6253             default:
6254               /* Match occurred.  Construct a tag. */
6255               name = rp->name;
6256               if (name[0] == '\0')
6257                 name = NULL;
6258               else /* make a named tag */
6259                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6260               if (rp->force_explicit_name)
6261                 /* Force explicit tag name, if a name is there. */
6262                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6263               else
6264                 make_tag (name, strlen (name), TRUE,
6265                           lbp->buffer, match, lineno, linecharno);
6266               break;
6267             }
6268         }
6269   }
6270 }
6271
6272 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The whole string is handed to savenstr, so the result is a fresh
 * buffer of strlen (CP) + 1 bytes holding a copy of CP.
 */
static char *
savestr (const char *cp)
{
  const int length = strlen (cp);

  return savenstr (cp, length);
}
6282
6283 /*
6284  * Return a pointer to a space of size LEN+1 allocated with xnew where
6285  * the string CP has been copied for at most the first LEN characters.
6286  */
6287 static char *
6288 savenstr (const char *cp, int len)
6289 {
6290   register char *dp;
6291
6292   dp = xnew (len + 1, char);
6293   strncpy (dp, cp, len);
6294   dp[len] = '\0';
6295   return dp;
6296 }
6297
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL counts
 * as part of the string, so searching for '\0' returns a pointer to
 * the terminator.
 *
 * As with POSIX strrchr, C is converted to char before the comparison,
 * so a value such as 0xE9 matches the corresponding byte even when
 * plain char is signed.  Included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char target = (char) c;
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == target)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6317
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL counts
 * as part of the string, so searching for '\0' returns a pointer to
 * the terminator.
 *
 * As with POSIX strchr, C is converted to char before the comparison,
 * so a value such as 0xE9 matches the corresponding byte even when
 * plain char is signed.  Included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char target = (char) c;

  for (;; sp++)
    {
      if (*sp == target)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6334
/*
 * Compare the strings S1 and S2, treating two alphabetic characters
 * as equal when their lowcase forms are equal.  Any other pair of
 * characters is compared as is.
 *
 * Returns zero when the strings are equal under that rule, otherwise
 * the difference between the first pair of characters that differ.
 * Stands in for BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (;; s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference, or at the end of S1 (where a
         zero difference means S2 ended too).  */
      if (*s1 == '\0' || diff != 0)
        return diff;
    }
}
6353
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * treating two alphabetic characters as equal when their lowcase forms
 * are equal.  Any other pair of characters is compared as is.
 *
 * Returns zero when the compared prefixes are equal (in particular
 * whenever N is not positive), otherwise the difference between the
 * first pair of characters that differ.
 *
 * Stands in for BSD's strncasecmp, included for portability.  The
 * count is checked before each character is examined, so a string S1
 * that ends after exactly N matching characters compares equal; its
 * terminator is never compared against character N + 1 of S2.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  for (; n > 0; n--, s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* A difference decides the result; reaching the end of S1 with
         no difference means both strings ended here.  */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  /* N characters compared without finding a difference.  */
  return 0;
}
6376
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  There is no explicit end-of-string test: the
   loop relies on iswhite rejecting the terminating NUL.  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6385
/* Return a pointer to the first character at or after CP that is
   either the terminating NUL or a character accepted by iswhite.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6394
/* Report an error and terminate the program.
   S1 and S2 are passed unchanged to error (S1 is its printf-style
   control string, S2 the argument for it); the process then exits
   with status EXIT_FAILURE.  This function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6402
/* Report the current errno condition with perror, using S1 as the
   message prefix, then exit with status EXIT_FAILURE.  This function
   does not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6409
6410 static void
6411 suggest_asking_for_help (void)
6412 {
6413   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6414            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6415   exit (EXIT_FAILURE);
6416 }
6417
6418 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6419 static void
6420 error (const char *s1, const char *s2)
6421 {
6422   fprintf (stderr, "%s: ", progname);
6423   fprintf (stderr, s1, s2);
6424   fprintf (stderr, "\n");
6425 }
6426
6427 /* Return a newly-allocated string whose contents
6428    concatenate those of s1, s2, s3.  */
6429 static char *
6430 concat (const char *s1, const char *s2, const char *s3)
6431 {
6432   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6433   char *result = xnew (len1 + len2 + len3 + 1, char);
6434
6435   strcpy (result, s1);
6436   strcpy (result + len1, s2);
6437   strcpy (result + len1 + len2, s3);
6438   result[len1 + len2 + len3] = '\0';
6439
6440   return result;
6441 }
6442
6443 \f
6444 /* Does the same work as the system V getcwd, but does not need to
6445    guess the buffer size in advance. */
6446 static char *
6447 etags_getcwd (void)
6448 {
6449 #ifdef HAVE_GETCWD
6450   int bufsize = 200;
6451   char *path = xnew (bufsize, char);
6452
6453   while (getcwd (path, bufsize) == NULL)
6454     {
6455       if (errno != ERANGE)
6456         pfatal ("getcwd");
6457       bufsize *= 2;
6458       free (path);
6459       path = xnew (bufsize, char);
6460     }
6461
6462   canonicalize_filename (path);
6463   return path;
6464
6465 #else /* not HAVE_GETCWD */
6466 #if MSDOS
6467
6468   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6469
6470   getwd (path);
6471
6472   for (p = path; *p != '\0'; p++)
6473     if (*p == '\\')
6474       *p = '/';
6475     else
6476       *p = lowcase (*p);
6477
6478   return strdup (path);
6479 #else /* not MSDOS */
6480   linebuffer path;
6481   FILE *pipe;
6482
6483   linebuffer_init (&path);
6484   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6485   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6486     pfatal ("pwd");
6487   pclose (pipe);
6488
6489   return path.buffer;
6490 #endif /* not MSDOS */
6491 #endif /* not HAVE_GETCWD */
6492 }
6493
6494 /* Return a newly allocated string containing the file name of FILE
6495    relative to the absolute directory DIR (which should end with a slash). */
6496 static char *
6497 relative_filename (char *file, char *dir)
6498 {
6499   char *fp, *dp, *afn, *res;
6500   int i;
6501
6502   /* Find the common root of file and dir (with a trailing slash). */
6503   afn = absolute_filename (file, cwd);
6504   fp = afn;
6505   dp = dir;
6506   while (*fp++ == *dp++)
6507     continue;
6508   fp--, dp--;                   /* back to the first differing char */
6509 #ifdef DOS_NT
6510   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6511     return afn;
6512 #endif
6513   do                            /* look at the equal chars until '/' */
6514     fp--, dp--;
6515   while (*fp != '/');
6516
6517   /* Build a sequence of "../" strings for the resulting relative file name. */
6518   i = 0;
6519   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6520     i += 1;
6521   res = xnew (3*i + strlen (fp + 1) + 1, char);
6522   res[0] = '\0';
6523   while (i-- > 0)
6524     strcat (res, "../");
6525
6526   /* Add the file name relative to the common root of file and dir. */
6527   strcat (res, fp + 1);
6528   free (afn);
6529
6530   return res;
6531 }
6532
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is; otherwise DIR and FILE are
   concatenated.  The "/dirname/.." and "/." components are then
   removed in place.  Should the result end up empty, "/" is returned
   instead.  Under DOS_NT a relative FILE with a drive letter is a
   fatal error.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *slash, *prev, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  for (slash = etags_strchr (res, '/');
       slash != NULL && slash[0] != '\0'; )
    {
      if (slash[1] == '.')
        {
          if (slash[2] == '.'
              && (slash[3] == '/' || slash[3] == '\0'))
            {
              /* "/..": walk back to the point where the preceding
                 component starts, i.e. where the text reads as an
                 absolute name.  */
              for (prev = slash - 1;
                   prev >= res && !filename_is_absolute (prev);
                   prev--)
                continue;
              if (prev < res)
                prev = slash;   /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (prev[0] != '/')
                prev = slash;
#endif
              /* Close the gap; the length includes the terminator.  */
              memmove (prev, slash + 3, strlen (slash + 2));
              slash = prev;
              continue;
            }
          if (slash[2] == '/' || slash[2] == '\0')
            {
              /* "/.": drop the two characters, terminator included
                 in the move.  */
              memmove (slash, slash + 2, strlen (slash + 1));
              continue;
            }
        }

      slash = etags_strchr (slash + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* Just a safety net: should never happen.  */
  free (res);
  return savestr ("/");
}
6595
/* Return a newly allocated string containing the absolute file name
   of the directory where FILE resides, given DIR (which should end
   with a slash).  When FILE contains no slash, a copy of DIR is
   returned.  FILE is modified temporarily but restored before
   returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash so that only the directory
     part is made absolute, then put the cut character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6615
/* Tell whether the file name FN is absolute: it starts with a slash
   or, under DOS_NT, with a drive letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6627
/* Canonicalize the file name FN in place.
   Every run of one or more separator characters is replaced by a
   single slash.  The separator is '/', or '\\' under DOS_NT, where an
   upper-case drive letter is also downcased first.  The result is
   never longer than the input.  */
static void
canonicalize_filename (register char *fn)
{
  const char *src = fn;         /* next character to read */
  char *dst = fn;               /* next position to write */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR(c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash. */
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          /* Skip the whole run of separators.  */
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6657
6658 \f
6659 /* Initialize a linebuffer for use. */
6660 static void
6661 linebuffer_init (linebuffer *lbp)
6662 {
6663   lbp->size = (DEBUG) ? 3 : 200;
6664   lbp->buffer = xnew (lbp->size, char);
6665   lbp->buffer[0] = '\0';
6666   lbp->len = 0;
6667 }
6668
6669 /* Set the minimum size of a string contained in a linebuffer. */
6670 static void
6671 linebuffer_setlen (linebuffer *lbp, int toksize)
6672 {
6673   while (lbp->size <= toksize)
6674     {
6675       lbp->size *= 2;
6676       xrnew (lbp->buffer, lbp->size, char);
6677     }
6678   lbp->len = toksize;
6679 }
6680
6681 /* Like malloc but get fatal error if memory is exhausted. */
6682 static PTR
6683 xmalloc (size_t size)
6684 {
6685   PTR result = (PTR) malloc (size);
6686   if (result == NULL)
6687     fatal ("virtual memory exhausted", (char *)NULL);
6688   return result;
6689 }
6690
6691 static PTR
6692 xrealloc (char *ptr, size_t size)
6693 {
6694   PTR result = (PTR) realloc (ptr, size);
6695   if (result == NULL)
6696     fatal ("virtual memory exhausted", (char *)NULL);
6697   return result;
6698 }
6699
6700 /*
6701  * Local Variables:
6702  * indent-tabs-mode: t
6703  * tab-width: 8
6704  * fill-column: 79
6705  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6706  * c-file-style: "gnu"
6707  * End:
6708  */
6709
6710 /* etags.c ends here */