lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4"; /* revision string; the leading "@(#)" looks like a what(1) marker -- TODO confirm */
  82
  83 #define TRUE    1               /* truth values for the int-based `bool' used in this file */
  84 #define FALSE   0
  85 /* Normalize DEBUG to TRUE or FALSE so that it can be tested with #if.  */
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* disable assert; defined before <assert.h> is included */
  92 #endif
  93
  94 #ifdef HAVE_CONFIG_H
  95 # include <config.h>
  96   /* This is probably not necessary any more.  On some systems, config.h
  97      used to define static as nothing for the sake of unexec.  We don't
  98      want that here since we don't use unexec.  None of these systems
  99      are supported any more, but the idea is still mentioned in
 100      etc/PROBLEMS.  */
 101 # undef static
 102 # ifndef PTR                    /* for XEmacs */
 103 #   define PTR void *
 104 # endif
 105 #else  /* no config.h */
 106 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 107 #   define PTR void *           /* for generic pointers */
 108 # else /* not standard C */
 109 #   define const                /* remove const for old compilers' sake */
 110 #   define PTR long *           /* don't use void* */
 111 # endif
 112 #endif /* !HAVE_CONFIG_H */
 113
 114 #ifndef _GNU_SOURCE
 115 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 116 #endif
 117
 118 /* WIN32_NATIVE is for XEmacs.
 119    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 120 #ifdef WIN32_NATIVE
 121 # undef MSDOS
 122 # undef  WINDOWSNT
 123 # define WINDOWSNT
 124 #endif /* WIN32_NATIVE */
 125
 126 #ifdef MSDOS
 127 # undef MSDOS
 128 # define MSDOS TRUE
 129 # include <fcntl.h>
 130 # include <sys/param.h>
 131 # include <io.h>
 132 # ifndef HAVE_CONFIG_H
 133 #   define DOS_NT
 134 #   include <sys/config.h>
 135 # endif
 136 #else
 137 # define MSDOS FALSE
 138 #endif /* MSDOS */
 139
 140 #ifdef WINDOWSNT
 141 # include <fcntl.h>
 142 # include <direct.h>
 143 # include <io.h>
 144 # define MAXPATHLEN _MAX_PATH
 145 # undef HAVE_NTGUI
 146 # undef  DOS_NT
 147 # define DOS_NT
 148 # ifndef HAVE_GETCWD
 149 #   define HAVE_GETCWD
 150 # endif /* undef HAVE_GETCWD */
 151 #else /* not WINDOWSNT */
 152 #endif /* !WINDOWSNT */
 153
 154 #include <unistd.h>
 155 #ifndef HAVE_UNISTD_H
 156 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 157     extern char *getcwd (char *buf, size_t size);
 158 # endif
 159 #endif /* HAVE_UNISTD_H */
 160
 161 #include <stdlib.h>
 162 #include <string.h>
 163 #include <stdio.h>
 164 #include <ctype.h>
 165 #include <errno.h>
 166 #include <sys/types.h>
 167 #include <sys/stat.h>
 168
 169 #include <assert.h>
 170 #ifdef NDEBUG
 171 # undef  assert                 /* some systems have a buggy assert.h */
 172 # define assert(x) ((void) 0)
 173 #endif
 174
 175 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 176 # define NO_LONG_OPTIONS TRUE
 177 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 178   extern char *optarg;
 179   extern int optind, opterr;
 180 #else
 181 # define NO_LONG_OPTIONS FALSE
 182 # include <getopt.h>
 183 #endif /* NO_LONG_OPTIONS */
 184
 185 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 186 # ifdef __CYGWIN__              /* compiling on Cygwin */
 187                              !!! NOTICE !!!
 188  the regex.h distributed with Cygwin is not compatible with etags, alas!
 189 If you want regular expression support, you should delete this notice and
 190               arrange to use the GNU regex.h and regex.c.
 191 # endif
 192 #endif
 193 #include <regex.h>
 194
 195 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 196  Leave it undefined to make the program "etags", which makes emacs-style
 197  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 198 #ifdef CTAGS                    /* normalize to TRUE/FALSE so that `#if CTAGS' works */
 199 # undef  CTAGS
 200 # define CTAGS TRUE             /* building ctags */
 201 #else
 202 # define CTAGS FALSE            /* building etags */
 203 #endif
 204 /* Equality predicates on C strings (first N chars for the `n' forms).  S and T must both be non-NULL; each is evaluated twice.  */
 205 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 206 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 207 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 208 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 209
 210 #define CHARS 256               /* number of char values (2^8); size of the tables below */
 211 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* map x into 0..CHARS-1 */
 212 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 213 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 214 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 215 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 216 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 217 /* <ctype.h> wrappers: the argument is masked with CHAR first, so it is never negative.  */
 218 #define ISALNUM(c)      isalnum (CHAR(c))
 219 #define ISALPHA(c)      isalpha (CHAR(c))
 220 #define ISDIGIT(c)      isdigit (CHAR(c))
 221 #define ISLOWER(c)      islower (CHAR(c))
 222
 223 #define lowcase(c)      tolower (CHAR(c))
 224
 225
 226 /*
 227  *      xnew, xrnew -- allocate, reallocate storage for N objects of type Type
 228  *
 229  * SYNOPSIS:    Type *xnew (int n, Type);
 230  *              void xrnew (OldPointer, int n, Type);   (OldPointer is assigned the resized block)
 231  */
 232 #if DEBUG                       /* debug builds go through the tracing allocators of chkmalloc.h */
 233 # include "chkmalloc.h"
 234 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 235                                                   (n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #else                           /* otherwise use the xmalloc and xrealloc declared in this file */
 239 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 240 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 241                                         (char *) (op), (n) * sizeof (Type)))
 242 #endif
 243
 244 #define bool int                /* flags are plain ints; longopts below stores the address of several of them */
 245
 246 typedef void Lang_function (FILE *); /* type of a language's parse function (see `function' in `language') */
 247
 248 typedef struct                  /* one entry of the compressors[] table */
 249 {
 250   const char *suffix;           /* file name suffix for this compressor */
 251   const char *command;          /* takes one arg and decompresses to stdout */
 252 } compressor;
 253
 254 typedef struct                  /* description of one supported language */
 255 {
 256   const char *name;             /* language name */
 257   const char *help;             /* detailed help for the language */
 258   Lang_function *function;      /* parse function */
 259   const char **suffixes;        /* name suffixes of this language's files (NULL-terminated, no dot) */
 260   const char **filenames;       /* names of this language's files (NULL-terminated) */
 261   const char **interpreters;    /* interpreters for this language (NULL-terminated) */
 262   bool metasource;              /* source used to generate other sources */
 263 } language;
 264
 265 typedef struct fdesc
 266 {                               /* one input file; records are chained through `next' (see fdhead) */
 267   struct fdesc *next;           /* for the linked list */
 268   char *infname;                /* uncompressed input file name */
 269   char *infabsname;             /* absolute uncompressed input file name */
 270   char *infabsdir;              /* absolute dir of input file */
 271   char *taggedfname;            /* file name to write in tagfile */
 272   language *lang;               /* language of file */
 273   char *prop;                   /* file properties to write in tagfile */
 274   bool usecharno;               /* etags tags shall contain char number */
 275   bool written;                 /* entry written in the tags file */
 276 } fdesc;
 277
 278 typedef struct node_st          /* one tag; nodes form the binary tree rooted at nodehead */
 279 {                               /* sorting structure */
 280   struct node_st *left, *right; /* left and right sons */
 281   fdesc *fdp;                   /* description of file to whom tag belongs */
 282   char *name;                   /* tag name */
 283   char *regex;                  /* search regexp */
 284   bool valid;                   /* write this tag on the tag file */
 285   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 286   bool been_warned;             /* warning already given for duplicated tag */
 287   int lno;                      /* line number tag is on */
 288   long cno;                     /* character number line starts on */
 289 } node;
 290
 291 /*
 292  * A `linebuffer' is a structure which holds a line of text.
 293  * `readline_internal' reads a line from a stream into a linebuffer
 294  * and works regardless of the length of the line.
 295  * SIZE is the size of BUFFER, LEN is the length of the string in
 296  * BUFFER after readline reads it.
 297  */
 298 typedef struct
 299 {
 300   long size;                    /* size of `buffer' */
 301   int len;                      /* length of the string held in `buffer' */
 302   char *buffer;                 /* the text itself */
 303 } linebuffer;
 304
 305 /* Used to support mixing of --lang and file names. */
 306 typedef struct                  /* one command-line item, tagged by `arg_type' */
 307 {
 308   enum {
 309     at_language,                /* a language specification */
 310     at_regexp,                  /* a regular expression */
 311     at_filename,                /* a file name */
 312     at_stdin,                   /* read from stdin here */
 313     at_end                      /* stop parsing the list */
 314   } arg_type;                   /* argument type */
 315   language *lang;               /* language associated with the argument */
 316   char *what;                   /* the argument itself */
 317 } argument;
 318
 319 /* Structure defining a regular expression; entries are chained through `p_next' (see p_head). */
 320 typedef struct regexp
 321 {
 322   struct regexp *p_next;        /* pointer to next in list */
 323   language *lang;               /* if set, use only for this language */
 324   char *pattern;                /* the regexp pattern */
 325   char *name;                   /* tag name */
 326   struct re_pattern_buffer *pat; /* the compiled pattern */
 327   struct re_registers regs;     /* re registers */
 328   bool error_signaled;          /* already signaled for this regexp */
 329   bool force_explicit_name;     /* do not allow implicit tag name */
 330   bool ignore_case;             /* ignore case when matching */
 331   bool multi_line;              /* do a multi-line match on the whole file */
 332 } regexp;
 333
 334
 335 /* Forward declarations of the per-language parsers.  Many compilers barf on this:
 336         Lang_function Ada_funcs;
 337    so let's write it this way */
 338 static void Ada_funcs (FILE *);
 339 static void Asm_labels (FILE *);
 340 static void C_entries (int c_ext, FILE *); /* not a Lang_function: takes an extra c_ext argument */
 341 static void default_C_entries (FILE *);
 342 static void plain_C_entries (FILE *);
 343 static void Cjava_entries (FILE *);
 344 static void Cobol_paragraphs (FILE *);
 345 static void Cplusplus_entries (FILE *);
 346 static void Cstar_entries (FILE *);
 347 static void Erlang_functions (FILE *);
 348 static void Forth_words (FILE *);
 349 static void Fortran_functions (FILE *);
 350 static void HTML_labels (FILE *);
 351 static void Lisp_functions (FILE *);
 352 static void Lua_functions (FILE *);
 353 static void Makefile_targets (FILE *);
 354 static void Pascal_functions (FILE *);
 355 static void Perl_functions (FILE *);
 356 static void PHP_functions (FILE *);
 357 static void PS_functions (FILE *);
 358 static void Prolog_functions (FILE *);
 359 static void Python_functions (FILE *);
 360 static void Scheme_functions (FILE *);
 361 static void TeX_commands (FILE *);
 362 static void Texinfo_nodes (FILE *);
 363 static void Yacc_entries (FILE *);
 364 static void just_read_file (FILE *);
 365
 366 static void print_language_names (void);
 367 static void print_version (void);
 368 static void print_help (argument *);
 369 int main (int, char **);
 370
 371 static compressor *get_compressor_from_suffix (char *, char **);
 372 static language *get_language_from_langname (const char *);
 373 static language *get_language_from_interpreter (char *);
 374 static language *get_language_from_filename (char *, bool);
 375 static void readline (linebuffer *, FILE *);
 376 static long readline_internal (linebuffer *, FILE *);
 377 static bool nocase_tail (const char *);
 378 static void get_tag (char *, char **);
 379
 380 static void analyse_regex (char *);
 381 static void free_regexps (void);
 382 static void regex_tag_multiline (void);
 383 static void error (const char *, const char *);
 384 static void suggest_asking_for_help (void) NO_RETURN; /* NO_RETURN is not defined in the visible part of this file; presumably it comes from config.h -- TODO confirm */
 385 void fatal (const char *, const char *) NO_RETURN; /* the only helper declared here without `static' */
 386 static void pfatal (const char *) NO_RETURN;
 387 static void add_node (node *, node **);
 388
 389 static void init (void);
 390 static void process_file_name (char *, language *);
 391 static void process_file (FILE *, char *, language *);
 392 static void find_entries (FILE *);
 393 static void free_tree (node *);
 394 static void free_fdesc (fdesc *);
 395 static void pfnote (char *, bool, char *, int, int, long);
 396 static void make_tag (const char *, int, bool, char *, int, int, long);
 397 static void invalidate_nodes (fdesc *, node **);
 398 static void put_entries (node *);
 399
 400 static char *concat (const char *, const char *, const char *);
 401 static char *skip_spaces (char *);
 402 static char *skip_non_spaces (char *);
 403 static char *savenstr (const char *, int);
 404 static char *savestr (const char *);
 405 static char *etags_strchr (const char *, int);
 406 static char *etags_strrchr (const char *, int);
 407 static int etags_strcasecmp (const char *, const char *);
 408 static int etags_strncasecmp (const char *, const char *, int);
 409 static char *etags_getcwd (void);
 410 static char *relative_filename (char *, char *);
 411 static char *absolute_filename (char *, char *);
 412 static char *absolute_dirname (char *, char *);
 413 static bool filename_is_absolute (char *f);
 414 static void canonicalize_filename (char *);
 415 static void linebuffer_init (linebuffer *);
 416 static void linebuffer_setlen (linebuffer *, int);
 417 static PTR xmalloc (unsigned int);          /* allocator behind xnew; the size is an unsigned int */
 418 static PTR xrealloc (char *, unsigned int); /* allocator behind xrnew; the size is an unsigned int */
 419
 420 \f
 421 static char searchar = '/';     /* search delimiter: use /.../ searches */
 422
 423 static char *tagfile;           /* name of the output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr (stdio stream) for tags file */
 428
 429 static fdesc *fdhead;           /* head of file description list (linked through `next') */
 430 static fdesc *curfdp;           /* current file description */
 431 static int lineno;              /* line number of current line */
 432 static long charno;             /* current character number */
 433 static long linecharno;         /* charno of start of current line */
 434 static char *dbp;               /* pointer to start of current tag */
 435
 436 static const int invalidcharno = -1; /* NOTE(review): presumably marks "no character number" -- confirm against its users */
 437
 438 static node *nodehead;          /* the head of the binary tree of tags */
 439 static node *last_node;         /* the last node created */
 440
 441 static linebuffer lb;           /* the current line */
 442 static linebuffer filebuf;      /* a buffer containing the whole file */
 443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
 445 /* boolean "functions" (see init): tables indexed by CHAR(c), read through iswhite, notinname, begtoken, intoken and endtoken */
 446 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 447 static const char
 448   /* white chars (see iswhite) */
 449   *white = " \f\t\n\r\v",
 450   /* not in a name (see notinname) */
 451   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 452   /* token ending chars (see endtoken) */
 453   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 454   /* token starting chars (see begtoken) */
 455   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 456   /* valid in-token chars (see intoken) */
 457   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 458
 459 static bool append_to_tagfile;  /* -a: append to tags */
 460 /* The next five default to TRUE in C and derived languages.  */
 461 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 462 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 463                                 /* 0 struct/enum/union decls, and C++ */
 464                                 /* member functions. */
 465 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 466                                 /* constants and variables. */
 467                                 /* -D: opposite of -d.  Default under ctags. */
 468 static bool globals;            /* create tags for global variables (--globals, --no-globals) */
 469 static bool members;            /* create tags for C member variables (--members, --no-members) */
 470 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 471 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 472 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 473 static bool update;             /* -u: update tags */
 474 static bool vgrind_style;       /* -v: create vgrind style index output */
 475 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 476 static bool cxref_style;        /* -x: create cxref style output */
 477 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 478 static bool ignoreindent;       /* -I: ignore indentation in C */
 479 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 480
 481 /* STDIN is defined in LynxOS system headers */
 482 #ifdef STDIN
 483 # undef STDIN
 484 #endif
 485 /* Option code for --parse-stdin (see longopts); 0x1001 lies above every single-character option code.  */
 486 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 487 static bool parsing_stdin;      /* --parse-stdin used */
 488
 489 static regexp *p_head;          /* list of all regexps (linked through p_next) */
 490 static bool need_filebuf;       /* some regexes are multi-line */
 491
 492 static struct option longopts[] = /* long options; columns are name, has_arg, flag, val */
 493 {
 494   { "append",             no_argument,       NULL,               'a'   },
 495   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 496   { "c++",                no_argument,       NULL,               'C'   },
 497   { "declarations",       no_argument,       &declarations,      TRUE  },
 498   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 499   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 500   { "help",               no_argument,       NULL,               'h'   },
 501   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same name as the entry above, different code -- confirm this entry is needed */
 502   { "ignore-indentation", no_argument,       NULL,               'I'   },
 503   { "language",           required_argument, NULL,               'l'   },
 504   { "members",            no_argument,       &members,           TRUE  },
 505   { "no-members",         no_argument,       &members,           FALSE },
 506   { "output",             required_argument, NULL,               'o'   },
 507   { "regex",              required_argument, NULL,               'r'   },
 508   { "no-regex",           no_argument,       NULL,               'R'   },
 509   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 510   { "parse-stdin",        required_argument, NULL,               STDIN },
 511   { "version",            no_argument,       NULL,               'V'   },
 512
 513 #if CTAGS /* Ctags options */
 514   { "backward-search",    no_argument,       NULL,               'B'   },
 515   { "cxref",              no_argument,       NULL,               'x'   },
 516   { "defines",            no_argument,       NULL,               'd'   },
 517   { "globals",            no_argument,       &globals,           TRUE  },
 518   { "typedefs",           no_argument,       NULL,               't'   },
 519   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 520   { "update",             no_argument,       NULL,               'u'   },
 521   { "vgrind",             no_argument,       NULL,               'v'   },
 522   { "no-warn",            no_argument,       NULL,               'w'   },
 523
 524 #else /* Etags options */
 525   { "no-defines",         no_argument,       NULL,               'D'   },
 526   { "no-globals",         no_argument,       &globals,           FALSE },
 527   { "include",            required_argument, NULL,               'i'   },
 528 #endif
 529   { NULL }                      /* end of table */
 530 };
 531
 532 static compressor compressors[] = /* { suffix, command } pairs, ended by a NULL suffix */
 533 {
 534   { "z", "gzip -d -c"},
 535   { "Z", "gzip -d -c"},
 536   { "gz", "gzip -d -c"},
 537   { "GZ", "gzip -d -c"},
 538   { "bz2", "bzip2 -d -c" },
 539   { "xz", "xz -d -c" },
 540   { NULL }                      /* end of table */
 541 };
 542
 543 /*
 544  * Language stuff.
 545  */
 546
 547 /* Ada code: file name suffixes (NULL-terminated, no dot) and help text */
 548 static const char *Ada_suffixes [] =
 549   { "ads", "adb", "ada", NULL };
 550 static const char Ada_help [] =
 551 "In Ada code, functions, procedures, packages, tasks and types are\n\
 552 tags.  Use the `--packages-only' option to create tags for\n\
 553 packages only.\n\
 554 Ada tag names have suffixes indicating the type of entity:\n\
 555         Entity type:    Qualifier:\n\
 556         ------------    ----------\n\
 557         function        /f\n\
 558         procedure       /p\n\
 559         package spec    /s\n\
 560         package body    /b\n\
 561         type            /t\n\
 562         task            /k\n\
 563 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 564 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 565 will just search for any tag `bidule'.";
 566
 567 /* Assembly code: file name suffixes (NULL-terminated, no dot) and help text */
 568 static const char *Asm_suffixes [] =
 569   { "a",        /* Unix assembler */
 570     "asm", /* Microcontroller assembly */
 571     "def", /* BSO/Tasking definition includes  */
 572     "inc", /* Microcontroller include files */
 573     "ins", /* Microcontroller include files */
 574     "s", "sa", /* Unix assembler */
 575     "S",   /* cpp-processed Unix assembler */
 576     "src", /* BSO/Tasking C compiler output */
 577     NULL
 578   };
 579 static const char Asm_help [] =
 580 "In assembler code, labels appearing at the beginning of a line,\n\
 581 followed by a colon, are tags.";
 582
 583
 584 /* Note that .c and .h can be considered C++, if the --c++ flag was
 585    given, or if the `class' or `template' keywords are met inside the file.
 586    That is why default_C_entries is called for these. */
 587 static const char *default_C_suffixes [] =
 588   { "c", "h", NULL };
 589 #if CTAGS                               /* C help for Ctags */
 590 static const char default_C_help [] =   /* names the options from the Ctags half of longopts */
 591 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 592 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 593 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 594 Use --globals to tag global variables.\n\
 595 You can tag function declarations and external variables by\n\
 596 using `--declarations', and struct members by using `--members'.";
 597 #else                                   /* C help for Etags */
 598 static const char default_C_help [] =   /* names the etags-only --no-defines and --no-globals */
 599 "In C code, any C function or typedef is a tag, and so are\n\
 600 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 601 definitions and `enum' constants are tags unless you specify\n\
 602 `--no-defines'.  Global variables are tags unless you specify\n\
 603 `--no-globals' and so are struct members unless you specify\n\
 604 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 605 `--no-members' can make the tags table file much smaller.\n\
 606 You can tag function declarations and external variables by\n\
 607 using `--declarations'.";
 608 #endif  /* C help for Ctags and Etags */
 609 /* C++: file name suffixes and help text */
 610 static const char *Cplusplus_suffixes [] =
 611   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 612     "M",                        /* Objective C++ */
 613     "pdb",                      /* Postscript with C syntax */
 614     NULL };
 615 static const char Cplusplus_help [] =
 616 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 617 --help --lang=c --lang=c++ for full help.)\n\
 618 In addition to C tags, member functions are also recognized.  Member\n\
 619 variables are recognized unless you use the `--no-members' option.\n\
 620 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 621 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 622 `operator+'.";
 623 /* Java: file name suffixes and help text (read-only, like the other help strings) */
 624 static const char *Cjava_suffixes [] =
 625   { "java", NULL };
 626 static const char Cjava_help [] =
 627 "In Java code, all the tags constructs of C and C++ code are\n\
 628 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 629
 630 /* Cobol: file name suffixes and help text (read-only, like the other help strings) */
 631 static const char *Cobol_suffixes [] =
 632   { "COB", "cob", NULL };
 633 static const char Cobol_help [] =
 634 "In Cobol code, tags are paragraph names; that is, any word\n\
 635 starting in column 8 and followed by a period.";
 636 /* Cstar: file name suffixes only; no help text is defined in this part of the file */
 637 static const char *Cstar_suffixes [] =
 638   { "cs", "hs", NULL };
 639 /* Erlang: file name suffixes and help text */
 640 static const char *Erlang_suffixes [] =
 641   { "erl", "hrl", NULL };
 642 static const char Erlang_help [] =
 643 "In Erlang code, the tags are the functions, records and macros\n\
 644 defined in the file.";
 645 /* Forth: file name suffixes and help text; internal linkage, like the other suffix tables */
 646 static const char *Forth_suffixes [] =
 647   { "fth", "tok", NULL };
 648 static const char Forth_help [] =
 649 "In Forth code, tags are words defined by `:',\n\
 650 constant, code, create, defer, value, variable, buffer:, field.";
 651 /* Fortran: file name suffixes and help text */
 652 static const char *Fortran_suffixes [] =
 653   { "F", "f", "f90", "for", NULL };
 654 static const char Fortran_help [] =
 655 "In Fortran code, functions, subroutines and block data are tags.";
 656 /* HTML: file name suffixes and help text */
 657 static const char *HTML_suffixes [] =
 658   { "htm", "html", "shtml", NULL };
 659 static const char HTML_help [] =
 660 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 661 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 662 occurrences of `id='.";
 663 /* Lisp: file name suffixes and help text */
 664 static const char *Lisp_suffixes [] =
 665   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 666 static const char Lisp_help [] =
 667 "In Lisp code, any function defined with `defun', any variable\n\
 668 defined with `defvar' or `defconst', and in general the first\n\
 669 argument of any expression that starts with `(def' in column zero\n\
 670 is a tag.";
 671 /* Lua: file name suffixes and help text */
 672 static const char *Lua_suffixes [] =
 673   { "lua", "LUA", NULL };
 674 static const char Lua_help [] =
 675 "In Lua scripts, all functions are tags.";
 676 /* Makefiles: recognized by whole file name rather than by suffix; plus help text */
 677 static const char *Makefile_filenames [] =
 678   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 679 static const char Makefile_help [] =
 680 "In makefiles, targets are tags; additionally, variables are tags\n\
 681 unless you specify `--no-globals'.";
 682 /* Objective C: file name suffixes and help text */
 683 static const char *Objc_suffixes [] =
 684   { "lm",                       /* Objective lex file */
 685     "m",                        /* Objective C file */
 686      NULL };
 687 static const char Objc_help [] =
 688 "In Objective C code, tags include Objective C definitions for classes,\n\
 689 class categories, methods and protocols.  Tags for variables and\n\
 690 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 691 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 692 /* Pascal: file name suffixes and help text */
 693 static const char *Pascal_suffixes [] =
 694   { "p", "pas", NULL };
 695 static const char Pascal_help [] =
 696 "In Pascal code, the tags are the functions and procedures defined\n\
 697 in the file.";
 698 /* " // this is for working around an Emacs highlighting bug... */
 699 /* Perl: file name suffixes, interpreter names and help text */
 700 static const char *Perl_suffixes [] =
 701   { "pl", "pm", NULL };
 702 static const char *Perl_interpreters [] =
 703   { "perl", "@PERL@", NULL };
 704 static const char Perl_help [] =
 705 "In Perl code, the tags are the packages, subroutines and variables\n\
 706 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 707 `--globals' if you want to tag global variables.  Tags for\n\
 708 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 709 defined in the default package is `main::SUB'.";
 710 /* PHP: file name suffixes and help text */
 711 static const char *PHP_suffixes [] =
 712   { "php", "php3", "php4", NULL };
 713 static const char PHP_help [] =
 714 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 715 the `--no-members' option, vars are tags too.";
 716
 717 static const char *plain_C_suffixes [] =
 718   { "pc",                       /* Pro*C file */
 719      NULL };
 720
 721 static const char *PS_suffixes [] =
 722   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 723 static const char PS_help [] =
 724 "In PostScript code, the tags are the functions.";
 725
 726 static const char *Prolog_suffixes [] =
 727   { "prolog", NULL };
 728 static const char Prolog_help [] =
 729 "In Prolog code, tags are predicates and rules at the beginning of\n\
 730 line.";
 731
 732 static const char *Python_suffixes [] =
 733   { "py", NULL };
 734 static const char Python_help [] =
 735 "In Python code, `def' or `class' at the beginning of a line\n\
 736 generate a tag.";
 737
 738 /* Can't do the `SCM' or `scm' prefix with a version number. */
 739 static const char *Scheme_suffixes [] =
 740   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 741 static const char Scheme_help [] =
 742 "In Scheme code, tags include anything defined with `def' or with a\n\
 743 construct whose name starts with `def'.  They also include\n\
 744 variables set with `set!' at top level in the file.";
 745
 746 static const char *TeX_suffixes [] =
 747   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 748 static const char TeX_help [] =
 749 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 750 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 751 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 752 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 753 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 754 \n\
 755 Other commands can be specified by setting the environment variable\n\
 756 `TEXTAGS' to a colon-separated list like, for example,\n\
 757      TEXTAGS=\"mycommand:myothercommand\".";
 758
 759
 760 static const char *Texinfo_suffixes [] =
 761   { "texi", "texinfo", "txi", NULL };
 762 static const char Texinfo_help [] =
 763 "for texinfo files, lines starting with @node are tagged.";
 764
 765 static const char *Yacc_suffixes [] =
 766   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 767 static const char Yacc_help [] =
 768 "In Bison or Yacc input files, each rule defines as a tag the\n\
 769 nonterminal it constructs.  The portions of the file that contain\n\
 770 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 771 for full help).";
 772
 773 static const char auto_help [] =
 774 "`auto' is not a real language, it indicates to use\n\
 775 a default language for files base on file name suffix and file contents.";
 776
 777 static const char none_help [] =
 778 "`none' is not a real language, it indicates to only do\n\
 779 regexp processing on files.";
 780
 781 static const char no_lang_help [] =
 782 "No detailed help available for this language.";
 783
 784
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * The table is scanned linearly and ends at the entry whose name is
 * NULL.  Entries use positional initializers: language name, help
 * text, tag-generating function, suffix list.  Judging from the
 * `makefile' and `perl' rows, the fifth and sixth positions are the
 * whole-file-name list and the interpreter list -- NOTE(review):
 * confirm against the declaration of `language'.  Only `yacc' sets a
 * seventh position (TRUE); its meaning is not visible here -- TODO
 * confirm.  Trailing members that are not listed are zero-initialized.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* Makefiles have no suffix list; they are matched by file name.  */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  /* Perl is the only entry that lists interpreter names.  */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 824
 825 \f
 826 static void
 827 print_language_names (void)
 828 {
 829   language *lang;
 830   const char **name, **ext;
 831
 832   puts ("\nThese are the currently supported languages, along with the\n\
 833 default file names and dot suffixes:");
 834   for (lang = lang_names; lang->name != NULL; lang++)
 835     {
 836       printf ("  %-*s", 10, lang->name);
 837       if (lang->filenames != NULL)
 838         for (name = lang->filenames; *name != NULL; name++)
 839           printf (" %s", *name);
 840       if (lang->suffixes != NULL)
 841         for (ext = lang->suffixes; *ext != NULL; ext++)
 842           printf (" .%s", *ext);
 843       puts ("");
 844     }
 845   puts ("where `auto' means use default language for files based on file\n\
 846 name suffix, and `none' means only do regexp processing on files.\n\
 847 If no language is specified and no matching suffix is found,\n\
 848 the first line of the file is read for a sharp-bang (#!) sequence\n\
 849 followed by the name of an interpreter.  If no such sequence is found,\n\
 850 Fortran is tried first; if no tags are found, C is tried next.\n\
 851 When parsing any C file, a \"class\" or \"template\" keyword\n\
 852 switches to C++.");
 853   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 854 \n\
 855 For detailed help on a given language use, for example,\n\
 856 etags --help --lang=ada.");
 857 }
 858
 859 #ifndef EMACS_NAME
 860 # define EMACS_NAME "standalone"
 861 #endif
 862 #ifndef VERSION
 863 # define VERSION "17.38.1.4"
 864 #endif
 865 static void
 866 print_version (void)
 867 {
 868   /* Makes it easier to update automatically. */
 869   char emacs_copyright[] = "Copyright (C) 2011 Free Software Foundation, Inc.";
 870
 871   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 872   puts (emacs_copyright);
 873   puts ("This program is distributed under the terms in ETAGS.README");
 874
 875   exit (EXIT_SUCCESS);
 876 }
 877
/* Options whose help is guarded by this macro are only described when
   the build defines it to a true value; the default hides them.  */
#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
#endif

/*
 * Print help on standard output and exit successfully; never returns.
 *
 * ARGBUFFER is scanned up to its at_end entry.  If it contains
 * at_language entries, only the help text of those languages is
 * printed, with a blank line between consecutive texts.  Otherwise the
 * general usage message, the option descriptions and the language list
 * are printed.  Which options are described depends on CTAGS,
 * NO_LONG_OPTIONS and PRINT_UNDOCUMENTED_OPTIONS_HELP.
 */
static void
print_help (argument *argbuffer)
{
  bool help_for_lang = FALSE;

  /* Language-specific help: one text per --language option seen.  */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* If any language help was printed, that is all the user gets.  */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  if (NO_LONG_OPTIONS)
    puts ("WARNING: long option names do not work with this executable,\n\
as it is not linked with GNU getopt.");
  else
    puts ("You may use unambiguous abbreviations for the long option names.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  /* The two programs describe opposite switches for #define tagging.  */
  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");

  if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
    puts ("--no-line-directive\n\
        Ignore #line preprocessor directives in C and derived languages.");

  if (CTAGS)
    puts ("--members\n\
        Create tag entries for members of structures in some languages.");
  else
    puts ("--no-members\n\
        Do not create tag entries for members of structures\n\
        in some languages.");

  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: `i' means to ignore case,\n\
        `m' means to allow multi-line matches, `s' implies `m' and\n\
        causes dot to match any character, including newline.");

  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");

  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");

  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");

  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Print on the standard output an index of items intended for\n\
        human consumption, similar to the output of vgrind.  The index\n\
        is sorted, and gives the page number of each item.");

      /* NOTE(review): the next two texts both name the short option
         `-w' for different long options -- confirm which is intended.  */
      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-duplicates\n\
        Do not create duplicate tag entries, for compatibility with\n\
        traditional ctags.");

      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-warn\n\
        Suppress warning messages about duplicate tag entries.");

      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more `--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1057
1058 \f
1059 int
1060 main (int argc, char **argv)
1061 {
1062   int i;
1063   unsigned int nincluded_files;
1064   char **included_files;
1065   argument *argbuffer;
1066   int current_arg, file_count;
1067   linebuffer filename_lb;
1068   bool help_asked = FALSE;
1069  char *optstring;
1070  int opt;
1071
1072
1073 #ifdef DOS_NT
1074   _fmode = O_BINARY;   /* all of files are treated as binary files */
1075 #endif /* DOS_NT */
1076
1077   progname = argv[0];
1078   nincluded_files = 0;
1079   included_files = xnew (argc, char *);
1080   current_arg = 0;
1081   file_count = 0;
1082
1083   /* Allocate enough no matter what happens.  Overkill, but each one
1084      is small. */
1085   argbuffer = xnew (argc, argument);
1086
1087   /*
1088    * Always find typedefs and structure tags.
1089    * Also default to find macro constants, enum constants, struct
1090    * members and global variables.  Do it for both etags and ctags.
1091    */
1092   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1093   globals = members = TRUE;
1094
1095   /* When the optstring begins with a '-' getopt_long does not rearrange the
1096      non-options arguments to be at the end, but leaves them alone. */
1097   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1098                       "ac:Cf:Il:o:r:RSVhH",
1099                       (CTAGS) ? "BxdtTuvw" : "Di:");
1100
1101   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1102     switch (opt)
1103       {
1104       case 0:
1105         /* If getopt returns 0, then it has already processed a
1106            long-named option.  We should do nothing.  */
1107         break;
1108
1109       case 1:
1110         /* This means that a file name has been seen.  Record it. */
1111         argbuffer[current_arg].arg_type = at_filename;
1112         argbuffer[current_arg].what     = optarg;
1113         ++current_arg;
1114         ++file_count;
1115         break;
1116
1117       case STDIN:
1118         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1119         argbuffer[current_arg].arg_type = at_stdin;
1120         argbuffer[current_arg].what     = optarg;
1121         ++current_arg;
1122         ++file_count;
1123         if (parsing_stdin)
1124           fatal ("cannot parse standard input more than once", (char *)NULL);
1125         parsing_stdin = TRUE;
1126         break;
1127
1128         /* Common options. */
1129       case 'a': append_to_tagfile = TRUE;       break;
1130       case 'C': cplusplus = TRUE;               break;
1131       case 'f':         /* for compatibility with old makefiles */
1132       case 'o':
1133         if (tagfile)
1134           {
1135             error ("-o option may only be given once.", (char *)NULL);
1136             suggest_asking_for_help ();
1137             /* NOTREACHED */
1138           }
1139         tagfile = optarg;
1140         break;
1141       case 'I':
1142       case 'S':         /* for backward compatibility */
1143         ignoreindent = TRUE;
1144         break;
1145       case 'l':
1146         {
1147           language *lang = get_language_from_langname (optarg);
1148           if (lang != NULL)
1149             {
1150               argbuffer[current_arg].lang = lang;
1151               argbuffer[current_arg].arg_type = at_language;
1152               ++current_arg;
1153             }
1154         }
1155         break;
1156       case 'c':
1157         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1158         optarg = concat (optarg, "i", ""); /* memory leak here */
1159         /* FALLTHRU */
1160       case 'r':
1161         argbuffer[current_arg].arg_type = at_regexp;
1162         argbuffer[current_arg].what = optarg;
1163         ++current_arg;
1164         break;
1165       case 'R':
1166         argbuffer[current_arg].arg_type = at_regexp;
1167         argbuffer[current_arg].what = NULL;
1168         ++current_arg;
1169         break;
1170       case 'V':
1171         print_version ();
1172         break;
1173       case 'h':
1174       case 'H':
1175         help_asked = TRUE;
1176         break;
1177
1178         /* Etags options */
1179       case 'D': constantypedefs = FALSE;                        break;
1180       case 'i': included_files[nincluded_files++] = optarg;     break;
1181
1182         /* Ctags options. */
1183       case 'B': searchar = '?';                                 break;
1184       case 'd': constantypedefs = TRUE;                         break;
1185       case 't': typedefs = TRUE;                                break;
1186       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1187       case 'u': update = TRUE;                                  break;
1188       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1189       case 'x': cxref_style = TRUE;                             break;
1190       case 'w': no_warnings = TRUE;                             break;
1191       default:
1192         suggest_asking_for_help ();
1193         /* NOTREACHED */
1194       }
1195
1196   /* No more options.  Store the rest of arguments. */
1197   for (; optind < argc; optind++)
1198     {
1199       argbuffer[current_arg].arg_type = at_filename;
1200       argbuffer[current_arg].what = argv[optind];
1201       ++current_arg;
1202       ++file_count;
1203     }
1204
1205   argbuffer[current_arg].arg_type = at_end;
1206
1207   if (help_asked)
1208     print_help (argbuffer);
1209     /* NOTREACHED */
1210
1211   if (nincluded_files == 0 && file_count == 0)
1212     {
1213       error ("no input files specified.", (char *)NULL);
1214       suggest_asking_for_help ();
1215       /* NOTREACHED */
1216     }
1217
1218   if (tagfile == NULL)
1219     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1220   cwd = etags_getcwd ();        /* the current working directory */
1221   if (cwd[strlen (cwd) - 1] != '/')
1222     {
1223       char *oldcwd = cwd;
1224       cwd = concat (oldcwd, "/", "");
1225       free (oldcwd);
1226     }
1227
1228   /* Compute base directory for relative file names. */
1229   if (streq (tagfile, "-")
1230       || strneq (tagfile, "/dev/", 5))
1231     tagfiledir = cwd;            /* relative file names are relative to cwd */
1232   else
1233     {
1234       canonicalize_filename (tagfile);
1235       tagfiledir = absolute_dirname (tagfile, cwd);
1236     }
1237
1238   init ();                      /* set up boolean "functions" */
1239
1240   linebuffer_init (&lb);
1241   linebuffer_init (&filename_lb);
1242   linebuffer_init (&filebuf);
1243   linebuffer_init (&token_name);
1244
1245   if (!CTAGS)
1246     {
1247       if (streq (tagfile, "-"))
1248         {
1249           tagf = stdout;
1250 #ifdef DOS_NT
1251           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1252              doesn't take effect until after `stdout' is already open). */
1253           if (!isatty (fileno (stdout)))
1254             setmode (fileno (stdout), O_BINARY);
1255 #endif /* DOS_NT */
1256         }
1257       else
1258         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1259       if (tagf == NULL)
1260         pfatal (tagfile);
1261     }
1262
1263   /*
1264    * Loop through files finding functions.
1265    */
1266   for (i = 0; i < current_arg; i++)
1267     {
1268       static language *lang;    /* non-NULL if language is forced */
1269       char *this_file;
1270
1271       switch (argbuffer[i].arg_type)
1272         {
1273         case at_language:
1274           lang = argbuffer[i].lang;
1275           break;
1276         case at_regexp:
1277           analyse_regex (argbuffer[i].what);
1278           break;
1279         case at_filename:
1280               this_file = argbuffer[i].what;
1281               /* Input file named "-" means read file names from stdin
1282                  (one per line) and use them. */
1283               if (streq (this_file, "-"))
1284                 {
1285                   if (parsing_stdin)
1286                     fatal ("cannot parse standard input AND read file names from it",
1287                            (char *)NULL);
1288                   while (readline_internal (&filename_lb, stdin) > 0)
1289                     process_file_name (filename_lb.buffer, lang);
1290                 }
1291               else
1292                 process_file_name (this_file, lang);
1293           break;
1294         case at_stdin:
1295           this_file = argbuffer[i].what;
1296           process_file (stdin, this_file, lang);
1297           break;
1298         }
1299     }
1300
1301   free_regexps ();
1302   free (lb.buffer);
1303   free (filebuf.buffer);
1304   free (token_name.buffer);
1305
1306   if (!CTAGS || cxref_style)
1307     {
1308       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1309       put_entries (nodehead);
1310       free_tree (nodehead);
1311       nodehead = NULL;
1312       if (!CTAGS)
1313         {
1314           fdesc *fdp;
1315
1316           /* Output file entries that have no tags. */
1317           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1318             if (!fdp->written)
1319               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1320
1321           while (nincluded_files-- > 0)
1322             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1323
1324           if (fclose (tagf) == EOF)
1325             pfatal (tagfile);
1326         }
1327
1328       exit (EXIT_SUCCESS);
1329     }
1330
1331   /* From here on, we are in (CTAGS && !cxref_style) */
1332   if (update)
1333     {
1334       char cmd[BUFSIZ];
1335       for (i = 0; i < current_arg; ++i)
1336         {
1337           switch (argbuffer[i].arg_type)
1338             {
1339             case at_filename:
1340             case at_stdin:
1341               break;
1342             default:
1343               continue;         /* the for loop */
1344             }
1345           sprintf (cmd,
1346                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1347                    tagfile, argbuffer[i].what, tagfile);
1348           if (system (cmd) != EXIT_SUCCESS)
1349             fatal ("failed to execute shell command", (char *)NULL);
1350         }
1351       append_to_tagfile = TRUE;
1352     }
1353
1354   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1355   if (tagf == NULL)
1356     pfatal (tagfile);
1357   put_entries (nodehead);       /* write all the tags (CTAGS) */
1358   free_tree (nodehead);
1359   nodehead = NULL;
1360   if (fclose (tagf) == EOF)
1361     pfatal (tagfile);
1362
1363   if (CTAGS)
1364     if (append_to_tagfile || update)
1365       {
1366         char cmd[2*BUFSIZ+20];
1367         /* Maybe these should be used:
1368            setenv ("LC_COLLATE", "C", 1);
1369            setenv ("LC_ALL", "C", 1); */
1370         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1371         exit (system (cmd));
1372       }
1373   return EXIT_SUCCESS;
1374 }
1375
1376
1377 /*
1378  * Return a compressor given the file name.  If EXTPTR is non-zero,
1379  * return a pointer into FILE where the compressor-specific
1380  * extension begins.  If no compressor is found, NULL is returned
1381  * and EXTPTR is not significant.
1382  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1383  */
1384 static compressor *
1385 get_compressor_from_suffix (char *file, char **extptr)
1386 {
1387   compressor *compr;
1388   char *slash, *suffix;
1389
1390   /* File has been processed by canonicalize_filename,
1391      so we don't need to consider backslashes on DOS_NT.  */
1392   slash = etags_strrchr (file, '/');
1393   suffix = etags_strrchr (file, '.');
1394   if (suffix == NULL || suffix < slash)
1395     return NULL;
1396   if (extptr != NULL)
1397     *extptr = suffix;
1398   suffix += 1;
1399   /* Let those poor souls who live with DOS 8+3 file name limits get
1400      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1401      Only the first do loop is run if not MSDOS */
1402   do
1403     {
1404       for (compr = compressors; compr->suffix != NULL; compr++)
1405         if (streq (compr->suffix, suffix))
1406           return compr;
1407       if (!MSDOS)
1408         break;                  /* do it only once: not really a loop */
1409       if (extptr != NULL)
1410         *extptr = ++suffix;
1411     } while (*suffix != '\0');
1412   return NULL;
1413 }
1414
1415
1416
1417 /*
1418  * Return a language given the name.
1419  */
1420 static language *
1421 get_language_from_langname (const char *name)
1422 {
1423   language *lang;
1424
1425   if (name == NULL)
1426     error ("empty language name", (char *)NULL);
1427   else
1428     {
1429       for (lang = lang_names; lang->name != NULL; lang++)
1430         if (streq (name, lang->name))
1431           return lang;
1432       error ("unknown language \"%s\"", name);
1433     }
1434
1435   return NULL;
1436 }
1437
1438
1439 /*
1440  * Return a language given the interpreter name.
1441  */
1442 static language *
1443 get_language_from_interpreter (char *interpreter)
1444 {
1445   language *lang;
1446   const char **iname;
1447
1448   if (interpreter == NULL)
1449     return NULL;
1450   for (lang = lang_names; lang->name != NULL; lang++)
1451     if (lang->interpreters != NULL)
1452       for (iname = lang->interpreters; *iname != NULL; iname++)
1453         if (streq (*iname, interpreter))
1454             return lang;
1455
1456   return NULL;
1457 }
1458
1459
1460
1461 /*
1462  * Return a language given the file name.
1463  */
1464 static language *
1465 get_language_from_filename (char *file, int case_sensitive)
1466 {
1467   language *lang;
1468   const char **name, **ext, *suffix;
1469
1470   /* Try whole file name first. */
1471   for (lang = lang_names; lang->name != NULL; lang++)
1472     if (lang->filenames != NULL)
1473       for (name = lang->filenames; *name != NULL; name++)
1474         if ((case_sensitive)
1475             ? streq (*name, file)
1476             : strcaseeq (*name, file))
1477           return lang;
1478
1479   /* If not found, try suffix after last dot. */
1480   suffix = etags_strrchr (file, '.');
1481   if (suffix == NULL)
1482     return NULL;
1483   suffix += 1;
1484   for (lang = lang_names; lang->name != NULL; lang++)
1485     if (lang->suffixes != NULL)
1486       for (ext = lang->suffixes; *ext != NULL; ext++)
1487         if ((case_sensitive)
1488             ? streq (*ext, suffix)
1489             : strcaseeq (*ext, suffix))
1490           return lang;
1491   return NULL;
1492 }
1493
1494 \f
/*
 * This routine is called on each file argument.
 *
 * FILE is canonicalized in place, then opened (through the
 * compressor's command when the name that exists on disk is a
 * compressed one) and handed to process_file together with LANG.
 * Problems are reported and the file is skipped; only a failing
 * close is fatal.
 *
 * Three names are tracked: COMPRESSED_NAME and UNCOMPRESSED_NAME are
 * heap copies released at `cleanup', and REAL_NAME aliases whichever
 * of the two was found by stat.  Later code tests
 * `real_name == compressed_name' to decide between popen and fopen.
 */
static void
process_file_name (char *file, language *lang)
{
  struct stat stat_buf;
  FILE *inf;
  fdesc *fdp;
  compressor *compr;
  char *compressed_name, *uncompressed_name;
  char *ext, *real_name;
  int retval;

  canonicalize_filename (file);
  /* Never tag the tags file itself ("-" means standard output).  */
  if (streq (file, tagfile) && !streq (tagfile, "-"))
    {
      error ("skipping inclusion of %s in self.", file);
      return;
    }
  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
    {
      /* No compressor suffix: take the name as it is.  */
      compressed_name = NULL;
      real_name = uncompressed_name = savestr (file);
    }
  else
    {
      /* EXT points into FILE where the compressor suffix begins;
         presumably savenstr copies only the part before it.  */
      real_name = compressed_name = savestr (file);
      uncompressed_name = savenstr (file, ext - file);
    }

  /* If the canonicalized uncompressed name
     has already been dealt with, skip it silently. */
  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
    {
      assert (fdp->infname != NULL);
      if (streq (uncompressed_name, fdp->infname))
        goto cleanup;
    }

  if (stat (real_name, &stat_buf) != 0)
    {
      /* Reset real_name and try with a different name. */
      real_name = NULL;
      if (compressed_name != NULL) /* try with the given suffix */
        {
          if (stat (uncompressed_name, &stat_buf) == 0)
            real_name = uncompressed_name;
        }
      else                      /* try all possible suffixes */
        {
          /* On success COMPR is left at the compressor whose suffix
             matched; its command is used further down.  */
          for (compr = compressors; compr->suffix != NULL; compr++)
            {
              compressed_name = concat (file, ".", compr->suffix);
              if (stat (compressed_name, &stat_buf) != 0)
                {
                  if (MSDOS)
                    {
                      /* Delete one character at a time from the
                         front of the appended ".suffix", trying each
                         resulting name.  */
                      char *suf = compressed_name + strlen (file);
                      size_t suflen = strlen (compr->suffix) + 1;
                      for ( ; suf[1]; suf++, suflen--)
                        {
                          memmove (suf, suf + 1, suflen);
                          if (stat (compressed_name, &stat_buf) == 0)
                            {
                              real_name = compressed_name;
                              break;
                            }
                        }
                      if (real_name != NULL)
                        break;
                    } /* MSDOS */
                  free (compressed_name);
                  compressed_name = NULL;
                }
              else
                {
                  real_name = compressed_name;
                  break;
                }
            }
        }
      if (real_name == NULL)
        {
          perror (file);
          goto cleanup;
        }
    } /* try with a different name */

  if (!S_ISREG (stat_buf.st_mode))
    {
      error ("skipping %s: it is not a regular file.", real_name);
      goto cleanup;
    }
  if (real_name == compressed_name)
    {
      /* Read the compressor command's output through a pipe.
         NOTE(review): the file name is appended to the command
         without any shell quoting.  */
      char *cmd = concat (compr->command, " ", real_name);
      inf = (FILE *) popen (cmd, "r");
      free (cmd);
    }
  else
    inf = fopen (real_name, "r");
  if (inf == NULL)
    {
      perror (real_name);
      goto cleanup;
    }

  /* The file is always recorded under its uncompressed name.  */
  process_file (inf, uncompressed_name, lang);

  /* Close with the function that matches how the stream was opened.  */
  if (real_name == compressed_name)
    retval = pclose (inf);
  else
    retval = fclose (inf);
  if (retval < 0)
    pfatal (file);

 cleanup:
  /* REAL_NAME is only an alias of one of these two, so it is not
     freed separately.  */
  free (compressed_name);
  free (uncompressed_name);
  last_node = NULL;
  curfdp = NULL;
  return;
}
1619
1620 static void
1621 process_file (FILE *fh, char *fn, language *lang)
1622 {
1623   static const fdesc emptyfdesc;
1624   fdesc *fdp;
1625
1626   /* Create a new input file description entry. */
1627   fdp = xnew (1, fdesc);
1628   *fdp = emptyfdesc;
1629   fdp->next = fdhead;
1630   fdp->infname = savestr (fn);
1631   fdp->lang = lang;
1632   fdp->infabsname = absolute_filename (fn, cwd);
1633   fdp->infabsdir = absolute_dirname (fn, cwd);
1634   if (filename_is_absolute (fn))
1635     {
1636       /* An absolute file name.  Canonicalize it. */
1637       fdp->taggedfname = absolute_filename (fn, NULL);
1638     }
1639   else
1640     {
1641       /* A file name relative to cwd.  Make it relative
1642          to the directory of the tags file. */
1643       fdp->taggedfname = relative_filename (fn, tagfiledir);
1644     }
1645   fdp->usecharno = TRUE;        /* use char position when making tags */
1646   fdp->prop = NULL;
1647   fdp->written = FALSE;         /* not written on tags file yet */
1648
1649   fdhead = fdp;
1650   curfdp = fdhead;              /* the current file description */
1651
1652   find_entries (fh);
1653
1654   /* If not Ctags, and if this is not metasource and if it contained no #line
1655      directives, we can write the tags and free all nodes pointing to
1656      curfdp. */
1657   if (!CTAGS
1658       && curfdp->usecharno      /* no #line directives in this file */
1659       && !curfdp->lang->metasource)
1660     {
1661       node *np, *prev;
1662
1663       /* Look for the head of the sublist relative to this file.  See add_node
1664          for the structure of the node tree. */
1665       prev = NULL;
1666       for (np = nodehead; np != NULL; prev = np, np = np->left)
1667         if (np->fdp == curfdp)
1668           break;
1669
1670       /* If we generated tags for this file, write and delete them. */
1671       if (np != NULL)
1672         {
1673           /* This is the head of the last sublist, if any.  The following
1674              instructions depend on this being true. */
1675           assert (np->left == NULL);
1676
1677           assert (fdhead == curfdp);
1678           assert (last_node->fdp == curfdp);
1679           put_entries (np);     /* write tags for file curfdp->taggedfname */
1680           free_tree (np);       /* remove the written nodes */
1681           if (prev == NULL)
1682             nodehead = NULL;    /* no nodes left */
1683           else
1684             prev->left = NULL;  /* delete the pointer to the sublist */
1685         }
1686     }
1687 }
1688
1689 /*
1690  * This routine sets up the boolean pseudo-functions which work
1691  * by setting boolean flags dependent upon the corresponding character.
1692  * Every char which is NOT in that string is not a white char.  Therefore,
1693  * all of the array "_wht" is set to FALSE, and then the elements
1694  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1695  * of a char is TRUE if it is the string "white", else FALSE.
1696  */
1697 static void
1698 init (void)
1699 {
1700   register const char *sp;
1701   register int i;
1702
1703   for (i = 0; i < CHARS; i++)
1704     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1705   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1706   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1707   notinname('\0') = notinname('\n');
1708   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1709   begtoken('\0') = begtoken('\n');
1710   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1711   intoken('\0') = intoken('\n');
1712   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1713   endtoken('\0') = endtoken('\n');
1714 }
1715
1716 /*
1717  * This routine opens the specified file and calls the function
1718  * which finds the function and type definitions.
1719  */
1720 static void
1721 find_entries (FILE *inf)
1722 {
1723   char *cp;
1724   language *lang = curfdp->lang;
1725   Lang_function *parser = NULL;
1726
1727   /* If user specified a language, use it. */
1728   if (lang != NULL && lang->function != NULL)
1729     {
1730       parser = lang->function;
1731     }
1732
1733   /* Else try to guess the language given the file name. */
1734   if (parser == NULL)
1735     {
1736       lang = get_language_from_filename (curfdp->infname, TRUE);
1737       if (lang != NULL && lang->function != NULL)
1738         {
1739           curfdp->lang = lang;
1740           parser = lang->function;
1741         }
1742     }
1743
1744   /* Else look for sharp-bang as the first two characters. */
1745   if (parser == NULL
1746       && readline_internal (&lb, inf) > 0
1747       && lb.len >= 2
1748       && lb.buffer[0] == '#'
1749       && lb.buffer[1] == '!')
1750     {
1751       char *lp;
1752
1753       /* Set lp to point at the first char after the last slash in the
1754          line or, if no slashes, at the first nonblank.  Then set cp to
1755          the first successive blank and terminate the string. */
1756       lp = etags_strrchr (lb.buffer+2, '/');
1757       if (lp != NULL)
1758         lp += 1;
1759       else
1760         lp = skip_spaces (lb.buffer + 2);
1761       cp = skip_non_spaces (lp);
1762       *cp = '\0';
1763
1764       if (strlen (lp) > 0)
1765         {
1766           lang = get_language_from_interpreter (lp);
1767           if (lang != NULL && lang->function != NULL)
1768             {
1769               curfdp->lang = lang;
1770               parser = lang->function;
1771             }
1772         }
1773     }
1774
1775   /* We rewind here, even if inf may be a pipe.  We fail if the
1776      length of the first line is longer than the pipe block size,
1777      which is unlikely. */
1778   rewind (inf);
1779
1780   /* Else try to guess the language given the case insensitive file name. */
1781   if (parser == NULL)
1782     {
1783       lang = get_language_from_filename (curfdp->infname, FALSE);
1784       if (lang != NULL && lang->function != NULL)
1785         {
1786           curfdp->lang = lang;
1787           parser = lang->function;
1788         }
1789     }
1790
1791   /* Else try Fortran or C. */
1792   if (parser == NULL)
1793     {
1794       node *old_last_node = last_node;
1795
1796       curfdp->lang = get_language_from_langname ("fortran");
1797       find_entries (inf);
1798
1799       if (old_last_node == last_node)
1800         /* No Fortran entries found.  Try C. */
1801         {
1802           /* We do not tag if rewind fails.
1803              Only the file name will be recorded in the tags file. */
1804           rewind (inf);
1805           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1806           find_entries (inf);
1807         }
1808       return;
1809     }
1810
1811   if (!no_line_directive
1812       && curfdp->lang != NULL && curfdp->lang->metasource)
1813     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1814        file, or anyway we parsed a file that is automatically generated from
1815        this one.  If this is the case, the bingo.c file contained #line
1816        directives that generated tags pointing to this file.  Let's delete
1817        them all before parsing this file, which is the real source. */
1818     {
1819       fdesc **fdpp = &fdhead;
1820       while (*fdpp != NULL)
1821         if (*fdpp != curfdp
1822             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1823           /* We found one of those!  We must delete both the file description
1824              and all tags referring to it. */
1825           {
1826             fdesc *badfdp = *fdpp;
1827
1828             /* Delete the tags referring to badfdp->taggedfname
1829                that were obtained from badfdp->infname. */
1830             invalidate_nodes (badfdp, &nodehead);
1831
1832             *fdpp = badfdp->next; /* remove the bad description from the list */
1833             free_fdesc (badfdp);
1834           }
1835         else
1836           fdpp = &(*fdpp)->next; /* advance the list pointer */
1837     }
1838
1839   assert (parser != NULL);
1840
1841   /* Generic initialisations before reading from file. */
1842   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1843
1844   /* Generic initialisations before parsing file with readline. */
1845   lineno = 0;                  /* reset global line number */
1846   charno = 0;                  /* reset global char number */
1847   linecharno = 0;              /* reset global char number of line start */
1848
1849   parser (inf);
1850
1851   regex_tag_multiline ();
1852 }
1853
1854 \f
1855 /*
1856  * Check whether an implicitly named tag should be created,
1857  * then call `pfnote'.
1858  * NAME is a string that is internally copied by this function.
1859  *
1860  * TAGS format specification
1861  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1862  * The following is explained in some more detail in etc/ETAGS.EBNF.
1863  *
1864  * make_tag creates tags with "implicit tag names" (unnamed tags)
1865  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1866  *  1. NAME does not contain any of the characters in NONAM;
1867  *  2. LINESTART contains name as either a rightmost, or rightmost but
1868  *     one character, substring;
1869  *  3. the character, if any, immediately before NAME in LINESTART must
1870  *     be a character in NONAM;
1871  *  4. the character, if any, immediately after NAME in LINESTART must
1872  *     also be a character in NONAM.
1873  *
1874  * The implementation uses the notinname() macro, which recognises the
1875  * characters stored in the string `nonam'.
1876  * etags.el needs to use the same characters that are in NONAM.
1877  */
1878 static void
1879 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1880           int namelen,          /* tag length */
1881           int is_func,          /* tag is a function */
1882           char *linestart,      /* start of the line where tag is */
1883           int linelen,          /* length of the line where tag is */
1884           int lno,              /* line number */
1885           long int cno)         /* character number */
1886 {
1887   bool named = (name != NULL && namelen > 0);
1888   char *nname = NULL;
1889
1890   if (!CTAGS && named)          /* maybe set named to false */
1891     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1892        such that etags.el can guess a name from it. */
1893     {
1894       int i;
1895       register const char *cp = name;
1896
1897       for (i = 0; i < namelen; i++)
1898         if (notinname (*cp++))
1899           break;
1900       if (i == namelen)                         /* rule #1 */
1901         {
1902           cp = linestart + linelen - namelen;
1903           if (notinname (linestart[linelen-1]))
1904             cp -= 1;                            /* rule #4 */
1905           if (cp >= linestart                   /* rule #2 */
1906               && (cp == linestart
1907                   || notinname (cp[-1]))        /* rule #3 */
1908               && strneq (name, cp, namelen))    /* rule #2 */
1909             named = FALSE;      /* use implicit tag name */
1910         }
1911     }
1912
1913   if (named)
1914     nname = savenstr (name, namelen);
1915
1916   pfnote (nname, is_func, linestart, linelen, lno, cno);
1917 }
1918
1919 /* Record a tag. */
1920 static void
1921 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1922                                 /* tag name, or NULL if unnamed */
1923                                 /* tag is a function */
1924                                 /* start of the line where tag is */
1925                                 /* length of the line where tag is */
1926                                 /* line number */
1927                                 /* character number */
1928 {
1929   register node *np;
1930
1931   assert (name == NULL || name[0] != '\0');
1932   if (CTAGS && name == NULL)
1933     return;
1934
1935   np = xnew (1, node);
1936
1937   /* If ctags mode, change name "main" to M<thisfilename>. */
1938   if (CTAGS && !cxref_style && streq (name, "main"))
1939     {
1940       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1941       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1942       fp = etags_strrchr (np->name, '.');
1943       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1944         fp[0] = '\0';
1945     }
1946   else
1947     np->name = name;
1948   np->valid = TRUE;
1949   np->been_warned = FALSE;
1950   np->fdp = curfdp;
1951   np->is_func = is_func;
1952   np->lno = lno;
1953   if (np->fdp->usecharno)
1954     /* Our char numbers are 0-base, because of C language tradition?
1955        ctags compatibility?  old versions compatibility?   I don't know.
1956        Anyway, since emacs's are 1-base we expect etags.el to take care
1957        of the difference.  If we wanted to have 1-based numbers, we would
1958        uncomment the +1 below. */
1959     np->cno = cno /* + 1 */ ;
1960   else
1961     np->cno = invalidcharno;
1962   np->left = np->right = NULL;
1963   if (CTAGS && !cxref_style)
1964     {
1965       if (strlen (linestart) < 50)
1966         np->regex = concat (linestart, "$", "");
1967       else
1968         np->regex = savenstr (linestart, 50);
1969     }
1970   else
1971     np->regex = savenstr (linestart, linelen);
1972
1973   add_node (np, &nodehead);
1974 }
1975
1976 /*
1977  * free_tree ()
1978  *      recurse on left children, iterate on right children.
1979  */
1980 static void
1981 free_tree (register node *np)
1982 {
1983   while (np)
1984     {
1985       register node *node_right = np->right;
1986       free_tree (np->left);
1987       free (np->name);
1988       free (np->regex);
1989       free (np);
1990       np = node_right;
1991     }
1992 }
1993
1994 /*
1995  * free_fdesc ()
1996  *      delete a file description
1997  */
1998 static void
1999 free_fdesc (register fdesc *fdp)
2000 {
2001   free (fdp->infname);
2002   free (fdp->infabsname);
2003   free (fdp->infabsdir);
2004   free (fdp->taggedfname);
2005   free (fdp->prop);
2006   free (fdp);
2007 }
2008
2009 /*
2010  * add_node ()
2011  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2012  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2013  *      balancing.
2014  *
2015  *      add_node is the only function allowed to add nodes, so it can
2016  *      maintain state.
2017  */
2018 static void
2019 add_node (node *np, node **cur_node_p)
2020 {
2021   register int dif;
2022   register node *cur_node = *cur_node_p;
2023
2024   if (cur_node == NULL)
2025     {
2026       *cur_node_p = np;
2027       last_node = np;
2028       return;
2029     }
2030
2031   if (!CTAGS)
2032     /* Etags Mode */
2033     {
2034       /* For each file name, tags are in a linked sublist on the right
2035          pointer.  The first tags of different files are a linked list
2036          on the left pointer.  last_node points to the end of the last
2037          used sublist. */
2038       if (last_node != NULL && last_node->fdp == np->fdp)
2039         {
2040           /* Let's use the same sublist as the last added node. */
2041           assert (last_node->right == NULL);
2042           last_node->right = np;
2043           last_node = np;
2044         }
2045       else if (cur_node->fdp == np->fdp)
2046         {
2047           /* Scanning the list we found the head of a sublist which is
2048              good for us.  Let's scan this sublist. */
2049           add_node (np, &cur_node->right);
2050         }
2051       else
2052         /* The head of this sublist is not good for us.  Let's try the
2053            next one. */
2054         add_node (np, &cur_node->left);
2055     } /* if ETAGS mode */
2056
2057   else
2058     {
2059       /* Ctags Mode */
2060       dif = strcmp (np->name, cur_node->name);
2061
2062       /*
2063        * If this tag name matches an existing one, then
2064        * do not add the node, but maybe print a warning.
2065        */
2066       if (no_duplicates && !dif)
2067         {
2068           if (np->fdp == cur_node->fdp)
2069             {
2070               if (!no_warnings)
2071                 {
2072                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2073                            np->fdp->infname, lineno, np->name);
2074                   fprintf (stderr, "Second entry ignored\n");
2075                 }
2076             }
2077           else if (!cur_node->been_warned && !no_warnings)
2078             {
2079               fprintf
2080                 (stderr,
2081                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2082                  np->fdp->infname, cur_node->fdp->infname, np->name);
2083               cur_node->been_warned = TRUE;
2084             }
2085           return;
2086         }
2087
2088       /* Actually add the node */
2089       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2090     } /* if CTAGS mode */
2091 }
2092
2093 /*
2094  * invalidate_nodes ()
2095  *      Scan the node tree and invalidate all nodes pointing to the
2096  *      given file description (CTAGS case) or free them (ETAGS case).
2097  */
2098 static void
2099 invalidate_nodes (fdesc *badfdp, node **npp)
2100 {
2101   node *np = *npp;
2102
2103   if (np == NULL)
2104     return;
2105
2106   if (CTAGS)
2107     {
2108       if (np->left != NULL)
2109         invalidate_nodes (badfdp, &np->left);
2110       if (np->fdp == badfdp)
2111         np->valid = FALSE;
2112       if (np->right != NULL)
2113         invalidate_nodes (badfdp, &np->right);
2114     }
2115   else
2116     {
2117       assert (np->fdp != NULL);
2118       if (np->fdp == badfdp)
2119         {
2120           *npp = np->left;      /* detach the sublist from the list */
2121           np->left = NULL;      /* isolate it */
2122           free_tree (np);       /* free it */
2123           invalidate_nodes (badfdp, npp);
2124         }
2125       else
2126         invalidate_nodes (badfdp, &np->left);
2127     }
2128 }
2129
2130 \f
2131 static int total_size_of_entries (node *);
2132 static int number_len (long);
2133
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  The result is 1 for any NUM below 10. */
static int
number_len (long int num)
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2143
2144 /*
2145  * Return total number of characters that put_entries will output for
2146  * the nodes in the linked list at the right of the specified node.
2147  * This count is irrelevant with etags.el since emacs 19.34 at least,
2148  * but is still supplied for backward compatibility.
2149  */
2150 static int
2151 total_size_of_entries (register node *np)
2152 {
2153   register int total = 0;
2154
2155   for (; np != NULL; np = np->right)
2156     if (np->valid)
2157       {
2158         total += strlen (np->regex) + 1;                /* pat\177 */
2159         if (np->name != NULL)
2160           total += strlen (np->name) + 1;               /* name\001 */
2161         total += number_len ((long) np->lno) + 1;       /* lno, */
2162         if (np->cno != invalidcharno)                   /* cno */
2163           total += number_len (np->cno);
2164         total += 1;                                     /* newline */
2165       }
2166
2167   return total;
2168 }
2169
2170 static void
2171 put_entries (register node *np)
2172 {
2173   register char *sp;
2174   static fdesc *fdp = NULL;
2175
2176   if (np == NULL)
2177     return;
2178
2179   /* Output subentries that precede this one */
2180   if (CTAGS)
2181     put_entries (np->left);
2182
2183   /* Output this entry */
2184   if (np->valid)
2185     {
2186       if (!CTAGS)
2187         {
2188           /* Etags mode */
2189           if (fdp != np->fdp)
2190             {
2191               fdp = np->fdp;
2192               fprintf (tagf, "\f\n%s,%d\n",
2193                        fdp->taggedfname, total_size_of_entries (np));
2194               fdp->written = TRUE;
2195             }
2196           fputs (np->regex, tagf);
2197           fputc ('\177', tagf);
2198           if (np->name != NULL)
2199             {
2200               fputs (np->name, tagf);
2201               fputc ('\001', tagf);
2202             }
2203           fprintf (tagf, "%d,", np->lno);
2204           if (np->cno != invalidcharno)
2205             fprintf (tagf, "%ld", np->cno);
2206           fputs ("\n", tagf);
2207         }
2208       else
2209         {
2210           /* Ctags mode */
2211           if (np->name == NULL)
2212             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2213
2214           if (cxref_style)
2215             {
2216               if (vgrind_style)
2217                 fprintf (stdout, "%s %s %d\n",
2218                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2219               else
2220                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2221                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2222             }
2223           else
2224             {
2225               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2226
2227               if (np->is_func)
2228                 {               /* function or #define macro with args */
2229                   putc (searchar, tagf);
2230                   putc ('^', tagf);
2231
2232                   for (sp = np->regex; *sp; sp++)
2233                     {
2234                       if (*sp == '\\' || *sp == searchar)
2235                         putc ('\\', tagf);
2236                       putc (*sp, tagf);
2237                     }
2238                   putc (searchar, tagf);
2239                 }
2240               else
2241                 {               /* anything else; text pattern inadequate */
2242                   fprintf (tagf, "%d", np->lno);
2243                 }
2244               putc ('\n', tagf);
2245             }
2246         }
2247     } /* if this node contains a valid tag */
2248
2249   /* Output subentries that follow this one */
2250   put_entries (np->right);
2251   if (!CTAGS)
2252     put_entries (np->left);
2253 }
2254
2255 \f
2256 /* C extensions. */
2257 #define C_EXT   0x00fff         /* C extensions */
2258 #define C_PLAIN 0x00000         /* C */
2259 #define C_PLPL  0x00001         /* C++ */
2260 #define C_STAR  0x00003         /* C* */
2261 #define C_JAVA  0x00005         /* JAVA */
2262 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
2263 #define YACC    0x10000         /* yacc file */
2264
/*
 * The C symbol tables.
 *
 * enum sym_type classifies the keywords looked up by C_symtype.  The
 * keyword table that follows maps:
 *   st_C_objprot     @interface, @protocol
 *   st_C_objimpl     @implementation
 *   st_C_objend      @end
 *   st_C_gnumacro    DEFUN, SYSCALL, ENTRY, PSEUDO
 *   st_C_ignore      if, for, while, switch, return, import, package,
 *                    friend
 *   st_C_attribute   __attribute__, GTY
 *   st_C_javastruct  extends, implements
 *   st_C_operator    operator
 *   st_C_class       class
 *   st_C_template    template
 *   st_C_struct      struct, union, namespace, domain, interface
 *   st_C_extern      extern
 *   st_C_enum        enum
 *   st_C_define      define, undef
 *   st_C_typedef     typedef
 * st_none is what C_symtype returns for a word that is not in the
 * table, or that is not a keyword of the dialect being parsed.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2279
2280 static unsigned int hash (const char *, unsigned int);
2281 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2282 static enum sym_type C_symtype (char *, int, int);
2283
2284 /* Feed stuff between (but not including) %[ and %] lines to:
2285      gperf -m 5
2286 %[
2287 %compare-strncmp
2288 %enum
2289 %struct-type
2290 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2291 %%
2292 if,             0,                      st_C_ignore
2293 for,            0,                      st_C_ignore
2294 while,          0,                      st_C_ignore
2295 switch,         0,                      st_C_ignore
2296 return,         0,                      st_C_ignore
2297 __attribute__,  0,                      st_C_attribute
2298 GTY,            0,                      st_C_attribute
2299 @interface,     0,                      st_C_objprot
2300 @protocol,      0,                      st_C_objprot
2301 @implementation,0,                      st_C_objimpl
2302 @end,           0,                      st_C_objend
2303 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2304 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2305 friend,         C_PLPL,                 st_C_ignore
2306 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2307 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2308 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2309 class,          0,                      st_C_class
2310 namespace,      C_PLPL,                 st_C_struct
2311 domain,         C_STAR,                 st_C_struct
2312 union,          0,                      st_C_struct
2313 struct,         0,                      st_C_struct
2314 extern,         0,                      st_C_extern
2315 enum,           0,                      st_C_enum
2316 typedef,        0,                      st_C_typedef
2317 define,         0,                      st_C_define
2318 undef,          0,                      st_C_define
2319 operator,       C_PLPL,                 st_C_operator
2320 template,       0,                      st_C_template
2321 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2322 DEFUN,          0,                      st_C_gnumacro
2323 SYSCALL,        0,                      st_C_gnumacro
2324 ENTRY,          0,                      st_C_gnumacro
2325 PSEUDO,         0,                      st_C_gnumacro
2326 # These are defined inside C functions, so currently they are not met.
2327 # EXFUN used in glibc, DEFVAR_* in emacs.
2328 #EXFUN,         0,                      st_C_gnumacro
2329 #DEFVAR_,       0,                      st_C_gnumacro
2330 %]
2331 and replace lines between %< and %> with its output, then:
2332  - remove the #if characterset check
2333  - make in_word_set static and not inline. */
2334 /*%<*/
2335 /* C code produced by gperf version 3.0.1 */
2336 /* Command-line: gperf -m 5  */
2337 /* Computed positions: -k'2-3' */
2338
2339 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2340 /* maximum key range = 33, duplicates = 0 */
2341
/* Perfect hash function for the C keyword table, as produced by gperf.
   The hash of a word is its length LEN plus the association value of
   its second character and, unless LEN is 2, of its third character.
   STR must hold at least two characters; in_word_set guarantees it by
   checking LEN before calling.  */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static const unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* The third character counts for every length but 2. */
  if (hval != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2387
2388 static struct C_stab_entry *
2389 in_word_set (register const char *str, register unsigned int len)
2390 {
2391   enum
2392     {
2393       TOTAL_KEYWORDS = 33,
2394       MIN_WORD_LENGTH = 2,
2395       MAX_WORD_LENGTH = 15,
2396       MIN_HASH_VALUE = 2,
2397       MAX_HASH_VALUE = 34
2398     };
2399
2400   static struct C_stab_entry wordlist[] =
2401     {
2402       {""}, {""},
2403       {"if",            0,                      st_C_ignore},
2404       {"GTY",           0,                      st_C_attribute},
2405       {"@end",          0,                      st_C_objend},
2406       {"union",         0,                      st_C_struct},
2407       {"define",                0,                      st_C_define},
2408       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2409       {"template",      0,                      st_C_template},
2410       {"operator",      C_PLPL,                 st_C_operator},
2411       {"@interface",    0,                      st_C_objprot},
2412       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2413       {"friend",                C_PLPL,                 st_C_ignore},
2414       {"typedef",       0,                      st_C_typedef},
2415       {"return",                0,                      st_C_ignore},
2416       {"@implementation",0,                     st_C_objimpl},
2417       {"@protocol",     0,                      st_C_objprot},
2418       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2419       {"extern",                0,                      st_C_extern},
2420       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2421       {"struct",                0,                      st_C_struct},
2422       {"domain",                C_STAR,                 st_C_struct},
2423       {"switch",                0,                      st_C_ignore},
2424       {"enum",          0,                      st_C_enum},
2425       {"for",           0,                      st_C_ignore},
2426       {"namespace",     C_PLPL,                 st_C_struct},
2427       {"class",         0,                      st_C_class},
2428       {"while",         0,                      st_C_ignore},
2429       {"undef",         0,                      st_C_define},
2430       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2431       {"__attribute__", 0,                      st_C_attribute},
2432       {"SYSCALL",       0,                      st_C_gnumacro},
2433       {"ENTRY",         0,                      st_C_gnumacro},
2434       {"PSEUDO",                0,                      st_C_gnumacro},
2435       {"DEFUN",         0,                      st_C_gnumacro}
2436     };
2437
2438   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2439     {
2440       register int key = hash (str, len);
2441
2442       if (key <= MAX_HASH_VALUE && key >= 0)
2443         {
2444           register const char *s = wordlist[key].name;
2445
2446           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2447             return &wordlist[key];
2448         }
2449     }
2450   return 0;
2451 }
2452 /*%>*/
2453
2454 static enum sym_type
2455 C_symtype (char *str, int len, int c_ext)
2456 {
2457   register struct C_stab_entry *se = in_word_set (str, len);
2458
2459   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2460     return st_none;
2461   return se->type;
2462 }
2463
2464 \f
2465 /*
2466  * Ignoring __attribute__ ((list))
      *
      * consider_token sets this flag when the keyword table classifies
      * the current token as st_C_attribute.  While it is set, C_entries
      * does not pass tokens to consider_token.
2467  */
2468 static bool inattribute;        /* looking at an __attribute__ construct */
2469
2470 /*
2471  * C functions and variables are recognized using a simple
2472  * finite automaton.  fvdef is its state variable.
      *
      * consider_token moves it to fdefunkey, fdefunname, foperator,
      * fvnameseen and vignore, and resets it to fvnone.  The
      * parameter-list states (fstartlist, finlist, flistseen) and
      * fignore are never assigned by consider_token.
2473  */
2474 static enum
2475 {
2476   fvnone,                       /* nothing seen */
2477   fdefunkey,                    /* Emacs DEFUN keyword seen */
2478   fdefunname,                   /* Emacs DEFUN name seen */
2479   foperator,                    /* func: operator keyword seen (cplpl) */
2480   fvnameseen,                   /* function or variable name seen */
2481   fstartlist,                   /* func: just after open parenthesis */
2482   finlist,                      /* func: in parameter list */
2483   flistseen,                    /* func: after parameter list */
2484   fignore,                      /* func: before open brace */
2485   vignore                       /* var-like: ignore until ';' */
2486 } fvdef;
2487
2488 static bool fvextern;           /* func or var: extern keyword seen */
2489
2490 /*
2491  * typedefs are recognized using a simple finite automaton.
2492  * typdef is its state variable.
      *
      * consider_token leaves tnone for tkeyseen only when the `typedefs'
      * option is set.  While typdef is tignore, C_entries does not pass
      * tokens to consider_token.
2493  */
2494 static enum
2495 {
2496   tnone,                        /* nothing seen */
2497   tkeyseen,                     /* typedef keyword seen */
2498   ttypeseen,                    /* defined type seen */
2499   tinbody,                      /* inside typedef body */
2500   tend,                         /* just before typedef tag */
2501   tignore                       /* junk after typedef tag */
2502 } typdef;
2503
2504 /*
2505  * struct-like structures (enum, struct and union) are recognized
2506  * using another simple finite automaton.  `structdef' is its state
2507  * variable.
      *
      * In consider_token the class, interface, domain and namespace
      * keywords enter this automaton through the same path as struct,
      * union and enum.
2508  */
2509 static enum
2510 {
2511   snone,                        /* nothing seen yet,
2512                                    or in struct body if bracelev > 0 */
2513   skeyseen,                     /* struct-like keyword seen */
2514   stagseen,                     /* struct-like tag seen */
2515   scolonseen                    /* colon (or Java extends/implements)
                                        seen after struct-like tag */
2516 } structdef;
2517
2518 /*
2519  * When objdef is different from onone, objtag is the name of the class.
      *
      * consider_token stores here a copy of the class name made with
      * savenstr; that copy is deliberately never freed (see the comment
      * in the oignore case of consider_token).
2520  */
2521 static const char *objtag = "<uninited>";
2522
2523 /*
2524  * Yet another little state machine to deal with preprocessor lines.
      *
      * C_entries enters dsharpseen; consider_token then moves to
      * ddefineseen or dignorerest depending on the next token.  The CNL
      * macro resets the state to dnone at end of line, whereas
      * CNL_SAVE_DEFINEDEF leaves it untouched.
2525  */
2526 static enum
2527 {
2528   dnone,                        /* nothing seen */
2529   dsharpseen,                   /* '#' seen as first char on line */
2530   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2531   dignorerest                   /* ignore rest of line */
2532 } definedef;
2533
2534 /*
2535  * State machine for Objective C protocols and implementations.
2536  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2537  */
2538 static enum
2539 {
2540   onone,                        /* nothing seen */
2541   oprotocol,                    /* @interface or @protocol seen */
2542   oimplementation,              /* @implementation seen */
2543   otagseen,                     /* class name seen */
2544   oparenseen,                   /* parenthesis before category seen */
2545   ocatseen,                     /* category name seen */
2546   oinbody,                      /* in @implementation body */
2547   omethodsign,                  /* in @implementation body, after +/- */
2548   omethodtag,                   /* after method name */
2549   omethodcolon,                 /* after method colon */
2550   omethodparm,                  /* after method parameter */
2551   oignore                       /* wait for @end */
2552 } objdef;
2553
2554
2555 /*
2556  * Use this structure to keep info about the token read, and how it
2557  * should be tagged.  Used by the make_C_tag function to build a tag.
2558  */
2559 static struct tok
2560 {
2561   char *line;                   /* string containing the token */
2562   int offset;                   /* where the token starts in LINE */
2563   int length;                   /* token length */
2564   /*
2565     The previous members can be used to pass strings around for generic
2566     purposes.  The following ones specifically refer to creating tags.  In this
2567     case the token contained here is the pattern that will be used to create a
2568     tag.
2569   */
2570   bool valid;                   /* when FALSE, do not create a tag; the
2571                                    token should be invalidated whenever a
2572                                    state machine is reset prematurely.
                                        make_C_tag clears it after each call */
2573   bool named;                   /* create a named tag */
2574   int lineno;                   /* source line number of tag */
2575   long linepos;                 /* source char number of tag */
2576 } token;                        /* latest token read */
2577
2578 /*
2579  * Variables and functions for dealing with nested structures.
2580  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
      *
      * cname and bracelev are parallel arrays of `size' elements, of
      * which the first `nl' are in use.  pushclass_above doubles both
      * arrays when they are full.
2581  */
2582 static void pushclass_above (int, char *, int);
2583 static void popclass_above (int);
2584 static void write_classname (linebuffer *, const char *qualifier);
2585
2586 static struct {
2587   char **cname;                 /* nested class names; an entry is NULL
                                        when pushed without a name */
2588   int *bracelev;                /* nested class brace level */
2589   int nl;                       /* class nesting level (elements used) */
2590   int size;                     /* length of the array */
2591 } cstack;                       /* stack for nested declaration tags */
2592 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2593 #define nestlev         (cstack.nl)
2594 /* After struct keyword or in struct body, not inside a nested function. */
     /* Expands to an expression that reads a variable named `bracelev',
        which must be in scope where the macro is used. */
2595 #define instruct        (structdef == snone && nestlev > 0                      \
2596                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2597
2598 static void
2599 pushclass_above (int bracelev, char *str, int len)
2600 {
2601   int nl;
2602
2603   popclass_above (bracelev);
2604   nl = cstack.nl;
2605   if (nl >= cstack.size)
2606     {
2607       int size = cstack.size *= 2;
2608       xrnew (cstack.cname, size, char *);
2609       xrnew (cstack.bracelev, size, int);
2610     }
2611   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2612   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2613   cstack.bracelev[nl] = bracelev;
2614   cstack.nl = nl + 1;
2615 }
2616
2617 static void
2618 popclass_above (int bracelev)
2619 {
2620   int nl;
2621
2622   for (nl = cstack.nl - 1;
2623        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2624        nl--)
2625     {
2626       free (cstack.cname[nl]);
2627       cstack.nl = nl;
2628     }
2629 }
2630
2631 static void
2632 write_classname (linebuffer *cn, const char *qualifier)
2633 {
2634   int i, len;
2635   int qlen = strlen (qualifier);
2636
2637   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2638     {
2639       len = 0;
2640       cn->len = 0;
2641       cn->buffer[0] = '\0';
2642     }
2643   else
2644     {
2645       len = strlen (cstack.cname[0]);
2646       linebuffer_setlen (cn, len);
2647       strcpy (cn->buffer, cstack.cname[0]);
2648     }
2649   for (i = 1; i < cstack.nl; i++)
2650     {
2651       char *s;
2652       int slen;
2653
2654       s = cstack.cname[i];
2655       if (s == NULL)
2656         continue;
2657       slen = strlen (s);
2658       len += slen + qlen;
2659       linebuffer_setlen (cn, len);
2660       strncat (cn->buffer, qualifier, qlen);
2661       strncat (cn->buffer, s, slen);
2662     }
2663 }
2664
2665 \f
2666 static bool consider_token (char *, int, int, int *, int, int, bool *);
2667 static void make_C_tag (bool);
2668
2669 /*
2670  * consider_token ()
2671  *      checks to see if the current token is at the start of a
2672  *      function or variable, or corresponds to a typedef, or
2673  *      is a struct/union/enum tag, or #define, or an enum constant.
2674  *
2675  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2676  *      with args.  C_EXTP points to which language we are looking at.
2677  *
2678  * Globals
2679  *      fvdef                   IN OUT
2680  *      structdef               IN OUT
2681  *      definedef               IN OUT
2682  *      typdef                  IN OUT
2683  *      objdef                  IN OUT
2684  */
2685
2686 static bool
2687 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2688                                 /* IN: token pointer */
2689                                 /* IN: token length */
2690                                 /* IN: first char after the token */
2691                                 /* IN, OUT: C extensions mask */
2692                                 /* IN: brace level */
2693                                 /* IN: parenthesis level */
2694                                 /* OUT: function or variable found */
2695 {
2696   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2697      structtype is the type of the preceding struct-like keyword, and
2698      structbracelev is the brace level where it has been seen. */
2699   static enum sym_type structtype;
2700   static int structbracelev;
2701   static enum sym_type toktype;
2702
2703
2704   toktype = C_symtype (str, len, *c_extp);
2705
2706   /*
2707    * Skip __attribute__
2708    */
2709   if (toktype == st_C_attribute)
2710     {
2711       inattribute = TRUE;
2712       return FALSE;
2713      }
2714
2715    /*
2716     * Advance the definedef state machine.
2717     */
2718    switch (definedef)
2719      {
2720      case dnone:
2721        /* We're not on a preprocessor line. */
2722        if (toktype == st_C_gnumacro)
2723          {
2724            fvdef = fdefunkey;
2725            return FALSE;
2726          }
2727        break;
2728      case dsharpseen:
2729        if (toktype == st_C_define)
2730          {
2731            definedef = ddefineseen;
2732          }
2733        else
2734          {
2735            definedef = dignorerest;
2736          }
2737        return FALSE;
2738      case ddefineseen:
2739        /*
2740         * Make a tag for any macro, unless it is a constant
2741         * and constantypedefs is FALSE.
2742         */
2743        definedef = dignorerest;
2744        *is_func_or_var = (c == '(');
2745        if (!*is_func_or_var && !constantypedefs)
2746          return FALSE;
2747        else
2748          return TRUE;
2749      case dignorerest:
2750        return FALSE;
2751      default:
2752        error ("internal error: definedef value.", (char *)NULL);
2753      }
2754
2755    /*
2756     * Now typedefs
2757     */
2758    switch (typdef)
2759      {
2760      case tnone:
2761        if (toktype == st_C_typedef)
2762          {
2763            if (typedefs)
2764              typdef = tkeyseen;
2765            fvextern = FALSE;
2766            fvdef = fvnone;
2767            return FALSE;
2768          }
2769        break;
2770      case tkeyseen:
2771        switch (toktype)
2772          {
2773          case st_none:
2774          case st_C_class:
2775          case st_C_struct:
2776          case st_C_enum:
2777            typdef = ttypeseen;
2778          }
2779        break;
2780      case ttypeseen:
2781        if (structdef == snone && fvdef == fvnone)
2782          {
2783            fvdef = fvnameseen;
2784            return TRUE;
2785          }
2786        break;
2787      case tend:
2788        switch (toktype)
2789          {
2790          case st_C_class:
2791          case st_C_struct:
2792          case st_C_enum:
2793            return FALSE;
2794          }
2795        return TRUE;
2796      }
2797
2798    switch (toktype)
2799      {
2800      case st_C_javastruct:
2801        if (structdef == stagseen)
2802          structdef = scolonseen;
2803        return FALSE;
2804      case st_C_template:
2805      case st_C_class:
2806        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2807            && bracelev == 0
2808            && definedef == dnone && structdef == snone
2809            && typdef == tnone && fvdef == fvnone)
2810          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2811        if (toktype == st_C_template)
2812          break;
2813        /* FALLTHRU */
2814      case st_C_struct:
2815      case st_C_enum:
2816        if (parlev == 0
2817            && fvdef != vignore
2818            && (typdef == tkeyseen
2819                || (typedefs_or_cplusplus && structdef == snone)))
2820          {
2821            structdef = skeyseen;
2822            structtype = toktype;
2823            structbracelev = bracelev;
2824            if (fvdef == fvnameseen)
2825              fvdef = fvnone;
2826          }
2827        return FALSE;
2828      }
2829
2830    if (structdef == skeyseen)
2831      {
2832        structdef = stagseen;
2833        return TRUE;
2834      }
2835
2836    if (typdef != tnone)
2837      definedef = dnone;
2838
2839    /* Detect Objective C constructs. */
2840    switch (objdef)
2841      {
2842      case onone:
2843        switch (toktype)
2844          {
2845          case st_C_objprot:
2846            objdef = oprotocol;
2847            return FALSE;
2848          case st_C_objimpl:
2849            objdef = oimplementation;
2850            return FALSE;
2851          }
2852        break;
2853      case oimplementation:
2854        /* Save the class tag for functions or variables defined inside. */
2855        objtag = savenstr (str, len);
2856        objdef = oinbody;
2857        return FALSE;
2858      case oprotocol:
2859        /* Save the class tag for categories. */
2860        objtag = savenstr (str, len);
2861        objdef = otagseen;
2862        *is_func_or_var = TRUE;
2863        return TRUE;
2864      case oparenseen:
2865        objdef = ocatseen;
2866        *is_func_or_var = TRUE;
2867        return TRUE;
2868      case oinbody:
2869        break;
2870      case omethodsign:
2871        if (parlev == 0)
2872          {
2873            fvdef = fvnone;
2874            objdef = omethodtag;
2875            linebuffer_setlen (&token_name, len);
2876            strncpy (token_name.buffer, str, len);
2877            token_name.buffer[len] = '\0';
2878            return TRUE;
2879          }
2880        return FALSE;
2881      case omethodcolon:
2882        if (parlev == 0)
2883          objdef = omethodparm;
2884        return FALSE;
2885      case omethodparm:
2886        if (parlev == 0)
2887          {
2888            fvdef = fvnone;
2889            objdef = omethodtag;
2890            linebuffer_setlen (&token_name, token_name.len + len);
2891            strncat (token_name.buffer, str, len);
2892            return TRUE;
2893          }
2894        return FALSE;
2895      case oignore:
2896        if (toktype == st_C_objend)
2897          {
2898            /* Memory leakage here: the string pointed by objtag is
2899               never released, because many tests would be needed to
2900               avoid breaking on incorrect input code.  The amount of
2901               memory leaked here is the sum of the lengths of the
2902               class tags.
2903            free (objtag); */
2904            objdef = onone;
2905          }
2906        return FALSE;
2907      }
2908
2909    /* A function, variable or enum constant? */
2910    switch (toktype)
2911      {
2912      case st_C_extern:
2913        fvextern = TRUE;
2914        switch  (fvdef)
2915          {
2916          case finlist:
2917          case flistseen:
2918          case fignore:
2919          case vignore:
2920            break;
2921          default:
2922            fvdef = fvnone;
2923          }
2924        return FALSE;
2925      case st_C_ignore:
2926        fvextern = FALSE;
2927        fvdef = vignore;
2928        return FALSE;
2929      case st_C_operator:
2930        fvdef = foperator;
2931        *is_func_or_var = TRUE;
2932        return TRUE;
2933      case st_none:
2934        if (constantypedefs
2935            && structdef == snone
2936            && structtype == st_C_enum && bracelev > structbracelev)
2937          return TRUE;           /* enum constant */
2938        switch (fvdef)
2939          {
2940          case fdefunkey:
2941            if (bracelev > 0)
2942              break;
2943            fvdef = fdefunname;  /* GNU macro */
2944            *is_func_or_var = TRUE;
2945            return TRUE;
2946          case fvnone:
2947            switch (typdef)
2948              {
2949              case ttypeseen:
2950                return FALSE;
2951              case tnone:
2952                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2953                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2954                  {
2955                    fvdef = vignore;
2956                    return FALSE;
2957                  }
2958                break;
2959              }
2960           /* FALLTHRU */
2961           case fvnameseen:
2962           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2963             {
2964               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2965                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2966               fvdef = foperator;
2967               *is_func_or_var = TRUE;
2968               return TRUE;
2969             }
2970           if (bracelev > 0 && !instruct)
2971             break;
2972           fvdef = fvnameseen;   /* function or variable */
2973           *is_func_or_var = TRUE;
2974           return TRUE;
2975         }
2976       break;
2977     }
2978
2979   return FALSE;
2980 }
2981
2982 \f
2983 /*
2984  * C_entries often keeps pointers to tokens or lines which are older than
2985  * the line currently read.  By keeping two line buffers, and switching
2986  * them at end of line, it is possible to use those pointers.
2987  */
2988 static struct
2989 {
2990   long linepos;                 /* value of charno saved before the line
                                        was read (see CNL_SAVE_DEFINEDEF) */
2991   linebuffer lb;                /* the line itself */
2992 } lbs[2];
2993
     /* The macros below expand to expressions that use variables named
        curndx and newndx, which must be in scope where they are used. */
2994 #define current_lb_is_new (newndx == curndx)
2995 #define switch_line_buffers() (curndx = 1 - curndx)
2996
2997 #define curlb (lbs[curndx].lb)
2998 #define newlb (lbs[newndx].lb)
2999 #define curlinepos (lbs[curndx].linepos)
3000 #define newlinepos (lbs[newndx].linepos)
3001
     /* Dialect tests; each reads a variable named c_ext from the place
        where the macro is used. */
3002 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3003 #define cplpl (c_ext & C_PLPL)
3004 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3005
     /* Read the next input line into the current line buffer: save charno
        as that buffer's line position, point lp at the start of the new
        line, clear quotednl and mark the current buffer as the new one.
        definedef is left untouched.  Uses charno, inf, lp, quotednl,
        curndx and newndx from the place where the macro is used. */
3006 #define CNL_SAVE_DEFINEDEF()                                            \
3007 do {                                                                    \
3008   curlinepos = charno;                                                  \
3009   readline (&curlb, inf);                                               \
3010   lp = curlb.buffer;                                                    \
3011   quotednl = FALSE;                                                     \
3012   newndx = curndx;                                                      \
3013 } while (0)
3014
     /* Like CNL_SAVE_DEFINEDEF, but also restore `token' from savetoken
        when the latter is valid, and reset definedef to dnone. */
3015 #define CNL()                                                           \
3016 do {                                                                    \
3017   CNL_SAVE_DEFINEDEF();                                                 \
3018   if (savetoken.valid)                                                  \
3019     {                                                                   \
3020       token = savetoken;                                                \
3021       savetoken.valid = FALSE;                                          \
3022     }                                                                   \
3023   definedef = dnone;                                                    \
3024 } while (0)
3025
3026
3027 static void
3028 make_C_tag (int isfun)
3029 {
3030   /* This function is never called when token.valid is FALSE, but
3031      we must protect against invalid input or internal errors. */
3032   if (token.valid)
3033     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3034               token.offset+token.length+1, token.lineno, token.linepos);
3035   else if (DEBUG)
3036     {                             /* this branch is optimised away if !DEBUG */
3037       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3038                 token_name.len + 17, isfun, token.line,
3039                 token.offset+token.length+1, token.lineno, token.linepos);
3040       error ("INVALID TOKEN", NULL);
3041     }
3042
3043   token.valid = FALSE;
3044 }
3045
3046
3047 /*
3048  * C_entries ()
3049  *      This routine finds functions, variables, typedefs,
3050  *      #define's, enum constants and struct/union/enum definitions in
3051  *      C syntax and adds them to the list.
3052  */
3053 static void
3054 C_entries (int c_ext, FILE *inf)
3055                                 /* extension of C */
3056                                 /* input file */
3057 {
3058   register char c;              /* latest char read; '\0' for end of line */
3059   register char *lp;            /* pointer one beyond the character `c' */
3060   int curndx, newndx;           /* indices for current and new lb */
3061   register int tokoff;          /* offset in line of start of current token */
3062   register int toklen;          /* length of current token */
3063   const char *qualifier;        /* string used to qualify names */
3064   int qlen;                     /* length of qualifier */
3065   int bracelev;                 /* current brace level */
3066   int bracketlev;               /* current bracket level */
3067   int parlev;                   /* current parenthesis level */
3068   int attrparlev;               /* __attribute__ parenthesis level */
3069   int templatelev;              /* current template level */
3070   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3071   bool incomm, inquote, inchar, quotednl, midtoken;
3072   bool yacc_rules;              /* in the rules part of a yacc file */
3073   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3074
3075
3076   linebuffer_init (&lbs[0].lb);
3077   linebuffer_init (&lbs[1].lb);
3078   if (cstack.size == 0)
3079     {
3080       cstack.size = (DEBUG) ? 1 : 4;
3081       cstack.nl = 0;
3082       cstack.cname = xnew (cstack.size, char *);
3083       cstack.bracelev = xnew (cstack.size, int);
3084     }
3085
3086   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3087   curndx = newndx = 0;
3088   lp = curlb.buffer;
3089   *lp = 0;
3090
3091   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3092   structdef = snone; definedef = dnone; objdef = onone;
3093   yacc_rules = FALSE;
3094   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3095   token.valid = savetoken.valid = FALSE;
3096   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3097   if (cjava)
3098     { qualifier = "."; qlen = 1; }
3099   else
3100     { qualifier = "::"; qlen = 2; }
3101
3102
3103   while (!feof (inf))
3104     {
3105       c = *lp++;
3106       if (c == '\\')
3107         {
3108           /* If we are at the end of the line, the next character is a
3109              '\0'; do not skip it, because it is what tells us
3110              to read the next line.  */
3111           if (*lp == '\0')
3112             {
3113               quotednl = TRUE;
3114               continue;
3115             }
3116           lp++;
3117           c = ' ';
3118         }
3119       else if (incomm)
3120         {
3121           switch (c)
3122             {
3123             case '*':
3124               if (*lp == '/')
3125                 {
3126                   c = *lp++;
3127                   incomm = FALSE;
3128                 }
3129               break;
3130             case '\0':
3131               /* Newlines inside comments do not end macro definitions in
3132                  traditional cpp. */
3133               CNL_SAVE_DEFINEDEF ();
3134               break;
3135             }
3136           continue;
3137         }
3138       else if (inquote)
3139         {
3140           switch (c)
3141             {
3142             case '"':
3143               inquote = FALSE;
3144               break;
3145             case '\0':
3146               /* Newlines inside strings do not end macro definitions
3147                  in traditional cpp, even though compilers don't
3148                  usually accept them. */
3149               CNL_SAVE_DEFINEDEF ();
3150               break;
3151             }
3152           continue;
3153         }
3154       else if (inchar)
3155         {
3156           switch (c)
3157             {
3158             case '\0':
3159               /* Hmmm, something went wrong. */
3160               CNL ();
3161               /* FALLTHRU */
3162             case '\'':
3163               inchar = FALSE;
3164               break;
3165             }
3166           continue;
3167         }
3168       else if (bracketlev > 0)
3169         {
3170           switch (c)
3171             {
3172             case ']':
3173               if (--bracketlev > 0)
3174                 continue;
3175               break;
3176             case '\0':
3177               CNL_SAVE_DEFINEDEF ();
3178               break;
3179             }
3180           continue;
3181         }
3182       else switch (c)
3183         {
3184         case '"':
3185           inquote = TRUE;
3186           if (inattribute)
3187             break;
3188           switch (fvdef)
3189             {
3190             case fdefunkey:
3191             case fstartlist:
3192             case finlist:
3193             case fignore:
3194             case vignore:
3195               break;
3196             default:
3197               fvextern = FALSE;
3198               fvdef = fvnone;
3199             }
3200           continue;
3201         case '\'':
3202           inchar = TRUE;
3203           if (inattribute)
3204             break;
3205           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3206             {
3207               fvextern = FALSE;
3208               fvdef = fvnone;
3209             }
3210           continue;
3211         case '/':
3212           if (*lp == '*')
3213             {
3214               incomm = TRUE;
3215               lp++;
3216               c = ' ';
3217             }
3218           else if (/* cplpl && */ *lp == '/')
3219             {
3220               c = '\0';
3221             }
3222           break;
3223         case '%':
3224           if ((c_ext & YACC) && *lp == '%')
3225             {
3226               /* Entering or exiting rules section in yacc file. */
3227               lp++;
3228               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3229               typdef = tnone; structdef = snone;
3230               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3231               bracelev = 0;
3232               yacc_rules = !yacc_rules;
3233               continue;
3234             }
3235           else
3236             break;
3237         case '#':
3238           if (definedef == dnone)
3239             {
3240               char *cp;
3241               bool cpptoken = TRUE;
3242
3243               /* Look back on this line.  If all blanks, or nonblanks
3244                  followed by an end of comment, this is a preprocessor
3245                  token. */
3246               for (cp = newlb.buffer; cp < lp-1; cp++)
3247                 if (!iswhite (*cp))
3248                   {
3249                     if (*cp == '*' && *(cp+1) == '/')
3250                       {
3251                         cp++;
3252                         cpptoken = TRUE;
3253                       }
3254                     else
3255                       cpptoken = FALSE;
3256                   }
3257               if (cpptoken)
3258                 definedef = dsharpseen;
3259             } /* if (definedef == dnone) */
3260           continue;
3261         case '[':
3262           bracketlev++;
3263             continue;
3264         } /* switch (c) */
3265
3266
3267       /* Consider token only if some involved conditions are satisfied. */
3268       if (typdef != tignore
3269           && definedef != dignorerest
3270           && fvdef != finlist
3271           && templatelev == 0
3272           && (definedef != dnone
3273               || structdef != scolonseen)
3274           && !inattribute)
3275         {
3276           if (midtoken)
3277             {
3278               if (endtoken (c))
3279                 {
3280                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3281                     /* This handles :: in the middle,
3282                        but not at the beginning of an identifier.
3283                        Also, space-separated :: is not recognised. */
3284                     {
3285                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3286                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3287                       lp += 2;
3288                       toklen += 2;
3289                       c = lp[-1];
3290                       goto still_in_token;
3291                     }
3292                   else
3293                     {
3294                       bool funorvar = FALSE;
3295
3296                       if (yacc_rules
3297                           || consider_token (newlb.buffer + tokoff, toklen, c,
3298                                              &c_ext, bracelev, parlev,
3299                                              &funorvar))
3300                         {
3301                           if (fvdef == foperator)
3302                             {
3303                               char *oldlp = lp;
3304                               lp = skip_spaces (lp-1);
3305                               if (*lp != '\0')
3306                                 lp += 1;
3307                               while (*lp != '\0'
3308                                      && !iswhite (*lp) && *lp != '(')
3309                                 lp += 1;
3310                               c = *lp++;
3311                               toklen += lp - oldlp;
3312                             }
3313                           token.named = FALSE;
3314                           if (!plainc
3315                               && nestlev > 0 && definedef == dnone)
3316                             /* in struct body */
3317                             {
3318                               write_classname (&token_name, qualifier);
3319                               linebuffer_setlen (&token_name,
3320                                                  token_name.len+qlen+toklen);
3321                               strcat (token_name.buffer, qualifier);
3322                               strncat (token_name.buffer,
3323                                        newlb.buffer + tokoff, toklen);
3324                               token.named = TRUE;
3325                             }
3326                           else if (objdef == ocatseen)
3327                             /* Objective C category */
3328                             {
3329                               int len = strlen (objtag) + 2 + toklen;
3330                               linebuffer_setlen (&token_name, len);
3331                               strcpy (token_name.buffer, objtag);
3332                               strcat (token_name.buffer, "(");
3333                               strncat (token_name.buffer,
3334                                        newlb.buffer + tokoff, toklen);
3335                               strcat (token_name.buffer, ")");
3336                               token.named = TRUE;
3337                             }
3338                           else if (objdef == omethodtag
3339                                    || objdef == omethodparm)
3340                             /* Objective C method */
3341                             {
3342                               token.named = TRUE;
3343                             }
3344                           else if (fvdef == fdefunname)
3345                             /* GNU DEFUN and similar macros */
3346                             {
3347                               bool defun = (newlb.buffer[tokoff] == 'F');
3348                               int off = tokoff;
3349                               int len = toklen;
3350
3351                               /* Rewrite the tag so that emacs lisp DEFUNs
3352                                  can be found by their elisp name */
3353                               if (defun)
3354                                 {
3355                                   off += 1;
3356                                   len -= 1;
3357                                 }
3358                               linebuffer_setlen (&token_name, len);
3359                               strncpy (token_name.buffer,
3360                                        newlb.buffer + off, len);
3361                               token_name.buffer[len] = '\0';
3362                               if (defun)
3363                                 while (--len >= 0)
3364                                   if (token_name.buffer[len] == '_')
3365                                     token_name.buffer[len] = '-';
3366                               token.named = defun;
3367                             }
3368                           else
3369                             {
3370                               linebuffer_setlen (&token_name, toklen);
3371                               strncpy (token_name.buffer,
3372                                        newlb.buffer + tokoff, toklen);
3373                               token_name.buffer[toklen] = '\0';
3374                               /* Name macros and members. */
3375                               token.named = (structdef == stagseen
3376                                              || typdef == ttypeseen
3377                                              || typdef == tend
3378                                              || (funorvar
3379                                                  && definedef == dignorerest)
3380                                              || (funorvar
3381                                                  && definedef == dnone
3382                                                  && structdef == snone
3383                                                  && bracelev > 0));
3384                             }
3385                           token.lineno = lineno;
3386                           token.offset = tokoff;
3387                           token.length = toklen;
3388                           token.line = newlb.buffer;
3389                           token.linepos = newlinepos;
3390                           token.valid = TRUE;
3391
3392                           if (definedef == dnone
3393                               && (fvdef == fvnameseen
3394                                   || fvdef == foperator
3395                                   || structdef == stagseen
3396                                   || typdef == tend
3397                                   || typdef == ttypeseen
3398                                   || objdef != onone))
3399                             {
3400                               if (current_lb_is_new)
3401                                 switch_line_buffers ();
3402                             }
3403                           else if (definedef != dnone
3404                                    || fvdef == fdefunname
3405                                    || instruct)
3406                             make_C_tag (funorvar);
3407                         }
3408                       else /* not yacc and consider_token failed */
3409                         {
3410                           if (inattribute && fvdef == fignore)
3411                             {
3412                               /* We have just met __attribute__ after a
3413                                  function parameter list: do not tag the
3414                                  function again. */
3415                               fvdef = fvnone;
3416                             }
3417                         }
3418                       midtoken = FALSE;
3419                     }
3420                 } /* if (endtoken (c)) */
3421               else if (intoken (c))
3422                 still_in_token:
3423                 {
3424                   toklen++;
3425                   continue;
3426                 }
3427             } /* if (midtoken) */
3428           else if (begtoken (c))
3429             {
3430               switch (definedef)
3431                 {
3432                 case dnone:
3433                   switch (fvdef)
3434                     {
3435                     case fstartlist:
3436                       /* This prevents tagging fb in
3437                          void (__attribute__((noreturn)) *fb) (void);
3438                          Fixing this is not easy and not very important. */
3439                       fvdef = finlist;
3440                       continue;
3441                     case flistseen:
3442                       if (plainc || declarations)
3443                         {
3444                           make_C_tag (TRUE); /* a function */
3445                           fvdef = fignore;
3446                         }
3447                       break;
3448                     }
3449                   if (structdef == stagseen && !cjava)
3450                     {
3451                       popclass_above (bracelev);
3452                       structdef = snone;
3453                     }
3454                   break;
3455                 case dsharpseen:
3456                   savetoken = token;
3457                   break;
3458                 }
3459               if (!yacc_rules || lp == newlb.buffer + 1)
3460                 {
3461                   tokoff = lp - 1 - newlb.buffer;
3462                   toklen = 1;
3463                   midtoken = TRUE;
3464                 }
3465               continue;
3466             } /* if (begtoken) */
3467         } /* if must look at token */
3468
3469
3470       /* Detect end of line, colon, comma, semicolon and various braces
3471          after having handled a token.*/
3472       switch (c)
3473         {
3474         case ':':
3475           if (inattribute)
3476             break;
3477           if (yacc_rules && token.offset == 0 && token.valid)
3478             {
3479               make_C_tag (FALSE); /* a yacc function */
3480               break;
3481             }
3482           if (definedef != dnone)
3483             break;
3484           switch (objdef)
3485             {
3486             case  otagseen:
3487               objdef = oignore;
3488               make_C_tag (TRUE); /* an Objective C class */
3489               break;
3490             case omethodtag:
3491             case omethodparm:
3492               objdef = omethodcolon;
3493               linebuffer_setlen (&token_name, token_name.len + 1);
3494               strcat (token_name.buffer, ":");
3495               break;
3496             }
3497           if (structdef == stagseen)
3498             {
3499               structdef = scolonseen;
3500               break;
3501             }
3502           /* Should be useless, but may be work as a safety net. */
3503           if (cplpl && fvdef == flistseen)
3504             {
3505               make_C_tag (TRUE); /* a function */
3506               fvdef = fignore;
3507               break;
3508             }
3509           break;
3510         case ';':
3511           if (definedef != dnone || inattribute)
3512             break;
3513           switch (typdef)
3514             {
3515             case tend:
3516             case ttypeseen:
3517               make_C_tag (FALSE); /* a typedef */
3518               typdef = tnone;
3519               fvdef = fvnone;
3520               break;
3521             case tnone:
3522             case tinbody:
3523             case tignore:
3524               switch (fvdef)
3525                 {
3526                 case fignore:
3527                   if (typdef == tignore || cplpl)
3528                     fvdef = fvnone;
3529                   break;
3530                 case fvnameseen:
3531                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3532                       || (members && instruct))
3533                     make_C_tag (FALSE); /* a variable */
3534                   fvextern = FALSE;
3535                   fvdef = fvnone;
3536                   token.valid = FALSE;
3537                   break;
3538                 case flistseen:
3539                   if ((declarations
3540                        && (cplpl || !instruct)
3541                        && (typdef == tnone || (typdef != tignore && instruct)))
3542                       || (members
3543                           && plainc && instruct))
3544                     make_C_tag (TRUE);  /* a function */
3545                   /* FALLTHRU */
3546                 default:
3547                   fvextern = FALSE;
3548                   fvdef = fvnone;
3549                   if (declarations
3550                        && cplpl && structdef == stagseen)
3551                     make_C_tag (FALSE); /* forward declaration */
3552                   else
3553                     token.valid = FALSE;
3554                 } /* switch (fvdef) */
3555               /* FALLTHRU */
3556             default:
3557               if (!instruct)
3558                 typdef = tnone;
3559             }
3560           if (structdef == stagseen)
3561             structdef = snone;
3562           break;
3563         case ',':
3564           if (definedef != dnone || inattribute)
3565             break;
3566           switch (objdef)
3567             {
3568             case omethodtag:
3569             case omethodparm:
3570               make_C_tag (TRUE); /* an Objective C method */
3571               objdef = oinbody;
3572               break;
3573             }
3574           switch (fvdef)
3575             {
3576             case fdefunkey:
3577             case foperator:
3578             case fstartlist:
3579             case finlist:
3580             case fignore:
3581             case vignore:
3582               break;
3583             case fdefunname:
3584               fvdef = fignore;
3585               break;
3586             case fvnameseen:
3587               if (parlev == 0
3588                   && ((globals
3589                        && bracelev == 0
3590                        && templatelev == 0
3591                        && (!fvextern || declarations))
3592                       || (members && instruct)))
3593                   make_C_tag (FALSE); /* a variable */
3594               break;
3595             case flistseen:
3596               if ((declarations && typdef == tnone && !instruct)
3597                   || (members && typdef != tignore && instruct))
3598                 {
3599                   make_C_tag (TRUE); /* a function */
3600                   fvdef = fvnameseen;
3601                 }
3602               else if (!declarations)
3603                 fvdef = fvnone;
3604               token.valid = FALSE;
3605               break;
3606             default:
3607               fvdef = fvnone;
3608             }
3609           if (structdef == stagseen)
3610             structdef = snone;
3611           break;
3612         case ']':
3613           if (definedef != dnone || inattribute)
3614             break;
3615           if (structdef == stagseen)
3616             structdef = snone;
3617           switch (typdef)
3618             {
3619             case ttypeseen:
3620             case tend:
3621               typdef = tignore;
3622               make_C_tag (FALSE);       /* a typedef */
3623               break;
3624             case tnone:
3625             case tinbody:
3626               switch (fvdef)
3627                 {
3628                 case foperator:
3629                 case finlist:
3630                 case fignore:
3631                 case vignore:
3632                   break;
3633                 case fvnameseen:
3634                   if ((members && bracelev == 1)
3635                       || (globals && bracelev == 0
3636                           && (!fvextern || declarations)))
3637                     make_C_tag (FALSE); /* a variable */
3638                   /* FALLTHRU */
3639                 default:
3640                   fvdef = fvnone;
3641                 }
3642               break;
3643             }
3644           break;
3645         case '(':
3646           if (inattribute)
3647             {
3648               attrparlev++;
3649               break;
3650             }
3651           if (definedef != dnone)
3652             break;
3653           if (objdef == otagseen && parlev == 0)
3654             objdef = oparenseen;
3655           switch (fvdef)
3656             {
3657             case fvnameseen:
3658               if (typdef == ttypeseen
3659                   && *lp != '*'
3660                   && !instruct)
3661                 {
3662                   /* This handles constructs like:
3663                      typedef void OperatorFun (int fun); */
3664                   make_C_tag (FALSE);
3665                   typdef = tignore;
3666                   fvdef = fignore;
3667                   break;
3668                 }
3669               /* FALLTHRU */
3670             case foperator:
3671               fvdef = fstartlist;
3672               break;
3673             case flistseen:
3674               fvdef = finlist;
3675               break;
3676             }
3677           parlev++;
3678           break;
3679         case ')':
3680           if (inattribute)
3681             {
3682               if (--attrparlev == 0)
3683                 inattribute = FALSE;
3684               break;
3685             }
3686           if (definedef != dnone)
3687             break;
3688           if (objdef == ocatseen && parlev == 1)
3689             {
3690               make_C_tag (TRUE); /* an Objective C category */
3691               objdef = oignore;
3692             }
3693           if (--parlev == 0)
3694             {
3695               switch (fvdef)
3696                 {
3697                 case fstartlist:
3698                 case finlist:
3699                   fvdef = flistseen;
3700                   break;
3701                 }
3702               if (!instruct
3703                   && (typdef == tend
3704                       || typdef == ttypeseen))
3705                 {
3706                   typdef = tignore;
3707                   make_C_tag (FALSE); /* a typedef */
3708                 }
3709             }
3710           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3711             parlev = 0;
3712           break;
3713         case '{':
3714           if (definedef != dnone)
3715             break;
3716           if (typdef == ttypeseen)
3717             {
3718               /* Whenever typdef is set to tinbody (currently only
3719                  here), typdefbracelev should be set to bracelev. */
3720               typdef = tinbody;
3721               typdefbracelev = bracelev;
3722             }
3723           switch (fvdef)
3724             {
3725             case flistseen:
3726               make_C_tag (TRUE);    /* a function */
3727               /* FALLTHRU */
3728             case fignore:
3729               fvdef = fvnone;
3730               break;
3731             case fvnone:
3732               switch (objdef)
3733                 {
3734                 case otagseen:
3735                   make_C_tag (TRUE); /* an Objective C class */
3736                   objdef = oignore;
3737                   break;
3738                 case omethodtag:
3739                 case omethodparm:
3740                   make_C_tag (TRUE); /* an Objective C method */
3741                   objdef = oinbody;
3742                   break;
3743                 default:
3744                   /* Neutralize `extern "C" {' grot. */
3745                   if (bracelev == 0 && structdef == snone && nestlev == 0
3746                       && typdef == tnone)
3747                     bracelev = -1;
3748                 }
3749               break;
3750             }
3751           switch (structdef)
3752             {
3753             case skeyseen:         /* unnamed struct */
3754               pushclass_above (bracelev, NULL, 0);
3755               structdef = snone;
3756               break;
3757             case stagseen:         /* named struct or enum */
3758             case scolonseen:       /* a class */
3759               pushclass_above (bracelev,token.line+token.offset, token.length);
3760               structdef = snone;
3761               make_C_tag (FALSE);  /* a struct or enum */
3762               break;
3763             }
3764           bracelev += 1;
3765           break;
3766         case '*':
3767           if (definedef != dnone)
3768             break;
3769           if (fvdef == fstartlist)
3770             {
3771               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3772               token.valid = FALSE;
3773             }
3774           break;
3775         case '}':
3776           if (definedef != dnone)
3777             break;
3778           bracelev -= 1;
3779           if (!ignoreindent && lp == newlb.buffer + 1)
3780             {
3781               if (bracelev != 0)
3782                 token.valid = FALSE; /* unexpected value, token unreliable */
3783               bracelev = 0;     /* reset brace level if first column */
3784               parlev = 0;       /* also reset paren level, just in case... */
3785             }
3786           else if (bracelev < 0)
3787             {
3788               token.valid = FALSE; /* something gone amiss, token unreliable */
3789               bracelev = 0;
3790             }
3791           if (bracelev == 0 && fvdef == vignore)
3792             fvdef = fvnone;             /* end of function */
3793           popclass_above (bracelev);
3794           structdef = snone;
3795           /* Only if typdef == tinbody is typdefbracelev significant. */
3796           if (typdef == tinbody && bracelev <= typdefbracelev)
3797             {
3798               assert (bracelev == typdefbracelev);
3799               typdef = tend;
3800             }
3801           break;
3802         case '=':
3803           if (definedef != dnone)
3804             break;
3805           switch (fvdef)
3806             {
3807             case foperator:
3808             case finlist:
3809             case fignore:
3810             case vignore:
3811               break;
3812             case fvnameseen:
3813               if ((members && bracelev == 1)
3814                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3815                 make_C_tag (FALSE); /* a variable */
3816               /* FALLTHRU */
3817             default:
3818               fvdef = vignore;
3819             }
3820           break;
3821         case '<':
3822           if (cplpl
3823               && (structdef == stagseen || fvdef == fvnameseen))
3824             {
3825               templatelev++;
3826               break;
3827             }
3828           goto resetfvdef;
3829         case '>':
3830           if (templatelev > 0)
3831             {
3832               templatelev--;
3833               break;
3834             }
3835           goto resetfvdef;
3836         case '+':
3837         case '-':
3838           if (objdef == oinbody && bracelev == 0)
3839             {
3840               objdef = omethodsign;
3841               break;
3842             }
3843           /* FALLTHRU */
3844         resetfvdef:
3845         case '#': case '~': case '&': case '%': case '/':
3846         case '|': case '^': case '!': case '.': case '?':
3847           if (definedef != dnone)
3848             break;
3849           /* These surely cannot follow a function tag in C. */
3850           switch (fvdef)
3851             {
3852             case foperator:
3853             case finlist:
3854             case fignore:
3855             case vignore:
3856               break;
3857             default:
3858               fvdef = fvnone;
3859             }
3860           break;
3861         case '\0':
3862           if (objdef == otagseen)
3863             {
3864               make_C_tag (TRUE); /* an Objective C class */
3865               objdef = oignore;
3866             }
3867           /* If a macro spans multiple lines don't reset its state. */
3868           if (quotednl)
3869             CNL_SAVE_DEFINEDEF ();
3870           else
3871             CNL ();
3872           break;
3873         } /* switch (c) */
3874
3875     } /* while not eof */
3876
3877   free (lbs[0].lb.buffer);
3878   free (lbs[1].lb.buffer);
3879 }
3880
3881 /*
3882  * Process either a C++ file or a C file depending on the setting
3883  * of a global flag.
      *
      * When the global `cplusplus' is set, C_entries is run on INF with
      * C_PLPL.  Otherwise it is run with C_AUTO, the flag that C_entries
      * tests for "automatic detection of C++" and replaces by C_PLPL
      * once it has recognised a C++ construct.
3884  */
3885 static void
3886 default_C_entries (FILE *inf)
3887 {
3888   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3889 }
3890
3891 /* Always do plain C: run C_entries on INF with no language flag
        set (0). */
3892 static void
3893 plain_C_entries (FILE *inf)
3894 {
3895   C_entries (0, inf);
3896 }
3897
3898 /* Always do C++: run C_entries on INF with the C_PLPL flag, without
        going through the C_AUTO detection used by default_C_entries. */
3899 static void
3900 Cplusplus_entries (FILE *inf)
3901 {
3902   C_entries (C_PLPL, inf);
3903 }
3904
3905 /* Always do Java: run C_entries on INF with the C_JAVA flag. */
3906 static void
3907 Cjava_entries (FILE *inf)
3908 {
3909   C_entries (C_JAVA, inf);
3910 }
3911
3912 /* Always do C*: run C_entries on INF with the C_STAR flag. */
3913 static void
3914 Cstar_entries (FILE *inf)
3915 {
3916   C_entries (C_STAR, inf);
3917 }
3918
3919 /* Always do Yacc: run C_entries on INF with the YACC flag. */
3920 static void
3921 Yacc_entries (FILE *inf)
3922 {
3923   C_entries (YACC, inf);
3924 }
3925
3926 \f
3927 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to a `for' header; the statement that
        follows it is the loop body.  Before each iteration the loop stops
        if feof (FILE_POINTER) is true; otherwise it calls readline to
        fill LINE_BUFFER from FILE_POINTER and points CHAR_POINTER at the
        start of LINE_BUFFER.buffer.  The comma expression ends in TRUE,
        so feof is the only thing that terminates the loop.  LINE_BUFFER
        is expanded twice, so it should be free of side effects.  */
3928 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
3929   for (;                        /* loop initialization */               \
3930        !feof (file_pointer)     /* loop test */                         \
3931        &&                       /* instructions at start of loop */     \
3932           (readline (&line_buffer, file_pointer),                       \
3933            char_pointer = line_buffer.buffer,                           \
3934            TRUE);                                                       \
3935       )
3936
     /* LOOKING_AT (CP, KW) is true when the text at CP starts with the
        literal string KW (compared with strneq) and the character right
        after it satisfies notinname.  On success CP is advanced past KW
        and then past whatever skip_spaces skips; on failure CP is left
        untouched.  CP is expanded several times and assigned to, so it
        must be a side-effect-free lvalue.  */
3937 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
3938   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
3939    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
3940    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
3941    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
3942
3943 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use
        notinname on the character after the keyword, and does not skip
        spaces: on a match CP is advanced just past the keyword.  */
3944 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
3945   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
3946    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
3947    && ((cp) += sizeof(kw)-1))                   /* skip the keyword */
3948
3949 /*
3950  * Read a file, but do no processing.  This is used to do regexp
3951  * matching on files that have no language defined.
      *
      * Every line of INF is pulled through readline into the global
      * buffer `lb' until feof (INF) is true; this function itself never
      * looks at the contents of the lines.
3952  */
3953 static void
3954 just_read_file (FILE *inf)
3955 {
3956   while (!feof (inf))
3957     readline (&lb, inf);
3958 }
3959
3960 \f
3961 /* Fortran parsing */
3962
     /* Helpers called by Fortran_functions; both work on the global scan
        pointer `dbp'.  */
3963 static void F_takeprec (void);
3964 static void F_getit (FILE *);
3965
     /* Skip an optional `*' suffix at the global scan pointer `dbp'.
        Fortran_functions calls this right after it has matched a type
        keyword.  Leading spaces are skipped first.  Then:
          - no `*': return with dbp on the first non-space character;
          - `*' followed (after optional spaces) by the three characters
            "(*)": dbp is moved past them;
          - `*' followed by digits: dbp is moved past the whole run;
          - `*' followed by anything else: dbp is stepped back by one
            character so that the caller's keyword match fails.  */
3966 static void
3967 F_takeprec (void)
3968 {
3969   dbp = skip_spaces (dbp);
3970   if (*dbp != '*')
3971     return;
3972   dbp++;
3973   dbp = skip_spaces (dbp);
3974   if (strneq (dbp, "(*)", 3))
3975     {
3976       dbp += 3;
3977       return;
3978     }
3979   if (!ISDIGIT (*dbp))
3980     {
3981       --dbp;                    /* force failure */
3982       return;
3983     }
       /* At least one digit is present: consume the whole run.  */
3984   do
3985     dbp++;
3986   while (ISDIGIT (*dbp));
3987 }
3988
     /* Tag the name found at the global scan pointer `dbp'.
        Fortran_functions calls this after matching a keyword such as
        `function' or `subroutine'.

        Leading spaces are skipped.  If that reaches the end of the line,
        the next line is read from INF into `lb' and is used only when it
        has a `&' at index 5 (presumably the continuation mark in column
        6 of fixed-form source); scanning then resumes after that mark.
        The name must begin with a letter, `_' or `$' and extends over
        the following characters accepted by intoken.  Nothing is tagged
        when no such name is found.  */
3989 static void
3990 F_getit (FILE *inf)
3991 {
3992   register char *cp;
3993
3994   dbp = skip_spaces (dbp);
3995   if (*dbp == '\0')
3996     {
3997       readline (&lb, inf);
3998       dbp = lb.buffer;
           /* A line shorter than six characters cannot carry the mark;
              without the length test, index 5 would lie past the
              terminating NUL and hold bytes that are not part of the
              line just read.  */
3999       if (lb.len < 6 || dbp[5] != '&')
4000         return;
4001       dbp += 6;
4002       dbp = skip_spaces (dbp);
4003     }
4004   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4005     return;
       /* Find the end of the name; the loop body is intentionally empty.  */
4006   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4007     continue;
4008   make_tag (dbp, cp-dbp, TRUE,
4009             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4010 }
4011
4012
     /* Make tags for a Fortran source file read from INF.

        Each line is read into the global `lb' and scanned through the
        global pointer `dbp'.  On every line, in this order:
          - an initial `%' is skipped;
          - an optional "recursive" prefix is skipped;
          - an optional type keyword (integer, real, logical, complex,
            character, double precision) is skipped, the first five
            together with the suffix handled by F_takeprec;
          - if what follows is function, subroutine or entry, F_getit
            tags the name after it; for blockdata / block data F_getit
            is used as well, except that a unit with nothing after the
            keyword is tagged with the fixed name "blockdata".
        nocase_tail is defined elsewhere in the file; it presumably
        matches its keyword at dbp ignoring case and advances dbp on
        success -- confirm against its definition.  */
4013 static void
4014 Fortran_functions (FILE *inf)
4015 {
4016   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4017     {
4018       if (*dbp == '%')
4019         dbp++;                  /* Ratfor escape to fortran */
4020       dbp = skip_spaces (dbp);
4021       if (*dbp == '\0')
4022         continue;
4023
4024       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4025         dbp = skip_spaces (dbp);
4026
           /* Optional type keyword in front of `function'.  */
4027       switch (lowcase (*dbp))
4028         {
4029         case 'i':
4030           if (nocase_tail ("integer"))
4031             F_takeprec ();
4032           break;
4033         case 'r':
4034           if (nocase_tail ("real"))
4035             F_takeprec ();
4036           break;
4037         case 'l':
4038           if (nocase_tail ("logical"))
4039             F_takeprec ();
4040           break;
4041         case 'c':
4042           if (nocase_tail ("complex") || nocase_tail ("character"))
4043             F_takeprec ();
4044           break;
4045         case 'd':
4046           if (nocase_tail ("double"))
4047             {
4048               dbp = skip_spaces (dbp);
4049               if (*dbp == '\0')
4050                 continue;
4051               if (nocase_tail ("precision"))
4052                 break;
                   /* `double' without `precision': give up on this line
                      (the `continue' applies to the enclosing line loop).  */
4053               continue;
4054             }
4055           break;
4056         }
4057       dbp = skip_spaces (dbp);
4058       if (*dbp == '\0')
4059         continue;
           /* Keyword that introduces a taggable name.  Every case ends
              with `continue', i.e. moves on to the next input line.  */
4060       switch (lowcase (*dbp))
4061         {
4062         case 'f':
4063           if (nocase_tail ("function"))
4064             F_getit (inf);
4065           continue;
4066         case 's':
4067           if (nocase_tail ("subroutine"))
4068             F_getit (inf);
4069           continue;
4070         case 'e':
4071           if (nocase_tail ("entry"))
4072             F_getit (inf);
4073           continue;
4074         case 'b':
4075           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4076             {
4077               dbp = skip_spaces (dbp);
4078               if (*dbp == '\0') /* assume un-named */
4079                 make_tag ("blockdata", 9, TRUE,
4080                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4081               else
4082                 F_getit (inf);  /* look for name */
4083             }
4084           continue;
4085         }
4086     }
4087 }
4088
4089 \f
4090 /*
4091  * Ada parsing
4092  * Original code by
4093  * Philippe Waroquiers (1998)
4094  */
4095
4096 /* Once we are positioned after an "interesting" keyword, let's get
4097    the real tag value necessary.

        INF is the stream further lines are read from; NAME_QUALIFIER is
        the suffix appended to the name to build the tag (callers pass
        strings such as "/f" or "/p").  Scanning starts at the global
        pointer `dbp' inside the global line buffer `lb'.

        The loop skips spaces, reads the next line when it reaches the
        end of the line or a `--' comment, and steps over the words
        `body' (which also switches the qualifier to "/b") and `type'.
        The name is then either the text between double quotes or a run
        of letters, digits, `_' and `.'.  An empty unquoted name makes
        the function return without tagging anything.  After tagging,
        dbp is moved past the closing quote for a quoted name and is
        otherwise left at the start of the name.

        nocase_tail is defined elsewhere in the file; it presumably
        matches its keyword at dbp ignoring case and advances dbp on
        success -- confirm against its definition.  */
4098 static void
4099 Ada_getit (FILE *inf, const char *name_qualifier)
4100 {
4101   register char *cp;
4102   char *name;
4103   char c;
4104
4105   while (!feof (inf))
4106     {
4107       dbp = skip_spaces (dbp);
4108       if (*dbp == '\0'
4109           || (dbp[0] == '-' && dbp[1] == '-'))
4110         {
               /* Nothing usable left on this line: continue on the next.  */
4111           readline (&lb, inf);
4112           dbp = lb.buffer;
4113         }
4114       switch (lowcase(*dbp))
4115         {
4116         case 'b':
4117           if (nocase_tail ("body"))
4118             {
4119               /* Skipping body of   procedure body   or   package body or ....
4120                  resetting qualifier to body instead of spec. */
4121               name_qualifier = "/b";
4122               continue;
4123             }
4124           break;
4125         case 't':
4126           /* Skipping type of   task type   or   protected type ... */
4127           if (nocase_tail ("type"))
4128             continue;
4129           break;
4130         }
4131       if (*dbp == '"')
4132         {
               /* Quoted name: take everything up to the closing quote, or
                  up to the end of the line if there is none.  */
4133           dbp += 1;
4134           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4135             continue;
4136         }
4137       else
4138         {
4139           dbp = skip_spaces (dbp);
4140           for (cp = dbp;
4141                (*cp != '\0'
4142                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4143                cp++)
4144             continue;
4145           if (cp == dbp)
4146             return;
4147         }
           /* Terminate the name in place just long enough to build the
              qualified tag name, then restore the overwritten character.  */
4148       c = *cp;
4149       *cp = '\0';
4150       name = concat (dbp, name_qualifier, "");
4151       *cp = c;
4152       make_tag (name, strlen (name), TRUE,
4153                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4154       free (name);
4155       if (c == '"')
4156         dbp = cp + 1;
4157       return;
4158     }
4159 }
4160
4161 static void
4162 Ada_funcs (FILE *inf)
4163 {
4164   bool inquote = FALSE;
4165   bool skip_till_semicolumn = FALSE;
4166
4167   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4168     {
4169       while (*dbp != '\0')
4170         {
4171           /* Skip a string i.e. "abcd". */
4172           if (inquote || (*dbp == '"'))
4173             {
4174               dbp = etags_strchr (dbp + !inquote, '"');
4175               if (dbp != NULL)
4176                 {
4177                   inquote = FALSE;
4178                   dbp += 1;
4179                   continue;     /* advance char */
4180                 }
4181               else
4182                 {
4183                   inquote = TRUE;
4184                   break;        /* advance line */
4185                 }
4186             }
4187
4188           /* Skip comments. */
4189           if (dbp[0] == '-' && dbp[1] == '-')
4190             break;              /* advance line */
4191
4192           /* Skip character enclosed in single quote i.e. 'a'
4193              and skip single quote starting an attribute i.e. 'Image. */
4194           if (*dbp == '\'')
4195             {
4196               dbp++ ;
4197               if (*dbp != '\0')
4198                 dbp++;
4199               continue;
4200             }
4201
4202           if (skip_till_semicolumn)
4203             {
4204               if (*dbp == ';')
4205                 skip_till_semicolumn = FALSE;
4206               dbp++;
4207               continue;         /* advance char */
4208             }
4209
4210           /* Search for beginning of a token.  */
4211           if (!begtoken (*dbp))
4212             {
4213               dbp++;
4214               continue;         /* advance char */
4215             }
4216
4217           /* We are at the beginning of a token. */
4218           switch (lowcase(*dbp))
4219             {
4220             case 'f':
4221               if (!packages_only && nocase_tail ("function"))
4222                 Ada_getit (inf, "/f");
4223               else
4224                 break;          /* from switch */
4225               continue;         /* advance char */
4226             case 'p':
4227               if (!packages_only && nocase_tail ("procedure"))
4228                 Ada_getit (inf, "/p");
4229               else if (nocase_tail ("package"))
4230                 Ada_getit (inf, "/s");
4231               else if (nocase_tail ("protected")) /* protected type */
4232                 Ada_getit (inf, "/t");
4233               else
4234                 break;          /* from switch */
4235               continue;         /* advance char */
4236
4237             case 'u':
4238               if (typedefs && !packages_only && nocase_tail ("use"))
4239                 {
4240                   /* when tagging types, avoid tagging  use type Pack.Typename;
4241                      for this, we will skip everything till a ; */
4242                   skip_till_semicolumn = TRUE;
4243                   continue;     /* advance char */
4244                 }
4245
4246             case 't':
4247               if (!packages_only && nocase_tail ("task"))
4248                 Ada_getit (inf, "/k");
4249               else if (typedefs && !packages_only && nocase_tail ("type"))
4250                 {
4251                   Ada_getit (inf, "/t");
4252                   while (*dbp != '\0')
4253                     dbp += 1;
4254                 }
4255               else
4256                 break;          /* from switch */
4257               continue;         /* advance char */
4258             }
4259
4260           /* Look for the end of the token. */
4261           while (!endtoken (*dbp))
4262             dbp++;
4263
4264         } /* advance char */
4265     } /* advance line */
4266 }
4267
4268 \f
4269 /*
4270  * Unix and microcontroller assembly tag handling
4271  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4272  * Idea by Bob Weiner, Motorola Inc. (1994)
4273  */
4274 static void
4275 Asm_labels (FILE *inf)
4276 {
4277   register char *cp;
4278
4279   LOOP_ON_INPUT_LINES (inf, lb, cp)
4280     {
4281       /* If first char is alphabetic or one of [_.$], test for colon
4282          following identifier. */
4283       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4284         {
4285           /* Read past label. */
4286           cp++;
4287           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4288             cp++;
4289           if (*cp == ':' || iswhite (*cp))
4290             /* Found end of label, so copy it and add it to the table. */
4291             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4292                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4293         }
4294     }
4295 }
4296
4297 \f
4298 /*
4299  * Perl support
4300  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4301  * Perl variable names: /^(my|local).../
4302  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4303  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4304  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4305  */
4306 static void
4307 Perl_functions (FILE *inf)
4308 {
4309   char *package = savestr ("main"); /* current package name */
4310   register char *cp;
4311
4312   LOOP_ON_INPUT_LINES (inf, lb, cp)
4313     {
4314       cp = skip_spaces (cp);
4315
4316       if (LOOKING_AT (cp, "package"))
4317         {
4318           free (package);
4319           get_tag (cp, &package);
4320         }
4321       else if (LOOKING_AT (cp, "sub"))
4322         {
4323           char *pos;
4324           char *sp = cp;
4325
4326           while (!notinname (*cp))
4327             cp++;
4328           if (cp == sp)
4329             continue;           /* nothing found */
4330           if ((pos = etags_strchr (sp, ':')) != NULL
4331               && pos < cp && pos[1] == ':')
4332             /* The name is already qualified. */
4333             make_tag (sp, cp - sp, TRUE,
4334                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4335           else
4336             /* Qualify it. */
4337             {
4338               char savechar, *name;
4339
4340               savechar = *cp;
4341               *cp = '\0';
4342               name = concat (package, "::", sp);
4343               *cp = savechar;
4344               make_tag (name, strlen(name), TRUE,
4345                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4346               free (name);
4347             }
4348         }
4349        else if (globals)        /* only if we are tagging global vars */
4350         {
4351           /* Skip a qualifier, if any. */
4352           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4353           /* After "my" or "local", but before any following paren or space. */
4354           char *varstart = cp;
4355
4356           if (qual              /* should this be removed?  If yes, how? */
4357               && (*cp == '$' || *cp == '@' || *cp == '%'))
4358             {
4359               varstart += 1;
4360               do
4361                 cp++;
4362               while (ISALNUM (*cp) || *cp == '_');
4363             }
4364           else if (qual)
4365             {
4366               /* Should be examining a variable list at this point;
4367                  could insist on seeing an open parenthesis. */
4368               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4369                 cp++;
4370             }
4371           else
4372             continue;
4373
4374           make_tag (varstart, cp - varstart, FALSE,
4375                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4376         }
4377     }
4378   free (package);
4379 }
4380
4381
4382 /*
4383  * Python support
4384  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4385  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4386  * More ideas by seb bacon <seb@jamkit.com> (2002)
4387  */
4388 static void
4389 Python_functions (FILE *inf)
4390 {
4391   register char *cp;
4392
4393   LOOP_ON_INPUT_LINES (inf, lb, cp)
4394     {
4395       cp = skip_spaces (cp);
4396       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4397         {
4398           char *name = cp;
4399           while (!notinname (*cp) && *cp != ':')
4400             cp++;
4401           make_tag (name, cp - name, TRUE,
4402                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4403         }
4404     }
4405 }
4406
4407 \f
4408 /*
4409  * PHP support
4410  * Look for:
4411  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4412  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4413  *  - /^[ \t]*define\(\"[^\"]+/
4414  * Only with --members:
4415  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4416  * Idea by Diez B. Roggisch (2001)
4417  */
4418 static void
4419 PHP_functions (FILE *inf)
4420 {
4421   register char *cp, *name;
4422   bool search_identifier = FALSE;
4423
4424   LOOP_ON_INPUT_LINES (inf, lb, cp)
4425     {
4426       cp = skip_spaces (cp);
4427       name = cp;
4428       if (search_identifier
4429           && *cp != '\0')
4430         {
4431           while (!notinname (*cp))
4432             cp++;
4433           make_tag (name, cp - name, TRUE,
4434                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4435           search_identifier = FALSE;
4436         }
4437       else if (LOOKING_AT (cp, "function"))
4438         {
4439           if(*cp == '&')
4440             cp = skip_spaces (cp+1);
4441           if(*cp != '\0')
4442             {
4443               name = cp;
4444               while (!notinname (*cp))
4445                 cp++;
4446               make_tag (name, cp - name, TRUE,
4447                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4448             }
4449           else
4450             search_identifier = TRUE;
4451         }
4452       else if (LOOKING_AT (cp, "class"))
4453         {
4454           if (*cp != '\0')
4455             {
4456               name = cp;
4457               while (*cp != '\0' && !iswhite (*cp))
4458                 cp++;
4459               make_tag (name, cp - name, FALSE,
4460                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4461             }
4462           else
4463             search_identifier = TRUE;
4464         }
4465       else if (strneq (cp, "define", 6)
4466                && (cp = skip_spaces (cp+6))
4467                && *cp++ == '('
4468                && (*cp == '"' || *cp == '\''))
4469         {
4470           char quote = *cp++;
4471           name = cp;
4472           while (*cp != quote && *cp != '\0')
4473             cp++;
4474           make_tag (name, cp - name, FALSE,
4475                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4476         }
4477       else if (members
4478                && LOOKING_AT (cp, "var")
4479                && *cp == '$')
4480         {
4481           name = cp;
4482           while (!notinname(*cp))
4483             cp++;
4484           make_tag (name, cp - name, FALSE,
4485                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4486         }
4487     }
4488 }
4489
4490 \f
4491 /*
4492  * Cobol tag functions
4493  * We could look for anything that could be a paragraph name.
4494  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4495  * Idea by Corny de Souza (1993)
4496  */
4497 static void
4498 Cobol_paragraphs (FILE *inf)
4499 {
4500   register char *bp, *ep;
4501
4502   LOOP_ON_INPUT_LINES (inf, lb, bp)
4503     {
4504       if (lb.len < 9)
4505         continue;
4506       bp += 8;
4507
4508       /* If eoln, compiler option or comment ignore whole line. */
4509       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4510         continue;
4511
4512       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4513         continue;
4514       if (*ep++ == '.')
4515         make_tag (bp, ep - bp, TRUE,
4516                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4517     }
4518 }
4519
4520 \f
4521 /*
4522  * Makefile support
4523  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4524  */
4525 static void
4526 Makefile_targets (FILE *inf)
4527 {
4528   register char *bp;
4529
4530   LOOP_ON_INPUT_LINES (inf, lb, bp)
4531     {
4532       if (*bp == '\t' || *bp == '#')
4533         continue;
4534       while (*bp != '\0' && *bp != '=' && *bp != ':')
4535         bp++;
4536       if (*bp == ':' || (globals && *bp == '='))
4537         {
4538           /* We should detect if there is more than one tag, but we do not.
4539              We just skip initial and final spaces. */
4540           char * namestart = skip_spaces (lb.buffer);
4541           while (--bp > namestart)
4542             if (!notinname (*bp))
4543               break;
4544           make_tag (namestart, bp - namestart + 1, TRUE,
4545                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4546         }
4547     }
4548 }
4549
4550 \f
4551 /*
4552  * Pascal parsing
4553  * Original code by Mosur K. Mohan (1989)
4554  *
4555  *  Locates tags for procedures & functions.  Doesn't do any type- or
4556  *  var-definitions.  It does look for the keyword "extern" or
4557  *  "forward" immediately following the procedure statement; if found,
4558  *  the tag is skipped.
4559  */
4560 static void
4561 Pascal_functions (FILE *inf)
4562 {
4563   linebuffer tline;             /* mostly copied from C_entries */
4564   long save_lcno;
4565   int save_lineno, namelen, taglen;
4566   char c, *name;
4567
4568   bool                          /* each of these flags is TRUE if: */
4569     incomment,                  /* point is inside a comment */
4570     inquote,                    /* point is inside '..' string */
4571     get_tagname,                /* point is after PROCEDURE/FUNCTION
4572                                    keyword, so next item = potential tag */
4573     found_tag,                  /* point is after a potential tag */
4574     inparms,                    /* point is within parameter-list */
4575     verify_tag;                 /* point has passed the parm-list, so the
4576                                    next token will determine whether this
4577                                    is a FORWARD/EXTERN to be ignored, or
4578                                    whether it is a real tag */
4579
4580   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4581   name = NULL;                  /* keep compiler quiet */
4582   dbp = lb.buffer;
4583   *dbp = '\0';
4584   linebuffer_init (&tline);
4585
4586   incomment = inquote = FALSE;
4587   found_tag = FALSE;            /* have a proc name; check if extern */
4588   get_tagname = FALSE;          /* found "procedure" keyword         */
4589   inparms = FALSE;              /* found '(' after "proc"            */
4590   verify_tag = FALSE;           /* check if "extern" is ahead        */
4591
4592
4593   while (!feof (inf))           /* long main loop to get next char */
4594     {
4595       c = *dbp++;
4596       if (c == '\0')            /* if end of line */
4597         {
4598           readline (&lb, inf);
4599           dbp = lb.buffer;
4600           if (*dbp == '\0')
4601             continue;
4602           if (!((found_tag && verify_tag)
4603                 || get_tagname))
4604             c = *dbp++;         /* only if don't need *dbp pointing
4605                                    to the beginning of the name of
4606                                    the procedure or function */
4607         }
4608       if (incomment)
4609         {
4610           if (c == '}')         /* within { } comments */
4611             incomment = FALSE;
4612           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4613             {
4614               dbp++;
4615               incomment = FALSE;
4616             }
4617           continue;
4618         }
4619       else if (inquote)
4620         {
4621           if (c == '\'')
4622             inquote = FALSE;
4623           continue;
4624         }
4625       else
4626         switch (c)
4627           {
4628           case '\'':
4629             inquote = TRUE;     /* found first quote */
4630             continue;
4631           case '{':             /* found open { comment */
4632             incomment = TRUE;
4633             continue;
4634           case '(':
4635             if (*dbp == '*')    /* found open (* comment */
4636               {
4637                 incomment = TRUE;
4638                 dbp++;
4639               }
4640             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4641               inparms = TRUE;
4642             continue;
4643           case ')':             /* end of parms list */
4644             if (inparms)
4645               inparms = FALSE;
4646             continue;
4647           case ';':
4648             if (found_tag && !inparms) /* end of proc or fn stmt */
4649               {
4650                 verify_tag = TRUE;
4651                 break;
4652               }
4653             continue;
4654           }
4655       if (found_tag && verify_tag && (*dbp != ' '))
4656         {
4657           /* Check if this is an "extern" declaration. */
4658           if (*dbp == '\0')
4659             continue;
4660           if (lowcase (*dbp == 'e'))
4661             {
4662               if (nocase_tail ("extern")) /* superfluous, really! */
4663                 {
4664                   found_tag = FALSE;
4665                   verify_tag = FALSE;
4666                 }
4667             }
4668           else if (lowcase (*dbp) == 'f')
4669             {
4670               if (nocase_tail ("forward")) /* check for forward reference */
4671                 {
4672                   found_tag = FALSE;
4673                   verify_tag = FALSE;
4674                 }
4675             }
4676           if (found_tag && verify_tag) /* not external proc, so make tag */
4677             {
4678               found_tag = FALSE;
4679               verify_tag = FALSE;
4680               make_tag (name, namelen, TRUE,
4681                         tline.buffer, taglen, save_lineno, save_lcno);
4682               continue;
4683             }
4684         }
4685       if (get_tagname)          /* grab name of proc or fn */
4686         {
4687           char *cp;
4688
4689           if (*dbp == '\0')
4690             continue;
4691
4692           /* Find block name. */
4693           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4694             continue;
4695
4696           /* Save all values for later tagging. */
4697           linebuffer_setlen (&tline, lb.len);
4698           strcpy (tline.buffer, lb.buffer);
4699           save_lineno = lineno;
4700           save_lcno = linecharno;
4701           name = tline.buffer + (dbp - lb.buffer);
4702           namelen = cp - dbp;
4703           taglen = cp - lb.buffer + 1;
4704
4705           dbp = cp;             /* set dbp to e-o-token */
4706           get_tagname = FALSE;
4707           found_tag = TRUE;
4708           continue;
4709
4710           /* And proceed to check for "extern". */
4711         }
4712       else if (!incomment && !inquote && !found_tag)
4713         {
4714           /* Check for proc/fn keywords. */
4715           switch (lowcase (c))
4716             {
4717             case 'p':
4718               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4719                 get_tagname = TRUE;
4720               continue;
4721             case 'f':
4722               if (nocase_tail ("unction"))
4723                 get_tagname = TRUE;
4724               continue;
4725             }
4726         }
4727     } /* while not eof */
4728
4729   free (tline.buffer);
4730 }
4731
4732 \f
4733 /*
4734  * Lisp tag functions
4735  *  look for (def or (DEF, quote or QUOTE
4736  */
4737
4738 static void L_getit (void);
4739
4740 static void
4741 L_getit (void)
4742 {
4743   if (*dbp == '\'')             /* Skip prefix quote */
4744     dbp++;
4745   else if (*dbp == '(')
4746   {
4747     dbp++;
4748     /* Try to skip "(quote " */
4749     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4750       /* Ok, then skip "(" before name in (defstruct (foo)) */
4751       dbp = skip_spaces (dbp);
4752   }
4753   get_tag (dbp, NULL);
4754 }
4755
4756 static void
4757 Lisp_functions (FILE *inf)
4758 {
4759   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4760     {
4761       if (dbp[0] != '(')
4762         continue;
4763
4764       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4765         {
4766           dbp = skip_non_spaces (dbp);
4767           dbp = skip_spaces (dbp);
4768           L_getit ();
4769         }
4770       else
4771         {
4772           /* Check for (foo::defmumble name-defined ... */
4773           do
4774             dbp++;
4775           while (!notinname (*dbp) && *dbp != ':');
4776           if (*dbp == ':')
4777             {
4778               do
4779                 dbp++;
4780               while (*dbp == ':');
4781
4782               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4783                 {
4784                   dbp = skip_non_spaces (dbp);
4785                   dbp = skip_spaces (dbp);
4786                   L_getit ();
4787                 }
4788             }
4789         }
4790     }
4791 }
4792
4793 \f
4794 /*
4795  * Lua script language parsing
4796  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4797  *
4798  *  "function" and "local function" are tags if they start at column 1.
4799  */
4800 static void
4801 Lua_functions (FILE *inf)
4802 {
4803   register char *bp;
4804
4805   LOOP_ON_INPUT_LINES (inf, lb, bp)
4806     {
4807       if (bp[0] != 'f' && bp[0] != 'l')
4808         continue;
4809
4810       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4811
4812       if (LOOKING_AT (bp, "function"))
4813         get_tag (bp, NULL);
4814     }
4815 }
4816
4817 \f
4818 /*
4819  * Postscript tags
4820  * Just look for lines where the first character is '/'
4821  * Also look at "defineps" for PSWrap
4822  * Ideas by:
4823  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4824  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4825  */
4826 static void
4827 PS_functions (FILE *inf)
4828 {
4829   register char *bp, *ep;
4830
4831   LOOP_ON_INPUT_LINES (inf, lb, bp)
4832     {
4833       if (bp[0] == '/')
4834         {
4835           for (ep = bp+1;
4836                *ep != '\0' && *ep != ' ' && *ep != '{';
4837                ep++)
4838             continue;
4839           make_tag (bp, ep - bp, TRUE,
4840                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4841         }
4842       else if (LOOKING_AT (bp, "defineps"))
4843         get_tag (bp, NULL);
4844     }
4845 }
4846
4847 \f
4848 /*
4849  * Forth tags
4850  * Ignore anything after \ followed by space or in ( )
4851  * Look for words defined by :
4852  * Look for constant, code, create, defer, value, and variable
4853  * OBP extensions:  Look for buffer:, field,
4854  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4855  */
4856 static void
4857 Forth_words (FILE *inf)
4858 {
4859   register char *bp;
4860
4861   LOOP_ON_INPUT_LINES (inf, lb, bp)
4862     while ((bp = skip_spaces (bp))[0] != '\0')
4863       if (bp[0] == '\\' && iswhite(bp[1]))
4864         break;                  /* read next line */
4865       else if (bp[0] == '(' && iswhite(bp[1]))
4866         do                      /* skip to ) or eol */
4867           bp++;
4868         while (*bp != ')' && *bp != '\0');
4869       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4870                || LOOKING_AT_NOCASE (bp, "constant")
4871                || LOOKING_AT_NOCASE (bp, "code")
4872                || LOOKING_AT_NOCASE (bp, "create")
4873                || LOOKING_AT_NOCASE (bp, "defer")
4874                || LOOKING_AT_NOCASE (bp, "value")
4875                || LOOKING_AT_NOCASE (bp, "variable")
4876                || LOOKING_AT_NOCASE (bp, "buffer:")
4877                || LOOKING_AT_NOCASE (bp, "field"))
4878         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4879       else
4880         bp = skip_non_spaces (bp);
4881 }
4882
4883 \f
4884 /*
4885  * Scheme tag functions
4886  * look for (def... xyzzy
4887  *          (def... (xyzzy
4888  *          (def ... ((...(xyzzy ....
4889  *          (set! xyzzy
4890  * Original code by Ken Haase (1985?)
4891  */
4892 static void
4893 Scheme_functions (FILE *inf)
4894 {
4895   register char *bp;
4896
4897   LOOP_ON_INPUT_LINES (inf, lb, bp)
4898     {
4899       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4900         {
4901           bp = skip_non_spaces (bp+4);
4902           /* Skip over open parens and white space.  Don't continue past
4903              '\0'. */
4904           while (*bp && notinname (*bp))
4905             bp++;
4906           get_tag (bp, NULL);
4907         }
4908       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4909         get_tag (bp, NULL);
4910     }
4911 }
4912
4913 \f
4914 /* Find tags in TeX and LaTeX input files.  */
4915
4916 /* TEX_toktab is a table of TeX control sequences that define tags.
4917  * Each entry records one such control sequence.
4918  *
4919  * Original code from who knows whom.
4920  * Ideas by:
4921  *   Stefan Monnier (2002)
4922  */
4923
4924 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4925
4926 /* Default set of control sequences to put into TEX_toktab.
4927    The value of environment var TEXTAGS is prepended to this.  */
4928 static const char *TEX_defenv = "\
4929 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4930 :part:appendix:entry:index:def\
4931 :newcommand:renewcommand:newenvironment:renewenvironment";
4932
4933 static void TEX_mode (FILE *);
4934 static void TEX_decode_env (const char *, const char *);
4935
4936 static char TEX_esc = '\\';
4937 static char TEX_opgrp = '{';
4938 static char TEX_clgrp = '}';
4939
4940 /*
4941  * TeX/LaTeX scanning loop.
4942  */
4943 static void
4944 TeX_commands (FILE *inf)
4945 {
4946   char *cp;
4947   linebuffer *key;
4948
4949   /* Select either \ or ! as escape character.  */
4950   TEX_mode (inf);
4951
4952   /* Initialize token table once from environment. */
4953   if (TEX_toktab == NULL)
4954     TEX_decode_env ("TEXTAGS", TEX_defenv);
4955
4956   LOOP_ON_INPUT_LINES (inf, lb, cp)
4957     {
4958       /* Look at each TEX keyword in line. */
4959       for (;;)
4960         {
4961           /* Look for a TEX escape. */
4962           while (*cp++ != TEX_esc)
4963             if (cp[-1] == '\0' || cp[-1] == '%')
4964               goto tex_next_line;
4965
4966           for (key = TEX_toktab; key->buffer != NULL; key++)
4967             if (strneq (cp, key->buffer, key->len))
4968               {
4969                 register char *p;
4970                 int namelen, linelen;
4971                 bool opgrp = FALSE;
4972
4973                 cp = skip_spaces (cp + key->len);
4974                 if (*cp == TEX_opgrp)
4975                   {
4976                     opgrp = TRUE;
4977                     cp++;
4978                   }
4979                 for (p = cp;
4980                      (!iswhite (*p) && *p != '#' &&
4981                       *p != TEX_opgrp && *p != TEX_clgrp);
4982                      p++)
4983                   continue;
4984                 namelen = p - cp;
4985                 linelen = lb.len;
4986                 if (!opgrp || *p == TEX_clgrp)
4987                   {
4988                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4989                       p++;
4990                     linelen = p - lb.buffer + 1;
4991                   }
4992                 make_tag (cp, namelen, TRUE,
4993                           lb.buffer, linelen, lineno, linecharno);
4994                 goto tex_next_line; /* We only tag a line once */
4995               }
4996         }
4997     tex_next_line:
4998       ;
4999     }
5000 }
5001
5002 #define TEX_LESC '\\'
5003 #define TEX_SESC '!'
5004
5005 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5006    chars accordingly. */
5007 static void
5008 TEX_mode (FILE *inf)
5009 {
5010   int c;
5011
5012   while ((c = getc (inf)) != EOF)
5013     {
5014       /* Skip to next line if we hit the TeX comment char. */
5015       if (c == '%')
5016         while (c != '\n' && c != EOF)
5017           c = getc (inf);
5018       else if (c == TEX_LESC || c == TEX_SESC )
5019         break;
5020     }
5021
5022   if (c == TEX_LESC)
5023     {
5024       TEX_esc = TEX_LESC;
5025       TEX_opgrp = '{';
5026       TEX_clgrp = '}';
5027     }
5028   else
5029     {
5030       TEX_esc = TEX_SESC;
5031       TEX_opgrp = '<';
5032       TEX_clgrp = '>';
5033     }
5034   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5035      No attempt is made to correct the situation. */
5036   rewind (inf);
5037 }
5038
5039 /* Read environment and prepend it to the default string.
5040    Build token table. */
5041 static void
5042 TEX_decode_env (const char *evarname, const char *defenv)
5043 {
5044   register const char *env, *p;
5045   int i, len;
5046
5047   /* Append default string to environment. */
5048   env = getenv (evarname);
5049   if (!env)
5050     env = defenv;
5051   else
5052     env = concat (env, defenv, "");
5053
5054   /* Allocate a token table */
5055   for (len = 1, p = env; p;)
5056     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5057       len++;
5058   TEX_toktab = xnew (len, linebuffer);
5059
5060   /* Unpack environment string into token table. Be careful about */
5061   /* zero-length strings (leading ':', "::" and trailing ':') */
5062   for (i = 0; *env != '\0';)
5063     {
5064       p = etags_strchr (env, ':');
5065       if (!p)                   /* End of environment string. */
5066         p = env + strlen (env);
5067       if (p - env > 0)
5068         {                       /* Only non-zero strings. */
5069           TEX_toktab[i].buffer = savenstr (env, p - env);
5070           TEX_toktab[i].len = p - env;
5071           i++;
5072         }
5073       if (*p)
5074         env = p + 1;
5075       else
5076         {
5077           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5078           TEX_toktab[i].len = 0;
5079           break;
5080         }
5081     }
5082 }
5083
5084 \f
5085 /* Texinfo support.  Dave Love, Mar. 2000.  */
5086 static void
5087 Texinfo_nodes (FILE *inf)
5088 {
5089   char *cp, *start;
5090   LOOP_ON_INPUT_LINES (inf, lb, cp)
5091     if (LOOKING_AT (cp, "@node"))
5092       {
5093         start = cp;
5094         while (*cp != '\0' && *cp != ',')
5095           cp++;
5096         make_tag (start, cp - start, TRUE,
5097                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5098       }
5099 }
5100
5101 \f
5102 /*
5103  * HTML support.
5104  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5105  * Contents of <a name=xxx> are tags with name xxx.
5106  *
5107  * Francesco Potortì, 2002.
5108  */
5109 static void
5110 HTML_labels (FILE *inf)
5111 {
5112   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5113   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5114   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5115   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5116   char *end;
5117
5118
5119   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5120
5121   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5122     for (;;)                    /* loop on the same line */
5123       {
5124         if (skiptag)            /* skip HTML tag */
5125           {
5126             while (*dbp != '\0' && *dbp != '>')
5127               dbp++;
5128             if (*dbp == '>')
5129               {
5130                 dbp += 1;
5131                 skiptag = FALSE;
5132                 continue;       /* look on the same line */
5133               }
5134             break;              /* go to next line */
5135           }
5136
5137         else if (intag) /* look for "name=" or "id=" */
5138           {
5139             while (*dbp != '\0' && *dbp != '>'
5140                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5141               dbp++;
5142             if (*dbp == '\0')
5143               break;            /* go to next line */
5144             if (*dbp == '>')
5145               {
5146                 dbp += 1;
5147                 intag = FALSE;
5148                 continue;       /* look on the same line */
5149               }
5150             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5151                 || LOOKING_AT_NOCASE (dbp, "id="))
5152               {
5153                 bool quoted = (dbp[0] == '"');
5154
5155                 if (quoted)
5156                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5157                     continue;
5158                 else
5159                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5160                     continue;
5161                 linebuffer_setlen (&token_name, end - dbp);
5162                 strncpy (token_name.buffer, dbp, end - dbp);
5163                 token_name.buffer[end - dbp] = '\0';
5164
5165                 dbp = end;
5166                 intag = FALSE;  /* we found what we looked for */
5167                 skiptag = TRUE; /* skip to the end of the tag */
5168                 getnext = TRUE; /* then grab the text */
5169                 continue;       /* look on the same line */
5170               }
5171             dbp += 1;
5172           }
5173
5174         else if (getnext)       /* grab next tokens and tag them */
5175           {
5176             dbp = skip_spaces (dbp);
5177             if (*dbp == '\0')
5178               break;            /* go to next line */
5179             if (*dbp == '<')
5180               {
5181                 intag = TRUE;
5182                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5183                 continue;       /* look on the same line */
5184               }
5185
5186             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5187               continue;
5188             make_tag (token_name.buffer, token_name.len, TRUE,
5189                       dbp, end - dbp, lineno, linecharno);
5190             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5191             getnext = FALSE;
5192             break;              /* go to next line */
5193           }
5194
5195         else                    /* look for an interesting HTML tag */
5196           {
5197             while (*dbp != '\0' && *dbp != '<')
5198               dbp++;
5199             if (*dbp == '\0')
5200               break;            /* go to next line */
5201             intag = TRUE;
5202             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5203               {
5204                 inanchor = TRUE;
5205                 continue;       /* look on the same line */
5206               }
5207             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5208                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5209                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5210                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5211               {
5212                 intag = FALSE;
5213                 getnext = TRUE;
5214                 continue;       /* look on the same line */
5215               }
5216             dbp += 1;
5217           }
5218       }
5219 }
5220
5221 \f
5222 /*
5223  * Prolog support
5224  *
5225  * Assumes that the predicate or rule starts at column 0.
5226  * Only the first clause of a predicate or rule is added.
5227  * Original code by Sunichirou Sugou (1989)
5228  * Rewritten by Anders Lindgren (1996)
5229  */
5230 static size_t prolog_pr (char *, char *);
5231 static void prolog_skip_comment (linebuffer *, FILE *);
5232 static size_t prolog_atom (char *, size_t);
5233
5234 static void
5235 Prolog_functions (FILE *inf)
5236 {
5237   char *cp, *last;
5238   size_t len;
5239   size_t allocated;
5240
5241   allocated = 0;
5242   len = 0;
5243   last = NULL;
5244
5245   LOOP_ON_INPUT_LINES (inf, lb, cp)
5246     {
5247       if (cp[0] == '\0')        /* Empty line */
5248         continue;
5249       else if (iswhite (cp[0])) /* Not a predicate */
5250         continue;
5251       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5252         prolog_skip_comment (&lb, inf);
5253       else if ((len = prolog_pr (cp, last)) > 0)
5254         {
5255           /* Predicate or rule.  Store the function name so that we
5256              only generate a tag for the first clause.  */
5257           if (last == NULL)
5258             last = xnew(len + 1, char);
5259           else if (len + 1 > allocated)
5260             xrnew (last, len + 1, char);
5261           allocated = len + 1;
5262           strncpy (last, cp, len);
5263           last[len] = '\0';
5264         }
5265     }
5266   free (last);
5267 }
5268
5269
5270 static void
5271 prolog_skip_comment (linebuffer *plb, FILE *inf)
5272 {
5273   char *cp;
5274
5275   do
5276     {
5277       for (cp = plb->buffer; *cp != '\0'; cp++)
5278         if (cp[0] == '*' && cp[1] == '/')
5279           return;
5280       readline (plb, inf);
5281     }
5282   while (!feof(inf));
5283 }
5284
5285 /*
5286  * A predicate or rule definition is added if it matches:
5287  *     <beginning of line><Prolog Atom><whitespace>(
5288  * or  <beginning of line><Prolog Atom><whitespace>:-
5289  *
5290  * It is added to the tags database if it doesn't match the
5291  * name of the previous clause header.
5292  *
5293  * Return the size of the name of the predicate or rule, or 0 if no
5294  * header was found.
5295  */
5296 static size_t
5297 prolog_pr (char *s, char *last)
5298
5299                                 /* Name of last clause. */
5300 {
5301   size_t pos;
5302   size_t len;
5303
5304   pos = prolog_atom (s, 0);
5305   if (! pos)
5306     return 0;
5307
5308   len = pos;
5309   pos = skip_spaces (s + pos) - s;
5310
5311   if ((s[pos] == '.'
5312        || (s[pos] == '(' && (pos += 1))
5313        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5314       && (last == NULL          /* save only the first clause */
5315           || len != strlen (last)
5316           || !strneq (s, last, len)))
5317         {
5318           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5319           return len;
5320         }
5321   else
5322     return 0;
5323 }
5324
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if no atom was found.
 *
 * An atom, in this context, is one of:
 * - A run of alphanumeric characters and underscores whose first
 *   character is a lower case letter or an underscore.
 * - A string in single quotes.  Inside it a doubled single quote
 *   stands for one quote and a backslash quotes the next character.
 *   A quoted atom that does not end on this line is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (ISLOWER (s[pos]) || s[pos] == '_')
    {
      /* Unquoted atom.  */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
      return pos - start;
    }

  if (s[pos] != '\'')
    return 0;

  /* Quoted atom: step over the opening quote and look for the end.  */
  pos++;
  while (s[pos] != '\0')
    {
      switch (s[pos])
        {
        case '\'':
          if (s[pos + 1] != '\'')
            return pos + 1 - start;     /* closing quote included */
          pos += 2;                     /* doubled quote */
          break;
        case '\\':
          if (s[pos + 1] == '\0')
            return 0;
          pos += 2;                     /* backslash and quoted char */
          break;
        default:
          pos++;
          break;
        }
    }

  /* Multiline quoted atoms are ignored.  */
  return 0;
}
5381
5382 \f
5383 /*
5384  * Support for Erlang
5385  *
5386  * Generates tags for functions, defines, and records.
5387  * Assumes that Erlang functions start at column 0.
5388  * Original code by Anders Lindgren (1996)
5389  */
/* Forward declarations of the Erlang helpers defined further down.  */
static int erlang_func (char *s, char *last);
static void erlang_attribute (char *s);
static int erlang_atom (char *s);
5393
5394 static void
5395 Erlang_functions (FILE *inf)
5396 {
5397   char *cp, *last;
5398   int len;
5399   int allocated;
5400
5401   allocated = 0;
5402   len = 0;
5403   last = NULL;
5404
5405   LOOP_ON_INPUT_LINES (inf, lb, cp)
5406     {
5407       if (cp[0] == '\0')        /* Empty line */
5408         continue;
5409       else if (iswhite (cp[0])) /* Not function nor attribute */
5410         continue;
5411       else if (cp[0] == '%')    /* comment */
5412         continue;
5413       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5414         continue;
5415       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5416         {
5417           erlang_attribute (cp);
5418           if (last != NULL)
5419             {
5420               free (last);
5421               last = NULL;
5422             }
5423         }
5424       else if ((len = erlang_func (cp, last)) > 0)
5425         {
5426           /*
5427            * Function.  Store the function name so that we only
5428            * generates a tag for the first clause.
5429            */
5430           if (last == NULL)
5431             last = xnew (len + 1, char);
5432           else if (len + 1 > allocated)
5433             xrnew (last, len + 1, char);
5434           allocated = len + 1;
5435           strncpy (last, cp, len);
5436           last[len] = '\0';
5437         }
5438     }
5439   free (last);
5440 }
5441
5442
5443 /*
5444  * A function definition is added if it matches:
5445  *     <beginning of line><Erlang Atom><whitespace>(
5446  *
5447  * It is added to the tags database if it doesn't match the
5448  * name of the previous clause header.
5449  *
5450  * Return the size of the name of the function, or 0 if no function
5451  * was found.
5452  */
5453 static int
5454 erlang_func (char *s, char *last)
5455
5456                                 /* Name of last clause. */
5457 {
5458   int pos;
5459   int len;
5460
5461   pos = erlang_atom (s);
5462   if (pos < 1)
5463     return 0;
5464
5465   len = pos;
5466   pos = skip_spaces (s + pos) - s;
5467
5468   /* Save only the first clause. */
5469   if (s[pos++] == '('
5470       && (last == NULL
5471           || len != (int)strlen (last)
5472           || !strneq (s, last, len)))
5473         {
5474           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5475           return len;
5476         }
5477
5478   return 0;
5479 }
5480
5481
5482 /*
5483  * Handle attributes.  Currently, tags are generated for defines
5484  * and records.
5485  *
5486  * They are on the form:
5487  * -define(foo, bar).
5488  * -define(Foo(M, N), M+N).
5489  * -record(graph, {vtab = notable, cyclic = true}).
5490  */
5491 static void
5492 erlang_attribute (char *s)
5493 {
5494   char *cp = s;
5495
5496   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5497       && *cp++ == '(')
5498     {
5499       int len = erlang_atom (skip_spaces (cp));
5500       if (len > 0)
5501         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5502     }
5503   return;
5504 }
5505
5506
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom.
 *
 * Accepted are a letter or underscore followed by alphanumerics and
 * underscores, and a string in single quotes in which a backslash
 * quotes the next character.  Quoted atoms that do not end on this
 * line are rejected.
 */
static int
erlang_atom (char *s)
{
  int pos = 0;

  if (s[0] == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;            /* step onto the quoted character */
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* closing quote included */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
    }

  return pos;
}
5534
5535 \f
5536 static char *scan_separators (char *);
5537 static void add_regex (char *, language *);
5538 static char *substitute (char *, char *, struct re_registers *);
5539
/*
 * Take a string like "/blah/" and turn it into "blah", working in
 * place.  NAME[0] is the separator; the text that follows it is
 * copied down to the start of NAME up to the first unquoted
 * separator, and the copy is null terminated.
 *
 * A backslash quotes the next character: the letters a b d e f n r t v
 * give BEL, BS, DEL, ESC, FF, NL, CR, TAB and VT, a quoted separator
 * gives a plain separator, and any other quoted character is kept
 * together with its backslash.  A backslash that ends the string is
 * dropped.
 *
 * Returns a pointer to the terminating separator, or NULL when NAME
 * is empty or the closing separator is missing (unterminated regexp).
 */
static char *
scan_separators (char *name)
{
  char sep = name[0];
  char *copyto = name;

  /* An empty string has no separator; stepping over NAME[0] would
     read past the terminator.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* The backslash is tested before the separator, so a
             backslash never acts as the closing separator.  */
          if (*++name == '\0')
            break;              /* dangling backslash */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5598
5599 /* Look at the argument of --regex or --no-regex and do the right
5600    thing.  Same for each line of a regexp file. */
5601 static void
5602 analyse_regex (char *regex_arg)
5603 {
5604   if (regex_arg == NULL)
5605     {
5606       free_regexps ();          /* --no-regex: remove existing regexps */
5607       return;
5608     }
5609
5610   /* A real --regexp option or a line in a regexp file. */
5611   switch (regex_arg[0])
5612     {
5613       /* Comments in regexp file or null arg to --regex. */
5614     case '\0':
5615     case ' ':
5616     case '\t':
5617       break;
5618
5619       /* Read a regex file.  This is recursive and may result in a
5620          loop, which will stop when the file descriptors are exhausted. */
5621     case '@':
5622       {
5623         FILE *regexfp;
5624         linebuffer regexbuf;
5625         char *regexfile = regex_arg + 1;
5626
5627         /* regexfile is a file containing regexps, one per line. */
5628         regexfp = fopen (regexfile, "r");
5629         if (regexfp == NULL)
5630           {
5631             pfatal (regexfile);
5632             return;
5633           }
5634         linebuffer_init (&regexbuf);
5635         while (readline_internal (&regexbuf, regexfp) > 0)
5636           analyse_regex (regexbuf.buffer);
5637         free (regexbuf.buffer);
5638         fclose (regexfp);
5639       }
5640       break;
5641
5642       /* Regexp to be used for a specific language only. */
5643     case '{':
5644       {
5645         language *lang;
5646         char *lang_name = regex_arg + 1;
5647         char *cp;
5648
5649         for (cp = lang_name; *cp != '}'; cp++)
5650           if (*cp == '\0')
5651             {
5652               error ("unterminated language name in regex: %s", regex_arg);
5653               return;
5654             }
5655         *cp++ = '\0';
5656         lang = get_language_from_langname (lang_name);
5657         if (lang == NULL)
5658           return;
5659         add_regex (cp, lang);
5660       }
5661       break;
5662
5663       /* Regexp to be used for any language. */
5664     default:
5665       add_regex (regex_arg, NULL);
5666       break;
5667     }
5668 }
5669
5670 /* Separate the regexp pattern, compile it,
5671    and care for optional name and modifiers. */
5672 static void
5673 add_regex (char *regexp_pattern, language *lang)
5674 {
5675   static struct re_pattern_buffer zeropattern;
5676   char sep, *pat, *name, *modifiers;
5677   char empty[] = "";
5678   const char *err;
5679   struct re_pattern_buffer *patbuf;
5680   regexp *rp;
5681   bool
5682     force_explicit_name = TRUE, /* do not use implicit tag names */
5683     ignore_case = FALSE,        /* case is significant */
5684     multi_line = FALSE,         /* matches are done one line at a time */
5685     single_line = FALSE;        /* dot does not match newline */
5686
5687
5688   if (strlen(regexp_pattern) < 3)
5689     {
5690       error ("null regexp", (char *)NULL);
5691       return;
5692     }
5693   sep = regexp_pattern[0];
5694   name = scan_separators (regexp_pattern);
5695   if (name == NULL)
5696     {
5697       error ("%s: unterminated regexp", regexp_pattern);
5698       return;
5699     }
5700   if (name[1] == sep)
5701     {
5702       error ("null name for regexp \"%s\"", regexp_pattern);
5703       return;
5704     }
5705   modifiers = scan_separators (name);
5706   if (modifiers == NULL)        /* no terminating separator --> no name */
5707     {
5708       modifiers = name;
5709       name = empty;
5710     }
5711   else
5712     modifiers += 1;             /* skip separator */
5713
5714   /* Parse regex modifiers. */
5715   for (; modifiers[0] != '\0'; modifiers++)
5716     switch (modifiers[0])
5717       {
5718       case 'N':
5719         if (modifiers == name)
5720           error ("forcing explicit tag name but no name, ignoring", NULL);
5721         force_explicit_name = TRUE;
5722         break;
5723       case 'i':
5724         ignore_case = TRUE;
5725         break;
5726       case 's':
5727         single_line = TRUE;
5728         /* FALLTHRU */
5729       case 'm':
5730         multi_line = TRUE;
5731         need_filebuf = TRUE;
5732         break;
5733       default:
5734         {
5735           char wrongmod [2];
5736           wrongmod[0] = modifiers[0];
5737           wrongmod[1] = '\0';
5738           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5739         }
5740         break;
5741       }
5742
5743   patbuf = xnew (1, struct re_pattern_buffer);
5744   *patbuf = zeropattern;
5745   if (ignore_case)
5746     {
5747       static char lc_trans[CHARS];
5748       int i;
5749       for (i = 0; i < CHARS; i++)
5750         lc_trans[i] = lowcase (i);
5751       patbuf->translate = lc_trans;     /* translation table to fold case  */
5752     }
5753
5754   if (multi_line)
5755     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5756   else
5757     pat = regexp_pattern;
5758
5759   if (single_line)
5760     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5761   else
5762     re_set_syntax (RE_SYNTAX_EMACS);
5763
5764   err = re_compile_pattern (pat, strlen (pat), patbuf);
5765   if (multi_line)
5766     free (pat);
5767   if (err != NULL)
5768     {
5769       error ("%s while compiling pattern", err);
5770       return;
5771     }
5772
5773   rp = p_head;
5774   p_head = xnew (1, regexp);
5775   p_head->pattern = savestr (regexp_pattern);
5776   p_head->p_next = rp;
5777   p_head->lang = lang;
5778   p_head->pat = patbuf;
5779   p_head->name = savestr (name);
5780   p_head->error_signaled = FALSE;
5781   p_head->force_explicit_name = force_explicit_name;
5782   p_head->ignore_case = ignore_case;
5783   p_head->multi_line = multi_line;
5784 }
5785
5786 /*
5787  * Do the substitutions indicated by the regular expression and
5788  * arguments.
5789  */
5790 static char *
5791 substitute (char *in, char *out, struct re_registers *regs)
5792 {
5793   char *result, *t;
5794   int size, dig, diglen;
5795
5796   result = NULL;
5797   size = strlen (out);
5798
5799   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5800   if (out[size - 1] == '\\')
5801     fatal ("pattern error in \"%s\"", out);
5802   for (t = etags_strchr (out, '\\');
5803        t != NULL;
5804        t = etags_strchr (t + 2, '\\'))
5805     if (ISDIGIT (t[1]))
5806       {
5807         dig = t[1] - '0';
5808         diglen = regs->end[dig] - regs->start[dig];
5809         size += diglen - 2;
5810       }
5811     else
5812       size -= 1;
5813
5814   /* Allocate space and do the substitutions. */
5815   assert (size >= 0);
5816   result = xnew (size + 1, char);
5817
5818   for (t = result; *out != '\0'; out++)
5819     if (*out == '\\' && ISDIGIT (*++out))
5820       {
5821         dig = *out - '0';
5822         diglen = regs->end[dig] - regs->start[dig];
5823         strncpy (t, in + regs->start[dig], diglen);
5824         t += diglen;
5825       }
5826     else
5827       *t++ = *out;
5828   *t = '\0';
5829
5830   assert (t <= result + size);
5831   assert (t - result == (int)strlen (result));
5832
5833   return result;
5834 }
5835
5836 /* Deallocate all regexps. */
5837 static void
5838 free_regexps (void)
5839 {
5840   regexp *rp;
5841   while (p_head != NULL)
5842     {
5843       rp = p_head->p_next;
5844       free (p_head->pattern);
5845       free (p_head->name);
5846       free (p_head);
5847       p_head = rp;
5848     }
5849   return;
5850 }
5851
5852 /*
5853  * Reads the whole file as a single string from `filebuf' and looks for
5854  * multi-line regular expressions, creating tags on matches.
5855  * readline already dealt with normal regexps.
5856  *
5857  * Idea by Ben Wing <ben@666.com> (2002).
5858  */
5859 static void
5860 regex_tag_multiline (void)
5861 {
5862   char *buffer = filebuf.buffer;
5863   regexp *rp;
5864   char *name;
5865
5866   for (rp = p_head; rp != NULL; rp = rp->p_next)
5867     {
5868       int match = 0;
5869
5870       if (!rp->multi_line)
5871         continue;               /* skip normal regexps */
5872
5873       /* Generic initialisations before parsing file from memory. */
5874       lineno = 1;               /* reset global line number */
5875       charno = 0;               /* reset global char number */
5876       linecharno = 0;           /* reset global char number of line start */
5877
5878       /* Only use generic regexps or those for the current language. */
5879       if (rp->lang != NULL && rp->lang != curfdp->lang)
5880         continue;
5881
5882       while (match >= 0 && match < filebuf.len)
5883         {
5884           match = re_search (rp->pat, buffer, filebuf.len, charno,
5885                              filebuf.len - match, &rp->regs);
5886           switch (match)
5887             {
5888             case -2:
5889               /* Some error. */
5890               if (!rp->error_signaled)
5891                 {
5892                   error ("regexp stack overflow while matching \"%s\"",
5893                          rp->pattern);
5894                   rp->error_signaled = TRUE;
5895                 }
5896               break;
5897             case -1:
5898               /* No match. */
5899               break;
5900             default:
5901               if (match == rp->regs.end[0])
5902                 {
5903                   if (!rp->error_signaled)
5904                     {
5905                       error ("regexp matches the empty string: \"%s\"",
5906                              rp->pattern);
5907                       rp->error_signaled = TRUE;
5908                     }
5909                   match = -3;   /* exit from while loop */
5910                   break;
5911                 }
5912
5913               /* Match occurred.  Construct a tag. */
5914               while (charno < rp->regs.end[0])
5915                 if (buffer[charno++] == '\n')
5916                   lineno++, linecharno = charno;
5917               name = rp->name;
5918               if (name[0] == '\0')
5919                 name = NULL;
5920               else /* make a named tag */
5921                 name = substitute (buffer, rp->name, &rp->regs);
5922               if (rp->force_explicit_name)
5923                 /* Force explicit tag name, if a name is there. */
5924                 pfnote (name, TRUE, buffer + linecharno,
5925                         charno - linecharno + 1, lineno, linecharno);
5926               else
5927                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5928                           charno - linecharno + 1, lineno, linecharno);
5929               break;
5930             }
5931         }
5932     }
5933 }
5934
5935 \f
5936 static bool
5937 nocase_tail (const char *cp)
5938 {
5939   register int len = 0;
5940
5941   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5942     cp++, len++;
5943   if (*cp == '\0' && !intoken (dbp[len]))
5944     {
5945       dbp += len;
5946       return TRUE;
5947     }
5948   return FALSE;
5949 }
5950
5951 static void
5952 get_tag (register char *bp, char **namepp)
5953 {
5954   register char *cp = bp;
5955
5956   if (*bp != '\0')
5957     {
5958       /* Go till you get to white space or a syntactic break */
5959       for (cp = bp + 1; !notinname (*cp); cp++)
5960         continue;
5961       make_tag (bp, cp - bp, TRUE,
5962                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5963     }
5964
5965   if (namepp != NULL)
5966     *namepp = savenstr (bp, cp - bp);
5967 }
5968
5969 /*
5970  * Read a line of text from `stream' into `lbp', excluding the
5971  * newline or CR-NL, if any.  Return the number of characters read from
5972  * `stream', which is the length of the line including the newline.
5973  *
5974  * On DOS or Windows we do not count the CR character, if any before the
5975  * NL, in the returned length; this mirrors the behavior of Emacs on those
5976  * platforms (for text files, it translates CR-NL to NL as it reads in the
5977  * file).
5978  *
5979  * If multi-line regular expressions are requested, each line read is
5980  * appended to `filebuf'.
5981  */
5982 static long
5983 readline_internal (linebuffer *lbp, register FILE *stream)
5984 {
5985   char *buffer = lbp->buffer;
5986   register char *p = lbp->buffer;
5987   register char *pend;
5988   int chars_deleted;
5989
5990   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5991
5992   for (;;)
5993     {
5994       register int c = getc (stream);
5995       if (p == pend)
5996         {
5997           /* We're at the end of linebuffer: expand it. */
5998           lbp->size *= 2;
5999           xrnew (buffer, lbp->size, char);
6000           p += buffer - lbp->buffer;
6001           pend = buffer + lbp->size;
6002           lbp->buffer = buffer;
6003         }
6004       if (c == EOF)
6005         {
6006           *p = '\0';
6007           chars_deleted = 0;
6008           break;
6009         }
6010       if (c == '\n')
6011         {
6012           if (p > buffer && p[-1] == '\r')
6013             {
6014               p -= 1;
6015 #ifdef DOS_NT
6016              /* Assume CRLF->LF translation will be performed by Emacs
6017                 when loading this file, so CRs won't appear in the buffer.
6018                 It would be cleaner to compensate within Emacs;
6019                 however, Emacs does not know how many CRs were deleted
6020                 before any given point in the file.  */
6021               chars_deleted = 1;
6022 #else
6023               chars_deleted = 2;
6024 #endif
6025             }
6026           else
6027             {
6028               chars_deleted = 1;
6029             }
6030           *p = '\0';
6031           break;
6032         }
6033       *p++ = c;
6034     }
6035   lbp->len = p - buffer;
6036
6037   if (need_filebuf              /* we need filebuf for multi-line regexps */
6038       && chars_deleted > 0)     /* not at EOF */
6039     {
6040       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6041         {
6042           /* Expand filebuf. */
6043           filebuf.size *= 2;
6044           xrnew (filebuf.buffer, filebuf.size, char);
6045         }
6046       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6047       filebuf.len += lbp->len;
6048       filebuf.buffer[filebuf.len++] = '\n';
6049       filebuf.buffer[filebuf.len] = '\0';
6050     }
6051
6052   return lbp->len + chars_deleted;
6053 }
6054
6055 /*
6056  * Like readline_internal, above, but in addition try to match the
6057  * input line against relevant regular expressions and manage #line
6058  * directives.
6059  */
6060 static void
6061 readline (linebuffer *lbp, FILE *stream)
6062 {
6063   long result;
6064
6065   linecharno = charno;          /* update global char number of line start */
6066   result = readline_internal (lbp, stream); /* read line */
6067   lineno += 1;                  /* increment global line number */
6068   charno += result;             /* increment global char number */
6069
6070   /* Honour #line directives. */
6071   if (!no_line_directive)
6072     {
6073       static bool discard_until_line_directive;
6074
6075       /* Check whether this is a #line directive. */
6076       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6077         {
6078           unsigned int lno;
6079           int start = 0;
6080
6081           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6082               && start > 0)     /* double quote character found */
6083             {
6084               char *endp = lbp->buffer + start;
6085
6086               while ((endp = etags_strchr (endp, '"')) != NULL
6087                      && endp[-1] == '\\')
6088                 endp++;
6089               if (endp != NULL)
6090                 /* Ok, this is a real #line directive.  Let's deal with it. */
6091                 {
6092                   char *taggedabsname;  /* absolute name of original file */
6093                   char *taggedfname;    /* name of original file as given */
6094                   char *name;           /* temp var */
6095
6096                   discard_until_line_directive = FALSE; /* found it */
6097                   name = lbp->buffer + start;
6098                   *endp = '\0';
6099                   canonicalize_filename (name);
6100                   taggedabsname = absolute_filename (name, tagfiledir);
6101                   if (filename_is_absolute (name)
6102                       || filename_is_absolute (curfdp->infname))
6103                     taggedfname = savestr (taggedabsname);
6104                   else
6105                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6106
6107                   if (streq (curfdp->taggedfname, taggedfname))
6108                     /* The #line directive is only a line number change.  We
6109                        deal with this afterwards. */
6110                     free (taggedfname);
6111                   else
6112                     /* The tags following this #line directive should be
6113                        attributed to taggedfname.  In order to do this, set
6114                        curfdp accordingly. */
6115                     {
6116                       fdesc *fdp; /* file description pointer */
6117
6118                       /* Go look for a file description already set up for the
6119                          file indicated in the #line directive.  If there is
6120                          one, use it from now until the next #line
6121                          directive. */
6122                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6123                         if (streq (fdp->infname, curfdp->infname)
6124                             && streq (fdp->taggedfname, taggedfname))
6125                           /* If we remove the second test above (after the &&)
6126                              then all entries pertaining to the same file are
6127                              coalesced in the tags file.  If we use it, then
6128                              entries pertaining to the same file but generated
6129                              from different files (via #line directives) will
6130                              go into separate sections in the tags file.  These
6131                              alternatives look equivalent.  The first one
6132                              destroys some apparently useless information. */
6133                           {
6134                             curfdp = fdp;
6135                             free (taggedfname);
6136                             break;
6137                           }
6138                       /* Else, if we already tagged the real file, skip all
6139                          input lines until the next #line directive. */
6140                       if (fdp == NULL) /* not found */
6141                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6142                           if (streq (fdp->infabsname, taggedabsname))
6143                             {
6144                               discard_until_line_directive = TRUE;
6145                               free (taggedfname);
6146                               break;
6147                             }
6148                       /* Else create a new file description and use that from
6149                          now on, until the next #line directive. */
6150                       if (fdp == NULL) /* not found */
6151                         {
6152                           fdp = fdhead;
6153                           fdhead = xnew (1, fdesc);
6154                           *fdhead = *curfdp; /* copy curr. file description */
6155                           fdhead->next = fdp;
6156                           fdhead->infname = savestr (curfdp->infname);
6157                           fdhead->infabsname = savestr (curfdp->infabsname);
6158                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6159                           fdhead->taggedfname = taggedfname;
6160                           fdhead->usecharno = FALSE;
6161                           fdhead->prop = NULL;
6162                           fdhead->written = FALSE;
6163                           curfdp = fdhead;
6164                         }
6165                     }
6166                   free (taggedabsname);
6167                   lineno = lno - 1;
6168                   readline (lbp, stream);
6169                   return;
6170                 } /* if a real #line directive */
6171             } /* if #line is followed by a number */
6172         } /* if line begins with "#line " */
6173
6174       /* If we are here, no #line directive was found. */
6175       if (discard_until_line_directive)
6176         {
6177           if (result > 0)
6178             {
6179               /* Do a tail recursion on ourselves, thus discarding the contents
6180                  of the line buffer. */
6181               readline (lbp, stream);
6182               return;
6183             }
6184           /* End of file. */
6185           discard_until_line_directive = FALSE;
6186           return;
6187         }
6188     } /* if #line directives should be considered */
6189
6190   {
6191     int match;
6192     regexp *rp;
6193     char *name;
6194
6195     /* Match against relevant regexps. */
6196     if (lbp->len > 0)
6197       for (rp = p_head; rp != NULL; rp = rp->p_next)
6198         {
6199           /* Only use generic regexps or those for the current language.
6200              Also do not use multiline regexps, which is the job of
6201              regex_tag_multiline. */
6202           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6203               || rp->multi_line)
6204             continue;
6205
6206           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6207           switch (match)
6208             {
6209             case -2:
6210               /* Some error. */
6211               if (!rp->error_signaled)
6212                 {
6213                   error ("regexp stack overflow while matching \"%s\"",
6214                          rp->pattern);
6215                   rp->error_signaled = TRUE;
6216                 }
6217               break;
6218             case -1:
6219               /* No match. */
6220               break;
6221             case 0:
6222               /* Empty string matched. */
6223               if (!rp->error_signaled)
6224                 {
6225                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6226                   rp->error_signaled = TRUE;
6227                 }
6228               break;
6229             default:
6230               /* Match occurred.  Construct a tag. */
6231               name = rp->name;
6232               if (name[0] == '\0')
6233                 name = NULL;
6234               else /* make a named tag */
6235                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6236               if (rp->force_explicit_name)
6237                 /* Force explicit tag name, if a name is there. */
6238                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6239               else
6240                 make_tag (name, strlen (name), TRUE,
6241                           lbp->buffer, match, lineno, linecharno);
6242               break;
6243             }
6244         }
6245   }
6246 }
6247
6248 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is produced by
 * savenstr, which is asked for all strlen(cp) characters and hands
 * back a freshly allocated buffer.
 */
static char *
savestr (const char *cp)
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6258
6259 /*
6260  * Return a pointer to a space of size LEN+1 allocated with xnew where
6261  * the string CP has been copied for at most the first LEN characters.
6262  */
6263 static char *
6264 savenstr (const char *cp, int len)
6265 {
6266   register char *dp;
6267
6268   dp = xnew (len + 1, char);
6269   strncpy (dp, cp, len);
6270   dp[len] = '\0';
6271   return dp;
6272 }
6273
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is
 * considered part of the string, so searching for '\0' returns a
 * pointer to the terminator.
 *
 * Portable stand-in for POSIX strrchr.  As strrchr does, C is
 * converted to char before comparing, so that values above 127 are
 * found even where plain char is signed.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char wanted = (char) c;
  const char *last = NULL;

  do
    {
      if (*sp == wanted)
        last = sp;
    }
  while (*sp++);                /* the NUL itself is examined too */
  return (char *) last;
}
6293
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is
 * considered part of the string, so searching for '\0' returns a
 * pointer to the terminator.
 *
 * Portable stand-in for POSIX strchr.  As strchr does, C is converted
 * to char before comparing, so that values above 127 are found even
 * where plain char is signed.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char wanted = (char) c;

  do
    {
      if (*sp == wanted)
        return (char *) sp;
    }
  while (*sp++);                /* the NUL itself is examined too */
  return NULL;
}
6310
/*
 * Compare the strings S1 and S2, treating upper and lower case letters
 * as equal.  Case folding is applied only at positions where both
 * characters are alphabetic; anywhere else the raw characters are
 * compared.  Returns the difference of the first pair of characters
 * that do not match, or 0 when the strings are equal.
 *
 * Meant as a portable stand-in for BSD's strcasecmp.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  for (;; s1++, s2++)
    {
      int fold = ISALPHA (*s1) && ISALPHA (*s2);
      int c1 = fold ? lowcase (*s1) : *s1;
      int c2 = fold ? lowcase (*s2) : *s2;

      /* Stop at the end of S1 or at the first mismatch. */
      if (*s1 == '\0' || c1 != c2)
        return c1 - c2;
    }
}
6329
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * treating upper and lower case letters as equal.  Case folding is
 * applied only at positions where both characters are alphabetic;
 * anywhere else the raw characters are compared.
 *
 * Returns 0 when the first N characters match (in particular whenever
 * N is not positive), otherwise the difference of the first pair of
 * characters that do not match.
 *
 * Meant as a portable stand-in for BSD's strncasecmp.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  /* The count is tested before the characters, so that a string which
     ends exactly after N matching characters compares equal instead of
     having its terminator compared with character N of the other. */
  for (; n > 0; s1++, s2++, n--)
    {
      int fold = ISALPHA (*s1) && ISALPHA (*s2);
      int c1 = fold ? lowcase (*s1) : *s1;
      int c2 = fold ? lowcase (*s2) : *s2;

      if (*s1 == '\0' || c1 != c2)
        return c1 - c2;
    }

  return 0;
}
6352
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  The end of the string is not white space, so the
   scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6361
/* Return a pointer to the first character at or after CP that is
   either white space (per iswhite) or the end of the string. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        break;
      cp++;
    }
  return cp;
}
6370
/* Report a fatal problem and terminate the program.  S1 and S2 are
   handed to error unchanged (S1 is the printf control string, S2 its
   argument); the process then exits with status EXIT_FAILURE. */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);

  /* Never returns to the caller. */
  exit (EXIT_FAILURE);
}
6378
/* Like fatal, but for failures that set errno: print S1 through
   perror, then exit with status EXIT_FAILURE. */
static void
pfatal (const char *s1)
{
  perror (s1);

  /* Never returns to the caller. */
  exit (EXIT_FAILURE);
}
6385
6386 static void
6387 suggest_asking_for_help (void)
6388 {
6389   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6390            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6391   exit (EXIT_FAILURE);
6392 }
6393
6394 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6395 static void
6396 error (const char *s1, const char *s2)
6397 {
6398   fprintf (stderr, "%s: ", progname);
6399   fprintf (stderr, s1, s2);
6400   fprintf (stderr, "\n");
6401 }
6402
6403 /* Return a newly-allocated string whose contents
6404    concatenate those of s1, s2, s3.  */
6405 static char *
6406 concat (const char *s1, const char *s2, const char *s3)
6407 {
6408   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6409   char *result = xnew (len1 + len2 + len3 + 1, char);
6410
6411   strcpy (result, s1);
6412   strcpy (result + len1, s2);
6413   strcpy (result + len1 + len2, s3);
6414   result[len1 + len2 + len3] = '\0';
6415
6416   return result;
6417 }
6418
6419 \f
6420 /* Does the same work as the system V getcwd, but does not need to
6421    guess the buffer size in advance. */
6422 static char *
6423 etags_getcwd (void)
6424 {
6425 #ifdef HAVE_GETCWD
6426   int bufsize = 200;
6427   char *path = xnew (bufsize, char);
6428
6429   while (getcwd (path, bufsize) == NULL)
6430     {
6431       if (errno != ERANGE)
6432         pfatal ("getcwd");
6433       bufsize *= 2;
6434       free (path);
6435       path = xnew (bufsize, char);
6436     }
6437
6438   canonicalize_filename (path);
6439   return path;
6440
6441 #else /* not HAVE_GETCWD */
6442 #if MSDOS
6443
6444   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6445
6446   getwd (path);
6447
6448   for (p = path; *p != '\0'; p++)
6449     if (*p == '\\')
6450       *p = '/';
6451     else
6452       *p = lowcase (*p);
6453
6454   return strdup (path);
6455 #else /* not MSDOS */
6456   linebuffer path;
6457   FILE *pipe;
6458
6459   linebuffer_init (&path);
6460   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6461   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6462     pfatal ("pwd");
6463   pclose (pipe);
6464
6465   return path.buffer;
6466 #endif /* not MSDOS */
6467 #endif /* not HAVE_GETCWD */
6468 }
6469
6470 /* Return a newly allocated string containing the file name of FILE
6471    relative to the absolute directory DIR (which should end with a slash). */
6472 static char *
6473 relative_filename (char *file, char *dir)
6474 {
6475   char *fp, *dp, *afn, *res;
6476   int i;
6477
6478   /* Find the common root of file and dir (with a trailing slash). */
6479   afn = absolute_filename (file, cwd);
6480   fp = afn;
6481   dp = dir;
6482   while (*fp++ == *dp++)
6483     continue;
6484   fp--, dp--;                   /* back to the first differing char */
6485 #ifdef DOS_NT
6486   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6487     return afn;
6488 #endif
6489   do                            /* look at the equal chars until '/' */
6490     fp--, dp--;
6491   while (*fp != '/');
6492
6493   /* Build a sequence of "../" strings for the resulting relative file name. */
6494   i = 0;
6495   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6496     i += 1;
6497   res = xnew (3*i + strlen (fp + 1) + 1, char);
6498   res[0] = '\0';
6499   while (i-- > 0)
6500     strcat (res, "../");
6501
6502   /* Add the file name relative to the common root of file and dir. */
6503   strcat (res, fp + 1);
6504   free (afn);
6505
6506   return res;
6507 }
6508
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." substrings are then removed in
   place.  Should the name end up empty, "/" is returned instead. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  An empty FILE has
     no second character to look at. */
  else if (file[0] != '\0' && file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk backwards from SLASHP to the closest position
                 where an absolute file name starts, i.e. the slash in
                 front of the component that ".." cancels.  CP never
                 moves in front of RES. */
              cp = slashp;
              while (cp > res && !filename_is_absolute (cp - 1))
                cp--;
              if (cp > res)
                cp--;           /* an absolute name starts at cp - 1 */
              else
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              if (cp[0] != '/')
                cp = slashp;
#endif
              /* strlen (slashp + 2) is the length of what follows
                 "/.." plus one, so the terminating NUL moves too. */
              memmove (cp, slashp + 3, strlen (slashp + 2));
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/."; the length again includes the NUL. */
              memmove (slashp, slashp + 2, strlen (slashp + 1));
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6571
/* Return a newly allocated string containing the absolute file name
   of the directory in which FILE resides, given DIR (which should end
   with a slash).  When FILE contains no slash the result is a copy of
   DIR.  FILE is modified while the function runs, but is restored
   before it returns. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *dirname;
  char after_slash;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, resolve what is left, then
     put the overwritten character back. */
  after_slash = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = after_slash;

  return dirname;
}
6591
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  FN
   must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return fn[0] == '/';
}
6603
/* Canonicalize the file name FN in place: under DOS_NT downcase the
   drive letter, and in all cases replace every run of separator
   characters with a single slash.  The separator is a backslash under
   DOS_NT and a slash otherwise. */
static void
canonicalize_filename (register char *fn)
{
  register char *src;
  register char *dst;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR(c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads the name and DST rewrites it; DST never overtakes SRC,
     so the work can be done in place. */
  src = fn;
  dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        {
          *dst++ = *src++;
          continue;
        }

      /* One slash stands for the whole run of separators. */
      *dst++ = '/';
      do
        src++;
      while (*src == sep);
    }
  *dst = '\0';
}
6633
6634 \f
6635 /* Initialize a linebuffer for use. */
6636 static void
6637 linebuffer_init (linebuffer *lbp)
6638 {
6639   lbp->size = (DEBUG) ? 3 : 200;
6640   lbp->buffer = xnew (lbp->size, char);
6641   lbp->buffer[0] = '\0';
6642   lbp->len = 0;
6643 }
6644
6645 /* Set the minimum size of a string contained in a linebuffer. */
6646 static void
6647 linebuffer_setlen (linebuffer *lbp, int toksize)
6648 {
6649   while (lbp->size <= toksize)
6650     {
6651       lbp->size *= 2;
6652       xrnew (lbp->buffer, lbp->size, char);
6653     }
6654   lbp->len = toksize;
6655 }
6656
6657 /* Like malloc but get fatal error if memory is exhausted. */
6658 static PTR
6659 xmalloc (unsigned int size)
6660 {
6661   PTR result = (PTR) malloc (size);
6662   if (result == NULL)
6663     fatal ("virtual memory exhausted", (char *)NULL);
6664   return result;
6665 }
6666
6667 static PTR
6668 xrealloc (char *ptr, unsigned int size)
6669 {
6670   PTR result = (PTR) realloc (ptr, size);
6671   if (result == NULL)
6672     fatal ("virtual memory exhausted", (char *)NULL);
6673   return result;
6674 }
6675
6676 /*
6677  * Local Variables:
6678  * indent-tabs-mode: t
6679  * tab-width: 8
6680  * fill-column: 79
6681  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6682  * c-file-style: "gnu"
6683  * End:
6684  */
6685
6686 /* etags.c ends here */