lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2011
  32   Free Software Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
  82
  83 #define TRUE    1
  84 #define FALSE   0
  85
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* disable assert */
  92 #endif
  93
  94 #ifdef HAVE_CONFIG_H
  95 # include <config.h>
  96   /* This is probably not necessary any more.  On some systems, config.h
  97      used to define static as nothing for the sake of unexec.  We don't
  98      want that here since we don't use unexec.  None of these systems
  99      are supported any more, but the idea is still mentioned in
 100      etc/PROBLEMS.  */
 101 # undef static
 102 # ifndef PTR                    /* for XEmacs */
 103 #   define PTR void *
 104 # endif
 105 #else  /* no config.h */
 106 # if defined (__STDC__) && (__STDC__ || defined (__SUNPRO_C))
 107 #   define PTR void *           /* for generic pointers */
 108 # else /* not standard C */
 109 #   define const                /* remove const for old compilers' sake */
 110 #   define PTR long *           /* don't use void* */
 111 # endif
 112 #endif /* !HAVE_CONFIG_H */
 113
 114 #ifndef _GNU_SOURCE
 115 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 116 #endif
 117
 118 /* WIN32_NATIVE is for XEmacs.
 119    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 120 #ifdef WIN32_NATIVE
 121 # undef MSDOS
 122 # undef  WINDOWSNT
 123 # define WINDOWSNT
 124 #endif /* WIN32_NATIVE */
 125
 126 #ifdef MSDOS
 127 # undef MSDOS
 128 # define MSDOS TRUE
 129 # include <fcntl.h>
 130 # include <sys/param.h>
 131 # include <io.h>
 132 # ifndef HAVE_CONFIG_H
 133 #   define DOS_NT
 134 #   include <sys/config.h>
 135 # endif
 136 #else
 137 # define MSDOS FALSE
 138 #endif /* MSDOS */
 139
 140 #ifdef WINDOWSNT
 141 # include <fcntl.h>
 142 # include <direct.h>
 143 # include <io.h>
 144 # define MAXPATHLEN _MAX_PATH
 145 # undef HAVE_NTGUI
 146 # undef  DOS_NT
 147 # define DOS_NT
 148 # ifndef HAVE_GETCWD
 149 #   define HAVE_GETCWD
 150 # endif /* undef HAVE_GETCWD */
 151 #else /* not WINDOWSNT */
 152 #endif /* !WINDOWSNT */
 153
 154 #include <unistd.h>
 155 #ifndef HAVE_UNISTD_H
 156 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 157     extern char *getcwd (char *buf, size_t size);
 158 # endif
 159 #endif /* HAVE_UNISTD_H */
 160
 161 #include <stdlib.h>
 162 #include <string.h>
 163 #include <stdio.h>
 164 #include <ctype.h>
 165 #include <errno.h>
 166 #include <sys/types.h>
 167 #include <sys/stat.h>
 168
 169 #include <assert.h>
 170 #ifdef NDEBUG
 171 # undef  assert                 /* some systems have a buggy assert.h */
 172 # define assert(x) ((void) 0)
 173 #endif
 174
 175 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 176 # define NO_LONG_OPTIONS TRUE
 177 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 178   extern char *optarg;
 179   extern int optind, opterr;
 180 #else
 181 # define NO_LONG_OPTIONS FALSE
 182 # include <getopt.h>
 183 #endif /* NO_LONG_OPTIONS */
 184
 185 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 186 # ifdef __CYGWIN__              /* compiling on Cygwin */
 187                              !!! NOTICE !!!
 188  the regex.h distributed with Cygwin is not compatible with etags, alas!
 189 If you want regular expression support, you should delete this notice and
 190               arrange to use the GNU regex.h and regex.c.
 191 # endif
 192 #endif
 193 #include <regex.h>
 194
 195 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 196  Leave it undefined to make the program "etags", which makes emacs-style
 197  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 198 #ifdef CTAGS
 199 # undef  CTAGS
 200 # define CTAGS TRUE
 201 #else
 202 # define CTAGS FALSE
 203 #endif
 204 /* String equality tests; every argument must be a non-NULL string.  */
 205 #define streq(s,t)      (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 206 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 207 #define strneq(s,t,n)   (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 208 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 209
 210 #define CHARS 256               /* 2^8: size of the per-character tables */
 211 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* x as index 0..CHARS-1 */
 212 #define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
 213 #define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
 214 #define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
 215 #define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
 216 #define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
 217 /* <ctype.h> wrappers: CHAR masks the argument into 0..CHARS-1 first.  */
 218 #define ISALNUM(c)      isalnum (CHAR (c))
 219 #define ISALPHA(c)      isalpha (CHAR (c))
 220 #define ISDIGIT(c)      isdigit (CHAR (c))
 221 #define ISLOWER(c)      islower (CHAR (c))
 222
 223 #define lowcase(c)      tolower (CHAR (c))
 224
 225
 226 /*
 227  *      xnew, xrnew -- allocate, reallocate storage for N objects of Type
 228  *      (N * sizeof (Type) is not checked for overflow)
 229  * SYNOPSIS:    Type *xnew (int n, Type);
 230  *              Type *xrnew (OldPointer, int n, Type);  (assigns OldPointer)
 231  */
 232 #if DEBUG
 233 # include "chkmalloc.h"
 234 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 235                                                   (n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #else
 239 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 240 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 241                                         (char *) (op), (n) * sizeof (Type)))
 242 #endif
 243
 244 #define bool int                /* NOTE(review): presumably int so &flag fits longopts */
 245
 246 typedef void Lang_function (FILE *); /* signature of a language's parse function */
 247
 248 typedef struct
 249 {
 250   const char *suffix;           /* file name suffix for this compressor */
 251   const char *command;          /* takes one arg and decompresses to stdout */
 252 } compressor;
 253
 254 typedef struct
 255 {
 256   const char *name;             /* language name */
 257   const char *help;             /* detailed help for the language */
 258   Lang_function *function;      /* parse function */
 259   const char **suffixes;        /* name suffixes of this language's files */
 260   const char **filenames;       /* names of this language's files */
 261   const char **interpreters;    /* interpreters for this language */
 262   bool metasource;              /* source used to generate other sources */
 263 } language;
 264
 265 typedef struct fdesc
 266 {
 267   struct fdesc *next;           /* for the linked list */
 268   char *infname;                /* uncompressed input file name */
 269   char *infabsname;             /* absolute uncompressed input file name */
 270   char *infabsdir;              /* absolute dir of input file */
 271   char *taggedfname;            /* file name to write in tagfile */
 272   language *lang;               /* language of file */
 273   char *prop;                   /* file properties to write in tagfile */
 274   bool usecharno;               /* etags tags shall contain char number */
 275   bool written;                 /* entry written in the tags file */
 276 } fdesc;
 277
 278 typedef struct node_st
 279 {                               /* one tag; node of the binary tree of tags */
 280   struct node_st *left, *right; /* left and right children */
 281   fdesc *fdp;                   /* description of the file the tag belongs to */
 282   char *name;                   /* tag name */
 283   char *regex;                  /* search regexp */
 284   bool valid;                   /* write this tag on the tag file */
 285   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 286   bool been_warned;             /* warning already given for duplicated tag */
 287   int lno;                      /* line number the tag is on */
 288   long cno;                     /* character number the line starts on */
 289 } node;
 290
 291 /*
 292  * A `linebuffer' is a structure which holds a line of text.
 293  * `readline_internal' reads a line from a stream into a linebuffer
 294  * and works regardless of the length of the line.
 295  * SIZE is the size of BUFFER, LEN is the length of the string in
 296  * BUFFER after readline reads it.
 297  */
 298 typedef struct
 299 {
 300   long size;                    /* size of BUFFER */
 301   int len;                      /* length of the string held in BUFFER */
 302   char *buffer;                 /* the text of the line */
 303 } linebuffer;
 304
 305 /* Used to support mixing of --lang and file names. */
 306 typedef struct
 307 {
 308   enum {
 309     at_language,                /* a language specification */
 310     at_regexp,                  /* a regular expression */
 311     at_filename,                /* a file name */
 312     at_stdin,                   /* read from stdin here */
 313     at_end                      /* stop parsing the list */
 314   } arg_type;                   /* argument type */
 315   language *lang;               /* language associated with the argument */
 316   char *what;                   /* the argument itself */
 317 } argument;
 318
 319 /* Structure defining a regular expression. */
 320 typedef struct regexp
 321 {
 322   struct regexp *p_next;        /* pointer to next in list */
 323   language *lang;               /* if set, use only for this language */
 324   char *pattern;                /* the regexp pattern */
 325   char *name;                   /* tag name */
 326   struct re_pattern_buffer *pat; /* the compiled pattern */
 327   struct re_registers regs;     /* re registers */
 328   bool error_signaled;          /* already signaled for this regexp */
 329   bool force_explicit_name;     /* do not allow implicit tag name */
 330   bool ignore_case;             /* ignore case when matching */
 331   bool multi_line;              /* do a multi-line match on the whole file */
 332 } regexp;
 333
 334
 335 /* Many compilers barf on this:
 336         Lang_function Ada_funcs;
 337    so let's write it this way */
 338 static void Ada_funcs (FILE *);
 339 static void Asm_labels (FILE *);
 340 static void C_entries (int c_ext, FILE *);
 341 static void default_C_entries (FILE *);
 342 static void plain_C_entries (FILE *);
 343 static void Cjava_entries (FILE *);
 344 static void Cobol_paragraphs (FILE *);
 345 static void Cplusplus_entries (FILE *);
 346 static void Cstar_entries (FILE *);
 347 static void Erlang_functions (FILE *);
 348 static void Forth_words (FILE *);
 349 static void Fortran_functions (FILE *);
 350 static void HTML_labels (FILE *);
 351 static void Lisp_functions (FILE *);
 352 static void Lua_functions (FILE *);
 353 static void Makefile_targets (FILE *);
 354 static void Pascal_functions (FILE *);
 355 static void Perl_functions (FILE *);
 356 static void PHP_functions (FILE *);
 357 static void PS_functions (FILE *);
 358 static void Prolog_functions (FILE *);
 359 static void Python_functions (FILE *);
 360 static void Scheme_functions (FILE *);
 361 static void TeX_commands (FILE *);
 362 static void Texinfo_nodes (FILE *);
 363 static void Yacc_entries (FILE *);
 364 static void just_read_file (FILE *);
 365
 366 static void print_language_names (void);
 367 static void print_version (void);
 368 static void print_help (argument *);
 369 int main (int, char **);
 370
 371 static compressor *get_compressor_from_suffix (char *, char **);
 372 static language *get_language_from_langname (const char *);
 373 static language *get_language_from_interpreter (char *);
 374 static language *get_language_from_filename (char *, bool);
 375 static void readline (linebuffer *, FILE *);
 376 static long readline_internal (linebuffer *, FILE *);
 377 static bool nocase_tail (const char *);
 378 static void get_tag (char *, char **);
 379
 380 static void analyse_regex (char *);
 381 static void free_regexps (void);
 382 static void regex_tag_multiline (void);
 383 static void error (const char *, const char *);
 384 static void suggest_asking_for_help (void) NO_RETURN;
 385 void fatal (const char *, const char *) NO_RETURN;
 386 static void pfatal (const char *) NO_RETURN;
 387 static void add_node (node *, node **);
 388
 389 static void init (void);
 390 static void process_file_name (char *, language *);
 391 static void process_file (FILE *, char *, language *);
 392 static void find_entries (FILE *);
 393 static void free_tree (node *);
 394 static void free_fdesc (fdesc *);
 395 static void pfnote (char *, bool, char *, int, int, long);
 396 static void make_tag (const char *, int, bool, char *, int, int, long);
 397 static void invalidate_nodes (fdesc *, node **);
 398 static void put_entries (node *);
 399
 400 static char *concat (const char *, const char *, const char *);
 401 static char *skip_spaces (char *);
 402 static char *skip_non_spaces (char *);
 403 static char *savenstr (const char *, int);
 404 static char *savestr (const char *);
 405 static char *etags_strchr (const char *, int);
 406 static char *etags_strrchr (const char *, int);
 407 static int etags_strcasecmp (const char *, const char *);
 408 static int etags_strncasecmp (const char *, const char *, int);
 409 static char *etags_getcwd (void);
 410 static char *relative_filename (char *, char *);
 411 static char *absolute_filename (char *, char *);
 412 static char *absolute_dirname (char *, char *);
 413 static bool filename_is_absolute (char *f);
 414 static void canonicalize_filename (char *);
 415 static void linebuffer_init (linebuffer *);
 416 static void linebuffer_setlen (linebuffer *, int);
 417 static PTR xmalloc (size_t);
 418 static PTR xrealloc (char *, size_t);
 419
 420 \f
 421 static char searchar = '/';     /* use /.../ searches */
 422
 423 static char *tagfile;           /* output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr for tags file */
 428 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 429
 430 static fdesc *fdhead;           /* head of file description list */
 431 static fdesc *curfdp;           /* current file description */
 432 static int lineno;              /* line number of current line */
 433 static long charno;             /* current character number */
 434 static long linecharno;         /* charno of start of current line */
 435 static char *dbp;               /* pointer to start of current tag */
 436
 437 static const int invalidcharno = -1;
 438
 439 static node *nodehead;          /* the head of the binary tree of tags */
 440 static node *last_node;         /* the last node created */
 441
 442 static linebuffer lb;           /* the current line */
 443 static linebuffer filebuf;      /* a buffer containing the whole file */
 444 static linebuffer token_name;   /* a buffer containing a tag name */
 445
 446 /* boolean "functions" (see init)       */
 447 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 448 static const char
 449   /* white chars */
 450   *white = " \f\t\n\r\v",
 451   /* not in a name */
 452   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 453   /* token ending chars */
 454   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 455   /* token starting chars */
 456   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 457   /* valid in-token chars */
 458   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 459
 460 static bool append_to_tagfile;  /* -a: append to tags */
 461 /* The next five default to TRUE in C and derived languages.  */
 462 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 463 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 464                                 /* 0 struct/enum/union decls, and C++ */
 465                                 /* member functions. */
 466 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 467                                 /* constants and variables. */
 468                                 /* -D: opposite of -d.  Default under ctags. */
 469 static bool globals;            /* create tags for global variables */
 470 static bool members;            /* create tags for C member variables */
 471 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 472 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 473 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 474 static bool update;             /* -u: update tags */
 475 static bool vgrind_style;       /* -v: create vgrind style index output */
 476 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 477 static bool cxref_style;        /* -x: create cxref style output */
 478 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 479 static bool ignoreindent;       /* -I: ignore indentation in C */
 480 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 481
 482 /* STDIN is defined in LynxOS system headers */
 483 #ifdef STDIN
 484 # undef STDIN
 485 #endif
 486
 487 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 488 static bool parsing_stdin;      /* --parse-stdin used */
 489
 490 static regexp *p_head;          /* list of all regexps */
 491 static bool need_filebuf;       /* some regexes are multi-line */
 492
 493 static struct option longopts[] =  /* long options passed to getopt_long */
 494 {
 495   { "append",             no_argument,       NULL,               'a'   },
 496   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 497   { "c++",                no_argument,       NULL,               'C'   },
 498   { "declarations",       no_argument,       &declarations,      TRUE  },
 499   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 500   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 501   { "help",               no_argument,       NULL,               'h'   },
 502   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): repeats "help" */
 503   { "ignore-indentation", no_argument,       NULL,               'I'   },
 504   { "language",           required_argument, NULL,               'l'   },
 505   { "members",            no_argument,       &members,           TRUE  },
 506   { "no-members",         no_argument,       &members,           FALSE },
 507   { "output",             required_argument, NULL,               'o'   },
 508   { "regex",              required_argument, NULL,               'r'   },
 509   { "no-regex",           no_argument,       NULL,               'R'   },
 510   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 511   { "parse-stdin",        required_argument, NULL,               STDIN },
 512   { "version",            no_argument,       NULL,               'V'   },
 513
 514 #if CTAGS /* Ctags options */
 515   { "backward-search",    no_argument,       NULL,               'B'   },
 516   { "cxref",              no_argument,       NULL,               'x'   },
 517   { "defines",            no_argument,       NULL,               'd'   },
 518   { "globals",            no_argument,       &globals,           TRUE  },
 519   { "typedefs",           no_argument,       NULL,               't'   },
 520   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 521   { "update",             no_argument,       NULL,               'u'   },
 522   { "vgrind",             no_argument,       NULL,               'v'   },
 523   { "no-warn",            no_argument,       NULL,               'w'   },
 524
 525 #else /* Etags options */
 526   { "no-defines",         no_argument,       NULL,               'D'   },
 527   { "no-globals",         no_argument,       &globals,           FALSE },
 528   { "include",            required_argument, NULL,               'i'   },
 529 #endif
 530   { NULL }                      /* end of table */
 531 };
 532
 533 static compressor compressors[] = /* file name suffix -> decompress command */
 534 {
 535   { "z", "gzip -d -c"},
 536   { "Z", "gzip -d -c"},
 537   { "gz", "gzip -d -c"},
 538   { "GZ", "gzip -d -c"},
 539   { "bz2", "bzip2 -d -c" },
 540   { "xz", "xz -d -c" },
 541   { NULL }                      /* end of table */
 542 };
 543
 544 /*
 545  * Language stuff.
 546  */
 547
 548 /* Ada code */
 549 static const char *Ada_suffixes [] =
 550   { "ads", "adb", "ada", NULL };
 551 static const char Ada_help [] =
 552 "In Ada code, functions, procedures, packages, tasks and types are\n\
 553 tags.  Use the `--packages-only' option to create tags for\n\
 554 packages only.\n\
 555 Ada tag names have suffixes indicating the type of entity:\n\
 556         Entity type:    Qualifier:\n\
 557         ------------    ----------\n\
 558         function        /f\n\
 559         procedure       /p\n\
 560         package spec    /s\n\
 561         package body    /b\n\
 562         type            /t\n\
 563         task            /k\n\
 564 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 565 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 566 will just search for any tag `bidule'.";
 567
 568 /* Assembly code */
 569 static const char *Asm_suffixes [] =
 570   { "a",        /* Unix assembler */
 571     "asm", /* Microcontroller assembly */
 572     "def", /* BSO/Tasking definition includes  */
 573     "inc", /* Microcontroller include files */
 574     "ins", /* Microcontroller include files */
 575     "s", "sa", /* Unix assembler */
 576     "S",   /* cpp-processed Unix assembler */
 577     "src", /* BSO/Tasking C compiler output */
 578     NULL
 579   };
 580 static const char Asm_help [] =
 581 "In assembler code, labels appearing at the beginning of a line,\n\
 582 followed by a colon, are tags.";
 583
 584
 585 /* Note that .c and .h can be considered C++, if the --c++ flag was
 586    given, or if the `class' or `template' keywords are met inside the file.
 587    That is why default_C_entries is called for these. */
 588 static const char *default_C_suffixes [] =
 589   { "c", "h", NULL };
 590 #if CTAGS                               /* C help for Ctags */
 591 static const char default_C_help [] =
 592 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 593 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 594 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 595 Use --globals to tag global variables.\n\
 596 You can tag function declarations and external variables by\n\
 597 using `--declarations', and struct members by using `--members'.";
 598 #else                                   /* C help for Etags */
 599 static const char default_C_help [] =
 600 "In C code, any C function or typedef is a tag, and so are\n\
 601 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 602 definitions and `enum' constants are tags unless you specify\n\
 603 `--no-defines'.  Global variables are tags unless you specify\n\
 604 `--no-globals' and so are struct members unless you specify\n\
 605 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 606 `--no-members' can make the tags table file much smaller.\n\
 607 You can tag function declarations and external variables by\n\
 608 using `--declarations'.";
 609 #endif  /* C help for Ctags and Etags */
 610
 611 static const char *Cplusplus_suffixes [] =
 612   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 613     "M",                        /* Objective C++ */
 614     "pdb",                      /* PostScript with C syntax */
 615     NULL };
 616 static const char Cplusplus_help [] =
 617 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 618 --help --lang=c --lang=c++ for full help.)\n\
 619 In addition to C tags, member functions are also recognized.  Member\n\
 620 variables are recognized unless you use the `--no-members' option.\n\
 621 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 622 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 623 `operator+'.";
 624
 625 static const char *Cjava_suffixes [] =
 626   { "java", NULL };
 627 static const char Cjava_help [] =      /* detailed help for Java; read-only */
 628 "In Java code, all the tags constructs of C and C++ code are\n\
 629 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 630
 631
 632 static const char *Cobol_suffixes [] =
 633   { "COB", "cob", NULL };
 634 static const char Cobol_help [] =      /* detailed help for Cobol; read-only */
 635 "In Cobol code, tags are paragraph names; that is, any word\n\
 636 starting in column 8 and followed by a period.";
 637
 638 static const char *Cstar_suffixes [] =
 639   { "cs", "hs", NULL };
 640
 641 static const char *Erlang_suffixes [] =
 642   { "erl", "hrl", NULL };
 643 static const char Erlang_help [] =
 644 "In Erlang code, the tags are the functions, records and macros\n\
 645 defined in the file.";
 646
 647 static const char *Forth_suffixes [] = /* file-local, like the other suffix tables */
 648   { "fth", "tok", NULL };
 649 static const char Forth_help [] =
 650 "In Forth code, tags are words defined by `:',\n\
 651 constant, code, create, defer, value, variable, buffer:, field.";
 652
 653 static const char *Fortran_suffixes [] =
 654   { "F", "f", "f90", "for", NULL };
 655 static const char Fortran_help [] =
 656 "In Fortran code, functions, subroutines and block data are tags.";
 657
 658 static const char *HTML_suffixes [] =
 659   { "htm", "html", "shtml", NULL };
 660 static const char HTML_help [] =
 661 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 662 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 663 occurrences of `id='.";
 664
 665 static const char *Lisp_suffixes [] =
 666   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 667 static const char Lisp_help [] =
 668 "In Lisp code, any function defined with `defun', any variable\n\
 669 defined with `defvar' or `defconst', and in general the first\n\
 670 argument of any expression that starts with `(def' in column zero\n\
 671 is a tag.";
 672
 673 static const char *Lua_suffixes [] =
 674   { "lua", "LUA", NULL };
 675 static const char Lua_help [] =
 676 "In Lua scripts, all functions are tags.";
 677
 678 static const char *Makefile_filenames [] =
 679   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 680 static const char Makefile_help [] =
 681 "In makefiles, targets are tags; additionally, variables are tags\n\
 682 unless you specify `--no-globals'.";
 683
 684 static const char *Objc_suffixes [] =
 685   { "lm",                       /* Objective lex file */
 686     "m",                        /* Objective C file */
 687      NULL };
 688 static const char Objc_help [] =
 689 "In Objective C code, tags include Objective C definitions for classes,\n\
 690 class categories, methods and protocols.  Tags for variables and\n\
 691 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 692 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 693
 694 static const char *Pascal_suffixes [] =
 695   { "p", "pas", NULL };
 696 static const char Pascal_help [] =
 697 "In Pascal code, the tags are the functions and procedures defined\n\
 698 in the file.";
 699 /* " // this is for working around an Emacs highlighting bug... */
 700
 701 static const char *Perl_suffixes [] =
 702   { "pl", "pm", NULL };
 703 static const char *Perl_interpreters [] =
 704   { "perl", "@PERL@", NULL };
 705 static const char Perl_help [] =
 706 "In Perl code, the tags are the packages, subroutines and variables\n\
 707 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 708 `--globals' if you want to tag global variables.  Tags for\n\
 709 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 710 defined in the default package is `main::SUB'.";
 711
 712 static const char *PHP_suffixes [] =
 713   { "php", "php3", "php4", NULL };
 714 static const char PHP_help [] =
 715 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 716 the `--no-members' option, vars are tags too.";
 717
 718 static const char *plain_C_suffixes [] =
 719   { "pc",                       /* Pro*C file */
 720      NULL };
 721
 722 static const char *PS_suffixes [] =
 723   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 724 static const char PS_help [] =
 725 "In PostScript code, the tags are the functions.";
 726
 727 static const char *Prolog_suffixes [] =
 728   { "prolog", NULL };
 729 static const char Prolog_help [] =
 730 "In Prolog code, tags are predicates and rules at the beginning of\n\
 731 line.";
 732
 733 static const char *Python_suffixes [] =
 734   { "py", NULL };
 735 static const char Python_help [] =
 736 "In Python code, `def' or `class' at the beginning of a line\n\
 737 generate a tag.";
 738
 739 /* Can't do the `SCM' or `scm' prefix with a version number. */
 740 static const char *Scheme_suffixes [] =
 741   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 742 static const char Scheme_help [] =
 743 "In Scheme code, tags include anything defined with `def' or with a\n\
 744 construct whose name starts with `def'.  They also include\n\
 745 variables set with `set!' at top level in the file.";
 746
 747 static const char *TeX_suffixes [] =
 748   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 749 static const char TeX_help [] =
 750 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 751 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 752 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 753 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 754 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 755 \n\
 756 Other commands can be specified by setting the environment variable\n\
 757 `TEXTAGS' to a colon-separated list like, for example,\n\
 758      TEXTAGS=\"mycommand:myothercommand\".";
 759
 760
 761 static const char *Texinfo_suffixes [] =
 762   { "texi", "texinfo", "txi", NULL };
 763 static const char Texinfo_help [] =
 764 "for texinfo files, lines starting with @node are tagged.";
 765
 766 static const char *Yacc_suffixes [] =
 767   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 768 static const char Yacc_help [] =
 769 "In Bison or Yacc input files, each rule defines as a tag the\n\
 770 nonterminal it constructs.  The portions of the file that contain\n\
 771 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 772 for full help).";
 773
 774 static const char auto_help [] =
 775 "`auto' is not a real language, it indicates to use\n\
 776 a default language for files base on file name suffix and file contents.";
 777
 778 static const char none_help [] =
 779 "`none' is not a real language, it indicates to only do\n\
 780 regexp processing on files.";
 781
 782 static const char no_lang_help [] =
 783 "No detailed help available for this language.";
 784
 785
/*
 * Table of languages.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 *
 * Each row is a positional initializer.  Going by the rows below, the
 * columns are, in order:
 *   1. the language name, as matched against `--language=LANG';
 *   2. the detailed help text printed by `--help --language=LANG';
 *   3. a function (presumably the one that parses files of that
 *      language -- confirm against the `language' declaration);
 *   4. NULL-terminated list of file name suffixes, without the dot;
 *   5. NULL-terminated list of whole file names (only makefile);
 *   6. NULL-terminated list of interpreter names (only perl);
 *   7. a boolean that is set only for yacc; its meaning is not
 *      visible in this part of the file.
 * Trailing columns that are omitted are zero-initialized.  The row
 * whose name is NULL terminates the table; every loop over it stops
 * there.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* Makefiles have no suffix list; they are matched by file name.  */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  /* Perl is the only row that also lists interpreter names.  */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  /* Yacc is the only row that sets the seventh column.  */
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 825
 826 \f
 827 static void
 828 print_language_names (void)
 829 {
 830   language *lang;
 831   const char **name, **ext;
 832
 833   puts ("\nThese are the currently supported languages, along with the\n\
 834 default file names and dot suffixes:");
 835   for (lang = lang_names; lang->name != NULL; lang++)
 836     {
 837       printf ("  %-*s", 10, lang->name);
 838       if (lang->filenames != NULL)
 839         for (name = lang->filenames; *name != NULL; name++)
 840           printf (" %s", *name);
 841       if (lang->suffixes != NULL)
 842         for (ext = lang->suffixes; *ext != NULL; ext++)
 843           printf (" .%s", *ext);
 844       puts ("");
 845     }
 846   puts ("where `auto' means use default language for files based on file\n\
 847 name suffix, and `none' means only do regexp processing on files.\n\
 848 If no language is specified and no matching suffix is found,\n\
 849 the first line of the file is read for a sharp-bang (#!) sequence\n\
 850 followed by the name of an interpreter.  If no such sequence is found,\n\
 851 Fortran is tried first; if no tags are found, C is tried next.\n\
 852 When parsing any C file, a \"class\" or \"template\" keyword\n\
 853 switches to C++.");
 854   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 855 \n\
 856 For detailed help on a given language use, for example,\n\
 857 etags --help --lang=ada.");
 858 }
 859
 860 #ifndef EMACS_NAME
 861 # define EMACS_NAME "standalone"
 862 #endif
 863 #ifndef VERSION
 864 # define VERSION "17.38.1.4"
 865 #endif
 866 static void
 867 print_version (void)
 868 {
 869   /* Makes it easier to update automatically. */
 870   char emacs_copyright[] = "Copyright (C) 2012 Free Software Foundation, Inc.";
 871
 872   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 873   puts (emacs_copyright);
 874   puts ("This program is distributed under the terms in ETAGS.README");
 875
 876   exit (EXIT_SUCCESS);
 877 }
 878
 879 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 880 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 881 #endif
 882
 883 static void
 884 print_help (argument *argbuffer)
 885 {
 886   bool help_for_lang = FALSE;
 887
 888   for (; argbuffer->arg_type != at_end; argbuffer++)
 889     if (argbuffer->arg_type == at_language)
 890       {
 891         if (help_for_lang)
 892           puts ("");
 893         puts (argbuffer->lang->help);
 894         help_for_lang = TRUE;
 895       }
 896
 897   if (help_for_lang)
 898     exit (EXIT_SUCCESS);
 899
 900   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 901 \n\
 902 These are the options accepted by %s.\n", progname, progname);
 903   if (NO_LONG_OPTIONS)
 904     puts ("WARNING: long option names do not work with this executable,\n\
 905 as it is not linked with GNU getopt.");
 906   else
 907     puts ("You may use unambiguous abbreviations for the long option names.");
 908   puts ("  A - as file name means read names from stdin (one per line).\n\
 909 Absolute names are stored in the output file as they are.\n\
 910 Relative ones are stored relative to the output file's directory.\n");
 911
 912   puts ("-a, --append\n\
 913         Append tag entries to existing tags file.");
 914
 915   puts ("--packages-only\n\
 916         For Ada files, only generate tags for packages.");
 917
 918   if (CTAGS)
 919     puts ("-B, --backward-search\n\
 920         Write the search commands for the tag entries using '?', the\n\
 921         backward-search command instead of '/', the forward-search command.");
 922
 923   /* This option is mostly obsolete, because etags can now automatically
 924      detect C++.  Retained for backward compatibility and for debugging and
 925      experimentation.  In principle, we could want to tag as C++ even
 926      before any "class" or "template" keyword.
 927   puts ("-C, --c++\n\
 928         Treat files whose name suffix defaults to C language as C++ files.");
 929   */
 930
 931   puts ("--declarations\n\
 932         In C and derived languages, create tags for function declarations,");
 933   if (CTAGS)
 934     puts ("\tand create tags for extern variables if --globals is used.");
 935   else
 936     puts
 937       ("\tand create tags for extern variables unless --no-globals is used.");
 938
 939   if (CTAGS)
 940     puts ("-d, --defines\n\
 941         Create tag entries for C #define constants and enum constants, too.");
 942   else
 943     puts ("-D, --no-defines\n\
 944         Don't create tag entries for C #define constants and enum constants.\n\
 945         This makes the tags file smaller.");
 946
 947   if (!CTAGS)
 948     puts ("-i FILE, --include=FILE\n\
 949         Include a note in tag file indicating that, when searching for\n\
 950         a tag, one should also consult the tags file FILE after\n\
 951         checking the current file.");
 952
 953   puts ("-l LANG, --language=LANG\n\
 954         Force the following files to be considered as written in the\n\
 955         named language up to the next --language=LANG option.");
 956
 957   if (CTAGS)
 958     puts ("--globals\n\
 959         Create tag entries for global variables in some languages.");
 960   else
 961     puts ("--no-globals\n\
 962         Do not create tag entries for global variables in some\n\
 963         languages.  This makes the tags file smaller.");
 964
 965   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 966     puts ("--no-line-directive\n\
 967         Ignore #line preprocessor directives in C and derived languages.");
 968
 969   if (CTAGS)
 970     puts ("--members\n\
 971         Create tag entries for members of structures in some languages.");
 972   else
 973     puts ("--no-members\n\
 974         Do not create tag entries for members of structures\n\
 975         in some languages.");
 976
 977   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 978         Make a tag for each line matching a regular expression pattern\n\
 979         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 980         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 981         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 982         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 983   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 984         For example Tcl named tags can be created with:\n\
 985           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 986         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 987         `m' means to allow multi-line matches, `s' implies `m' and\n\
 988         causes dot to match any character, including newline.");
 989
 990   puts ("-R, --no-regex\n\
 991         Don't create tags from regexps for the following files.");
 992
 993   puts ("-I, --ignore-indentation\n\
 994         In C and C++ do not assume that a closing brace in the first\n\
 995         column is the final brace of a function or structure definition.");
 996
 997   puts ("-o FILE, --output=FILE\n\
 998         Write the tags to FILE.");
 999
1000   puts ("--parse-stdin=NAME\n\
1001         Read from standard input and record tags as belonging to file NAME.");
1002
1003   if (CTAGS)
1004     {
1005       puts ("-t, --typedefs\n\
1006         Generate tag entries for C and Ada typedefs.");
1007       puts ("-T, --typedefs-and-c++\n\
1008         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1009         and C++ member functions.");
1010     }
1011
1012   if (CTAGS)
1013     puts ("-u, --update\n\
1014         Update the tag entries for the given files, leaving tag\n\
1015         entries for other files in place.  Currently, this is\n\
1016         implemented by deleting the existing entries for the given\n\
1017         files and then rewriting the new entries at the end of the\n\
1018         tags file.  It is often faster to simply rebuild the entire\n\
1019         tag file than to use this.");
1020
1021   if (CTAGS)
1022     {
1023       puts ("-v, --vgrind\n\
1024         Print on the standard output an index of items intended for\n\
1025         human consumption, similar to the output of vgrind.  The index\n\
1026         is sorted, and gives the page number of each item.");
1027
1028       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1029         puts ("-w, --no-duplicates\n\
1030         Do not create duplicate tag entries, for compatibility with\n\
1031         traditional ctags.");
1032
1033       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1034         puts ("-w, --no-warn\n\
1035         Suppress warning messages about duplicate tag entries.");
1036
1037       puts ("-x, --cxref\n\
1038         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1039         The output uses line numbers instead of page numbers, but\n\
1040         beyond that the differences are cosmetic; try both to see\n\
1041         which you like.");
1042     }
1043
1044   puts ("-V, --version\n\
1045         Print the version of the program.\n\
1046 -h, --help\n\
1047         Print this help message.\n\
1048         Followed by one or more `--language' options prints detailed\n\
1049         help about tag generation for the specified languages.");
1050
1051   print_language_names ();
1052
1053   puts ("");
1054   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1055
1056   exit (EXIT_SUCCESS);
1057 }
1058
1059 \f
1060 int
1061 main (int argc, char **argv)
1062 {
1063   int i;
1064   unsigned int nincluded_files;
1065   char **included_files;
1066   argument *argbuffer;
1067   int current_arg, file_count;
1068   linebuffer filename_lb;
1069   bool help_asked = FALSE;
1070   ptrdiff_t len;
1071  char *optstring;
1072  int opt;
1073
1074
1075 #ifdef DOS_NT
1076   _fmode = O_BINARY;   /* all of files are treated as binary files */
1077 #endif /* DOS_NT */
1078
1079   progname = argv[0];
1080   nincluded_files = 0;
1081   included_files = xnew (argc, char *);
1082   current_arg = 0;
1083   file_count = 0;
1084
1085   /* Allocate enough no matter what happens.  Overkill, but each one
1086      is small. */
1087   argbuffer = xnew (argc, argument);
1088
1089   /*
1090    * Always find typedefs and structure tags.
1091    * Also default to find macro constants, enum constants, struct
1092    * members and global variables.  Do it for both etags and ctags.
1093    */
1094   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1095   globals = members = TRUE;
1096
1097   /* When the optstring begins with a '-' getopt_long does not rearrange the
1098      non-options arguments to be at the end, but leaves them alone. */
1099   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1100                       "ac:Cf:Il:o:r:RSVhH",
1101                       (CTAGS) ? "BxdtTuvw" : "Di:");
1102
1103   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1104     switch (opt)
1105       {
1106       case 0:
1107         /* If getopt returns 0, then it has already processed a
1108            long-named option.  We should do nothing.  */
1109         break;
1110
1111       case 1:
1112         /* This means that a file name has been seen.  Record it. */
1113         argbuffer[current_arg].arg_type = at_filename;
1114         argbuffer[current_arg].what     = optarg;
1115         len = strlen (optarg);
1116         if (whatlen_max < len)
1117           whatlen_max = len;
1118         ++current_arg;
1119         ++file_count;
1120         break;
1121
1122       case STDIN:
1123         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1124         argbuffer[current_arg].arg_type = at_stdin;
1125         argbuffer[current_arg].what     = optarg;
1126         len = strlen (optarg);
1127         if (whatlen_max < len)
1128           whatlen_max = len;
1129         ++current_arg;
1130         ++file_count;
1131         if (parsing_stdin)
1132           fatal ("cannot parse standard input more than once", (char *)NULL);
1133         parsing_stdin = TRUE;
1134         break;
1135
1136         /* Common options. */
1137       case 'a': append_to_tagfile = TRUE;       break;
1138       case 'C': cplusplus = TRUE;               break;
1139       case 'f':         /* for compatibility with old makefiles */
1140       case 'o':
1141         if (tagfile)
1142           {
1143             error ("-o option may only be given once.", (char *)NULL);
1144             suggest_asking_for_help ();
1145             /* NOTREACHED */
1146           }
1147         tagfile = optarg;
1148         break;
1149       case 'I':
1150       case 'S':         /* for backward compatibility */
1151         ignoreindent = TRUE;
1152         break;
1153       case 'l':
1154         {
1155           language *lang = get_language_from_langname (optarg);
1156           if (lang != NULL)
1157             {
1158               argbuffer[current_arg].lang = lang;
1159               argbuffer[current_arg].arg_type = at_language;
1160               ++current_arg;
1161             }
1162         }
1163         break;
1164       case 'c':
1165         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1166         optarg = concat (optarg, "i", ""); /* memory leak here */
1167         /* FALLTHRU */
1168       case 'r':
1169         argbuffer[current_arg].arg_type = at_regexp;
1170         argbuffer[current_arg].what = optarg;
1171         len = strlen (optarg);
1172         if (whatlen_max < len)
1173           whatlen_max = len;
1174         ++current_arg;
1175         break;
1176       case 'R':
1177         argbuffer[current_arg].arg_type = at_regexp;
1178         argbuffer[current_arg].what = NULL;
1179         ++current_arg;
1180         break;
1181       case 'V':
1182         print_version ();
1183         break;
1184       case 'h':
1185       case 'H':
1186         help_asked = TRUE;
1187         break;
1188
1189         /* Etags options */
1190       case 'D': constantypedefs = FALSE;                        break;
1191       case 'i': included_files[nincluded_files++] = optarg;     break;
1192
1193         /* Ctags options. */
1194       case 'B': searchar = '?';                                 break;
1195       case 'd': constantypedefs = TRUE;                         break;
1196       case 't': typedefs = TRUE;                                break;
1197       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1198       case 'u': update = TRUE;                                  break;
1199       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1200       case 'x': cxref_style = TRUE;                             break;
1201       case 'w': no_warnings = TRUE;                             break;
1202       default:
1203         suggest_asking_for_help ();
1204         /* NOTREACHED */
1205       }
1206
1207   /* No more options.  Store the rest of arguments. */
1208   for (; optind < argc; optind++)
1209     {
1210       argbuffer[current_arg].arg_type = at_filename;
1211       argbuffer[current_arg].what = argv[optind];
1212       len = strlen (argv[optind]);
1213       if (whatlen_max < len)
1214         whatlen_max = len;
1215       ++current_arg;
1216       ++file_count;
1217     }
1218
1219   argbuffer[current_arg].arg_type = at_end;
1220
1221   if (help_asked)
1222     print_help (argbuffer);
1223     /* NOTREACHED */
1224
1225   if (nincluded_files == 0 && file_count == 0)
1226     {
1227       error ("no input files specified.", (char *)NULL);
1228       suggest_asking_for_help ();
1229       /* NOTREACHED */
1230     }
1231
1232   if (tagfile == NULL)
1233     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1234   cwd = etags_getcwd ();        /* the current working directory */
1235   if (cwd[strlen (cwd) - 1] != '/')
1236     {
1237       char *oldcwd = cwd;
1238       cwd = concat (oldcwd, "/", "");
1239       free (oldcwd);
1240     }
1241
1242   /* Compute base directory for relative file names. */
1243   if (streq (tagfile, "-")
1244       || strneq (tagfile, "/dev/", 5))
1245     tagfiledir = cwd;            /* relative file names are relative to cwd */
1246   else
1247     {
1248       canonicalize_filename (tagfile);
1249       tagfiledir = absolute_dirname (tagfile, cwd);
1250     }
1251
1252   init ();                      /* set up boolean "functions" */
1253
1254   linebuffer_init (&lb);
1255   linebuffer_init (&filename_lb);
1256   linebuffer_init (&filebuf);
1257   linebuffer_init (&token_name);
1258
1259   if (!CTAGS)
1260     {
1261       if (streq (tagfile, "-"))
1262         {
1263           tagf = stdout;
1264 #ifdef DOS_NT
1265           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1266              doesn't take effect until after `stdout' is already open). */
1267           if (!isatty (fileno (stdout)))
1268             setmode (fileno (stdout), O_BINARY);
1269 #endif /* DOS_NT */
1270         }
1271       else
1272         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1273       if (tagf == NULL)
1274         pfatal (tagfile);
1275     }
1276
1277   /*
1278    * Loop through files finding functions.
1279    */
1280   for (i = 0; i < current_arg; i++)
1281     {
1282       static language *lang;    /* non-NULL if language is forced */
1283       char *this_file;
1284
1285       switch (argbuffer[i].arg_type)
1286         {
1287         case at_language:
1288           lang = argbuffer[i].lang;
1289           break;
1290         case at_regexp:
1291           analyse_regex (argbuffer[i].what);
1292           break;
1293         case at_filename:
1294               this_file = argbuffer[i].what;
1295               /* Input file named "-" means read file names from stdin
1296                  (one per line) and use them. */
1297               if (streq (this_file, "-"))
1298                 {
1299                   if (parsing_stdin)
1300                     fatal ("cannot parse standard input AND read file names from it",
1301                            (char *)NULL);
1302                   while (readline_internal (&filename_lb, stdin) > 0)
1303                     process_file_name (filename_lb.buffer, lang);
1304                 }
1305               else
1306                 process_file_name (this_file, lang);
1307           break;
1308         case at_stdin:
1309           this_file = argbuffer[i].what;
1310           process_file (stdin, this_file, lang);
1311           break;
1312         }
1313     }
1314
1315   free_regexps ();
1316   free (lb.buffer);
1317   free (filebuf.buffer);
1318   free (token_name.buffer);
1319
1320   if (!CTAGS || cxref_style)
1321     {
1322       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1323       put_entries (nodehead);
1324       free_tree (nodehead);
1325       nodehead = NULL;
1326       if (!CTAGS)
1327         {
1328           fdesc *fdp;
1329
1330           /* Output file entries that have no tags. */
1331           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1332             if (!fdp->written)
1333               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1334
1335           while (nincluded_files-- > 0)
1336             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1337
1338           if (fclose (tagf) == EOF)
1339             pfatal (tagfile);
1340         }
1341
1342       exit (EXIT_SUCCESS);
1343     }
1344
1345   /* From here on, we are in (CTAGS && !cxref_style) */
1346   if (update)
1347     {
1348       char *cmd =
1349         xmalloc (strlen (tagfile) + whatlen_max +
1350                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1351       for (i = 0; i < current_arg; ++i)
1352         {
1353           switch (argbuffer[i].arg_type)
1354             {
1355             case at_filename:
1356             case at_stdin:
1357               break;
1358             default:
1359               continue;         /* the for loop */
1360             }
1361           strcpy (cmd, "mv ");
1362           strcat (cmd, tagfile);
1363           strcat (cmd, " OTAGS;fgrep -v '\t");
1364           strcat (cmd, argbuffer[i].what);
1365           strcat (cmd, "\t' OTAGS >");
1366           strcat (cmd, tagfile);
1367           strcat (cmd, ";rm OTAGS");
1368           if (system (cmd) != EXIT_SUCCESS)
1369             fatal ("failed to execute shell command", (char *)NULL);
1370         }
1371       free (cmd);
1372       append_to_tagfile = TRUE;
1373     }
1374
1375   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1376   if (tagf == NULL)
1377     pfatal (tagfile);
1378   put_entries (nodehead);       /* write all the tags (CTAGS) */
1379   free_tree (nodehead);
1380   nodehead = NULL;
1381   if (fclose (tagf) == EOF)
1382     pfatal (tagfile);
1383
1384   if (CTAGS)
1385     if (append_to_tagfile || update)
1386       {
1387         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1388         /* Maybe these should be used:
1389            setenv ("LC_COLLATE", "C", 1);
1390            setenv ("LC_ALL", "C", 1); */
1391         strcpy (cmd, "sort -u -o ");
1392         strcat (cmd, tagfile);
1393         strcat (cmd, " ");
1394         strcat (cmd, tagfile);
1395         exit (system (cmd));
1396       }
1397   return EXIT_SUCCESS;
1398 }
1399
1400
1401 /*
1402  * Return a compressor given the file name.  If EXTPTR is non-zero,
1403  * return a pointer into FILE where the compressor-specific
1404  * extension begins.  If no compressor is found, NULL is returned
1405  * and EXTPTR is not significant.
1406  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1407  */
1408 static compressor *
1409 get_compressor_from_suffix (char *file, char **extptr)
1410 {
1411   compressor *compr;
1412   char *slash, *suffix;
1413
1414   /* File has been processed by canonicalize_filename,
1415      so we don't need to consider backslashes on DOS_NT.  */
1416   slash = etags_strrchr (file, '/');
1417   suffix = etags_strrchr (file, '.');
1418   if (suffix == NULL || suffix < slash)
1419     return NULL;
1420   if (extptr != NULL)
1421     *extptr = suffix;
1422   suffix += 1;
1423   /* Let those poor souls who live with DOS 8+3 file name limits get
1424      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1425      Only the first do loop is run if not MSDOS */
1426   do
1427     {
1428       for (compr = compressors; compr->suffix != NULL; compr++)
1429         if (streq (compr->suffix, suffix))
1430           return compr;
1431       if (!MSDOS)
1432         break;                  /* do it only once: not really a loop */
1433       if (extptr != NULL)
1434         *extptr = ++suffix;
1435     } while (*suffix != '\0');
1436   return NULL;
1437 }
1438
1439
1440
1441 /*
1442  * Return a language given the name.
1443  */
1444 static language *
1445 get_language_from_langname (const char *name)
1446 {
1447   language *lang;
1448
1449   if (name == NULL)
1450     error ("empty language name", (char *)NULL);
1451   else
1452     {
1453       for (lang = lang_names; lang->name != NULL; lang++)
1454         if (streq (name, lang->name))
1455           return lang;
1456       error ("unknown language \"%s\"", name);
1457     }
1458
1459   return NULL;
1460 }
1461
1462
1463 /*
1464  * Return a language given the interpreter name.
1465  */
1466 static language *
1467 get_language_from_interpreter (char *interpreter)
1468 {
1469   language *lang;
1470   const char **iname;
1471
1472   if (interpreter == NULL)
1473     return NULL;
1474   for (lang = lang_names; lang->name != NULL; lang++)
1475     if (lang->interpreters != NULL)
1476       for (iname = lang->interpreters; *iname != NULL; iname++)
1477         if (streq (*iname, interpreter))
1478             return lang;
1479
1480   return NULL;
1481 }
1482
1483
1484
1485 /*
1486  * Return a language given the file name.
1487  */
1488 static language *
1489 get_language_from_filename (char *file, int case_sensitive)
1490 {
1491   language *lang;
1492   const char **name, **ext, *suffix;
1493
1494   /* Try whole file name first. */
1495   for (lang = lang_names; lang->name != NULL; lang++)
1496     if (lang->filenames != NULL)
1497       for (name = lang->filenames; *name != NULL; name++)
1498         if ((case_sensitive)
1499             ? streq (*name, file)
1500             : strcaseeq (*name, file))
1501           return lang;
1502
1503   /* If not found, try suffix after last dot. */
1504   suffix = etags_strrchr (file, '.');
1505   if (suffix == NULL)
1506     return NULL;
1507   suffix += 1;
1508   for (lang = lang_names; lang->name != NULL; lang++)
1509     if (lang->suffixes != NULL)
1510       for (ext = lang->suffixes; *ext != NULL; ext++)
1511         if ((case_sensitive)
1512             ? streq (*ext, suffix)
1513             : strcaseeq (*ext, suffix))
1514           return lang;
1515   return NULL;
1516 }
1517
1518 \f
1519 /*
1520  * This routine is called on each file argument.
1521  */
1522 static void
1523 process_file_name (char *file, language *lang)
1524 {
1525   struct stat stat_buf;
1526   FILE *inf;
1527   fdesc *fdp;
1528   compressor *compr;
1529   char *compressed_name, *uncompressed_name;
1530   char *ext, *real_name;
1531   int retval;
1532
1533   canonicalize_filename (file);
1534   if (streq (file, tagfile) && !streq (tagfile, "-"))
1535     {
1536       error ("skipping inclusion of %s in self.", file);
1537       return;
1538     }
1539   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1540     {
1541       compressed_name = NULL;
1542       real_name = uncompressed_name = savestr (file);
1543     }
1544   else
1545     {
1546       real_name = compressed_name = savestr (file);
1547       uncompressed_name = savenstr (file, ext - file);
1548     }
1549
1550   /* If the canonicalized uncompressed name
1551      has already been dealt with, skip it silently. */
1552   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1553     {
1554       assert (fdp->infname != NULL);
1555       if (streq (uncompressed_name, fdp->infname))
1556         goto cleanup;
1557     }
1558
1559   if (stat (real_name, &stat_buf) != 0)
1560     {
1561       /* Reset real_name and try with a different name. */
1562       real_name = NULL;
1563       if (compressed_name != NULL) /* try with the given suffix */
1564         {
1565           if (stat (uncompressed_name, &stat_buf) == 0)
1566             real_name = uncompressed_name;
1567         }
1568       else                      /* try all possible suffixes */
1569         {
1570           for (compr = compressors; compr->suffix != NULL; compr++)
1571             {
1572               compressed_name = concat (file, ".", compr->suffix);
1573               if (stat (compressed_name, &stat_buf) != 0)
1574                 {
1575                   if (MSDOS)
1576                     {
1577                       char *suf = compressed_name + strlen (file);
1578                       size_t suflen = strlen (compr->suffix) + 1;
1579                       for ( ; suf[1]; suf++, suflen--)
1580                         {
1581                           memmove (suf, suf + 1, suflen);
1582                           if (stat (compressed_name, &stat_buf) == 0)
1583                             {
1584                               real_name = compressed_name;
1585                               break;
1586                             }
1587                         }
1588                       if (real_name != NULL)
1589                         break;
1590                     } /* MSDOS */
1591                   free (compressed_name);
1592                   compressed_name = NULL;
1593                 }
1594               else
1595                 {
1596                   real_name = compressed_name;
1597                   break;
1598                 }
1599             }
1600         }
1601       if (real_name == NULL)
1602         {
1603           perror (file);
1604           goto cleanup;
1605         }
1606     } /* try with a different name */
1607
1608   if (!S_ISREG (stat_buf.st_mode))
1609     {
1610       error ("skipping %s: it is not a regular file.", real_name);
1611       goto cleanup;
1612     }
1613   if (real_name == compressed_name)
1614     {
1615       char *cmd = concat (compr->command, " ", real_name);
1616       inf = (FILE *) popen (cmd, "r");
1617       free (cmd);
1618     }
1619   else
1620     inf = fopen (real_name, "r");
1621   if (inf == NULL)
1622     {
1623       perror (real_name);
1624       goto cleanup;
1625     }
1626
1627   process_file (inf, uncompressed_name, lang);
1628
1629   if (real_name == compressed_name)
1630     retval = pclose (inf);
1631   else
1632     retval = fclose (inf);
1633   if (retval < 0)
1634     pfatal (file);
1635
1636  cleanup:
1637   free (compressed_name);
1638   free (uncompressed_name);
1639   last_node = NULL;
1640   curfdp = NULL;
1641   return;
1642 }
1643
1644 static void
1645 process_file (FILE *fh, char *fn, language *lang)
1646 {
1647   static const fdesc emptyfdesc;
1648   fdesc *fdp;
1649
1650   /* Create a new input file description entry. */
1651   fdp = xnew (1, fdesc);
1652   *fdp = emptyfdesc;
1653   fdp->next = fdhead;
1654   fdp->infname = savestr (fn);
1655   fdp->lang = lang;
1656   fdp->infabsname = absolute_filename (fn, cwd);
1657   fdp->infabsdir = absolute_dirname (fn, cwd);
1658   if (filename_is_absolute (fn))
1659     {
1660       /* An absolute file name.  Canonicalize it. */
1661       fdp->taggedfname = absolute_filename (fn, NULL);
1662     }
1663   else
1664     {
1665       /* A file name relative to cwd.  Make it relative
1666          to the directory of the tags file. */
1667       fdp->taggedfname = relative_filename (fn, tagfiledir);
1668     }
1669   fdp->usecharno = TRUE;        /* use char position when making tags */
1670   fdp->prop = NULL;
1671   fdp->written = FALSE;         /* not written on tags file yet */
1672
1673   fdhead = fdp;
1674   curfdp = fdhead;              /* the current file description */
1675
1676   find_entries (fh);
1677
1678   /* If not Ctags, and if this is not metasource and if it contained no #line
1679      directives, we can write the tags and free all nodes pointing to
1680      curfdp. */
1681   if (!CTAGS
1682       && curfdp->usecharno      /* no #line directives in this file */
1683       && !curfdp->lang->metasource)
1684     {
1685       node *np, *prev;
1686
1687       /* Look for the head of the sublist relative to this file.  See add_node
1688          for the structure of the node tree. */
1689       prev = NULL;
1690       for (np = nodehead; np != NULL; prev = np, np = np->left)
1691         if (np->fdp == curfdp)
1692           break;
1693
1694       /* If we generated tags for this file, write and delete them. */
1695       if (np != NULL)
1696         {
1697           /* This is the head of the last sublist, if any.  The following
1698              instructions depend on this being true. */
1699           assert (np->left == NULL);
1700
1701           assert (fdhead == curfdp);
1702           assert (last_node->fdp == curfdp);
1703           put_entries (np);     /* write tags for file curfdp->taggedfname */
1704           free_tree (np);       /* remove the written nodes */
1705           if (prev == NULL)
1706             nodehead = NULL;    /* no nodes left */
1707           else
1708             prev->left = NULL;  /* delete the pointer to the sublist */
1709         }
1710     }
1711 }
1712
1713 /*
1714  * This routine sets up the boolean pseudo-functions which work
1715  * by setting boolean flags dependent upon the corresponding character.
1716  * Every char which is NOT in that string is not a white char.  Therefore,
1717  * all of the array "_wht" is set to FALSE, and then the elements
1718  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1719  * of a char is TRUE if it is the string "white", else FALSE.
1720  */
1721 static void
1722 init (void)
1723 {
1724   register const char *sp;
1725   register int i;
1726
1727   for (i = 0; i < CHARS; i++)
1728     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1729   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1730   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1731   notinname ('\0') = notinname ('\n');
1732   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1733   begtoken ('\0') = begtoken ('\n');
1734   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1735   intoken ('\0') = intoken ('\n');
1736   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1737   endtoken ('\0') = endtoken ('\n');
1738 }
1739
1740 /*
1741  * This routine opens the specified file and calls the function
1742  * which finds the function and type definitions.
1743  */
1744 static void
1745 find_entries (FILE *inf)
1746 {
1747   char *cp;
1748   language *lang = curfdp->lang;
1749   Lang_function *parser = NULL;
1750
1751   /* If user specified a language, use it. */
1752   if (lang != NULL && lang->function != NULL)
1753     {
1754       parser = lang->function;
1755     }
1756
1757   /* Else try to guess the language given the file name. */
1758   if (parser == NULL)
1759     {
1760       lang = get_language_from_filename (curfdp->infname, TRUE);
1761       if (lang != NULL && lang->function != NULL)
1762         {
1763           curfdp->lang = lang;
1764           parser = lang->function;
1765         }
1766     }
1767
1768   /* Else look for sharp-bang as the first two characters. */
1769   if (parser == NULL
1770       && readline_internal (&lb, inf) > 0
1771       && lb.len >= 2
1772       && lb.buffer[0] == '#'
1773       && lb.buffer[1] == '!')
1774     {
1775       char *lp;
1776
1777       /* Set lp to point at the first char after the last slash in the
1778          line or, if no slashes, at the first nonblank.  Then set cp to
1779          the first successive blank and terminate the string. */
1780       lp = etags_strrchr (lb.buffer+2, '/');
1781       if (lp != NULL)
1782         lp += 1;
1783       else
1784         lp = skip_spaces (lb.buffer + 2);
1785       cp = skip_non_spaces (lp);
1786       *cp = '\0';
1787
1788       if (strlen (lp) > 0)
1789         {
1790           lang = get_language_from_interpreter (lp);
1791           if (lang != NULL && lang->function != NULL)
1792             {
1793               curfdp->lang = lang;
1794               parser = lang->function;
1795             }
1796         }
1797     }
1798
1799   /* We rewind here, even if inf may be a pipe.  We fail if the
1800      length of the first line is longer than the pipe block size,
1801      which is unlikely. */
1802   rewind (inf);
1803
1804   /* Else try to guess the language given the case insensitive file name. */
1805   if (parser == NULL)
1806     {
1807       lang = get_language_from_filename (curfdp->infname, FALSE);
1808       if (lang != NULL && lang->function != NULL)
1809         {
1810           curfdp->lang = lang;
1811           parser = lang->function;
1812         }
1813     }
1814
1815   /* Else try Fortran or C. */
1816   if (parser == NULL)
1817     {
1818       node *old_last_node = last_node;
1819
1820       curfdp->lang = get_language_from_langname ("fortran");
1821       find_entries (inf);
1822
1823       if (old_last_node == last_node)
1824         /* No Fortran entries found.  Try C. */
1825         {
1826           /* We do not tag if rewind fails.
1827              Only the file name will be recorded in the tags file. */
1828           rewind (inf);
1829           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1830           find_entries (inf);
1831         }
1832       return;
1833     }
1834
1835   if (!no_line_directive
1836       && curfdp->lang != NULL && curfdp->lang->metasource)
1837     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1838        file, or anyway we parsed a file that is automatically generated from
1839        this one.  If this is the case, the bingo.c file contained #line
1840        directives that generated tags pointing to this file.  Let's delete
1841        them all before parsing this file, which is the real source. */
1842     {
1843       fdesc **fdpp = &fdhead;
1844       while (*fdpp != NULL)
1845         if (*fdpp != curfdp
1846             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1847           /* We found one of those!  We must delete both the file description
1848              and all tags referring to it. */
1849           {
1850             fdesc *badfdp = *fdpp;
1851
1852             /* Delete the tags referring to badfdp->taggedfname
1853                that were obtained from badfdp->infname. */
1854             invalidate_nodes (badfdp, &nodehead);
1855
1856             *fdpp = badfdp->next; /* remove the bad description from the list */
1857             free_fdesc (badfdp);
1858           }
1859         else
1860           fdpp = &(*fdpp)->next; /* advance the list pointer */
1861     }
1862
1863   assert (parser != NULL);
1864
1865   /* Generic initializations before reading from file. */
1866   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1867
1868   /* Generic initializations before parsing file with readline. */
1869   lineno = 0;                  /* reset global line number */
1870   charno = 0;                  /* reset global char number */
1871   linecharno = 0;              /* reset global char number of line start */
1872
1873   parser (inf);
1874
1875   regex_tag_multiline ();
1876 }
1877
1878 \f
1879 /*
1880  * Check whether an implicitly named tag should be created,
1881  * then call `pfnote'.
1882  * NAME is a string that is internally copied by this function.
1883  *
1884  * TAGS format specification
1885  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1886  * The following is explained in some more detail in etc/ETAGS.EBNF.
1887  *
1888  * make_tag creates tags with "implicit tag names" (unnamed tags)
1889  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1890  *  1. NAME does not contain any of the characters in NONAM;
1891  *  2. LINESTART contains name as either a rightmost, or rightmost but
1892  *     one character, substring;
1893  *  3. the character, if any, immediately before NAME in LINESTART must
1894  *     be a character in NONAM;
1895  *  4. the character, if any, immediately after NAME in LINESTART must
1896  *     also be a character in NONAM.
1897  *
1898  * The implementation uses the notinname() macro, which recognizes the
1899  * characters stored in the string `nonam'.
1900  * etags.el needs to use the same characters that are in NONAM.
1901  */
1902 static void
1903 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1904           int namelen,          /* tag length */
1905           int is_func,          /* tag is a function */
1906           char *linestart,      /* start of the line where tag is */
1907           int linelen,          /* length of the line where tag is */
1908           int lno,              /* line number */
1909           long int cno)         /* character number */
1910 {
1911   bool named = (name != NULL && namelen > 0);
1912   char *nname = NULL;
1913
1914   if (!CTAGS && named)          /* maybe set named to false */
1915     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1916        such that etags.el can guess a name from it. */
1917     {
1918       int i;
1919       register const char *cp = name;
1920
1921       for (i = 0; i < namelen; i++)
1922         if (notinname (*cp++))
1923           break;
1924       if (i == namelen)                         /* rule #1 */
1925         {
1926           cp = linestart + linelen - namelen;
1927           if (notinname (linestart[linelen-1]))
1928             cp -= 1;                            /* rule #4 */
1929           if (cp >= linestart                   /* rule #2 */
1930               && (cp == linestart
1931                   || notinname (cp[-1]))        /* rule #3 */
1932               && strneq (name, cp, namelen))    /* rule #2 */
1933             named = FALSE;      /* use implicit tag name */
1934         }
1935     }
1936
1937   if (named)
1938     nname = savenstr (name, namelen);
1939
1940   pfnote (nname, is_func, linestart, linelen, lno, cno);
1941 }
1942
1943 /* Record a tag. */
1944 static void
1945 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1946                                 /* tag name, or NULL if unnamed */
1947                                 /* tag is a function */
1948                                 /* start of the line where tag is */
1949                                 /* length of the line where tag is */
1950                                 /* line number */
1951                                 /* character number */
1952 {
1953   register node *np;
1954
1955   assert (name == NULL || name[0] != '\0');
1956   if (CTAGS && name == NULL)
1957     return;
1958
1959   np = xnew (1, node);
1960
1961   /* If ctags mode, change name "main" to M<thisfilename>. */
1962   if (CTAGS && !cxref_style && streq (name, "main"))
1963     {
1964       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1965       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1966       fp = etags_strrchr (np->name, '.');
1967       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1968         fp[0] = '\0';
1969     }
1970   else
1971     np->name = name;
1972   np->valid = TRUE;
1973   np->been_warned = FALSE;
1974   np->fdp = curfdp;
1975   np->is_func = is_func;
1976   np->lno = lno;
1977   if (np->fdp->usecharno)
1978     /* Our char numbers are 0-base, because of C language tradition?
1979        ctags compatibility?  old versions compatibility?   I don't know.
1980        Anyway, since emacs's are 1-base we expect etags.el to take care
1981        of the difference.  If we wanted to have 1-based numbers, we would
1982        uncomment the +1 below. */
1983     np->cno = cno /* + 1 */ ;
1984   else
1985     np->cno = invalidcharno;
1986   np->left = np->right = NULL;
1987   if (CTAGS && !cxref_style)
1988     {
1989       if (strlen (linestart) < 50)
1990         np->regex = concat (linestart, "$", "");
1991       else
1992         np->regex = savenstr (linestart, 50);
1993     }
1994   else
1995     np->regex = savenstr (linestart, linelen);
1996
1997   add_node (np, &nodehead);
1998 }
1999
2000 /*
2001  * free_tree ()
2002  *      recurse on left children, iterate on right children.
2003  */
2004 static void
2005 free_tree (register node *np)
2006 {
2007   while (np)
2008     {
2009       register node *node_right = np->right;
2010       free_tree (np->left);
2011       free (np->name);
2012       free (np->regex);
2013       free (np);
2014       np = node_right;
2015     }
2016 }
2017
2018 /*
2019  * free_fdesc ()
2020  *      delete a file description
2021  */
2022 static void
2023 free_fdesc (register fdesc *fdp)
2024 {
2025   free (fdp->infname);
2026   free (fdp->infabsname);
2027   free (fdp->infabsdir);
2028   free (fdp->taggedfname);
2029   free (fdp->prop);
2030   free (fdp);
2031 }
2032
2033 /*
2034  * add_node ()
2035  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2036  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2037  *      balancing.
2038  *
2039  *      add_node is the only function allowed to add nodes, so it can
2040  *      maintain state.
2041  */
2042 static void
2043 add_node (node *np, node **cur_node_p)
2044 {
2045   register int dif;
2046   register node *cur_node = *cur_node_p;
2047
2048   if (cur_node == NULL)
2049     {
2050       *cur_node_p = np;
2051       last_node = np;
2052       return;
2053     }
2054
2055   if (!CTAGS)
2056     /* Etags Mode */
2057     {
2058       /* For each file name, tags are in a linked sublist on the right
2059          pointer.  The first tags of different files are a linked list
2060          on the left pointer.  last_node points to the end of the last
2061          used sublist. */
2062       if (last_node != NULL && last_node->fdp == np->fdp)
2063         {
2064           /* Let's use the same sublist as the last added node. */
2065           assert (last_node->right == NULL);
2066           last_node->right = np;
2067           last_node = np;
2068         }
2069       else if (cur_node->fdp == np->fdp)
2070         {
2071           /* Scanning the list we found the head of a sublist which is
2072              good for us.  Let's scan this sublist. */
2073           add_node (np, &cur_node->right);
2074         }
2075       else
2076         /* The head of this sublist is not good for us.  Let's try the
2077            next one. */
2078         add_node (np, &cur_node->left);
2079     } /* if ETAGS mode */
2080
2081   else
2082     {
2083       /* Ctags Mode */
2084       dif = strcmp (np->name, cur_node->name);
2085
2086       /*
2087        * If this tag name matches an existing one, then
2088        * do not add the node, but maybe print a warning.
2089        */
2090       if (no_duplicates && !dif)
2091         {
2092           if (np->fdp == cur_node->fdp)
2093             {
2094               if (!no_warnings)
2095                 {
2096                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2097                            np->fdp->infname, lineno, np->name);
2098                   fprintf (stderr, "Second entry ignored\n");
2099                 }
2100             }
2101           else if (!cur_node->been_warned && !no_warnings)
2102             {
2103               fprintf
2104                 (stderr,
2105                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2106                  np->fdp->infname, cur_node->fdp->infname, np->name);
2107               cur_node->been_warned = TRUE;
2108             }
2109           return;
2110         }
2111
2112       /* Actually add the node */
2113       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2114     } /* if CTAGS mode */
2115 }
2116
2117 /*
2118  * invalidate_nodes ()
2119  *      Scan the node tree and invalidate all nodes pointing to the
2120  *      given file description (CTAGS case) or free them (ETAGS case).
2121  */
2122 static void
2123 invalidate_nodes (fdesc *badfdp, node **npp)
2124 {
2125   node *np = *npp;
2126
2127   if (np == NULL)
2128     return;
2129
2130   if (CTAGS)
2131     {
2132       if (np->left != NULL)
2133         invalidate_nodes (badfdp, &np->left);
2134       if (np->fdp == badfdp)
2135         np->valid = FALSE;
2136       if (np->right != NULL)
2137         invalidate_nodes (badfdp, &np->right);
2138     }
2139   else
2140     {
2141       assert (np->fdp != NULL);
2142       if (np->fdp == badfdp)
2143         {
2144           *npp = np->left;      /* detach the sublist from the list */
2145           np->left = NULL;      /* isolate it */
2146           free_tree (np);       /* free it */
2147           invalidate_nodes (badfdp, npp);
2148         }
2149       else
2150         invalidate_nodes (badfdp, &np->left);
2151     }
2152 }
2153
2154 \f
2155 static int total_size_of_entries (node *);
2156 static int number_len (long);
2157
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is at least 1. */
static int
number_len (long int num)
{
  int digits;

  /* One digit to start with, plus one for each further division by
     ten that leaves something. */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2167
2168 /*
2169  * Return total number of characters that put_entries will output for
2170  * the nodes in the linked list at the right of the specified node.
2171  * This count is irrelevant with etags.el since emacs 19.34 at least,
2172  * but is still supplied for backward compatibility.
2173  */
2174 static int
2175 total_size_of_entries (register node *np)
2176 {
2177   register int total = 0;
2178
2179   for (; np != NULL; np = np->right)
2180     if (np->valid)
2181       {
2182         total += strlen (np->regex) + 1;                /* pat\177 */
2183         if (np->name != NULL)
2184           total += strlen (np->name) + 1;               /* name\001 */
2185         total += number_len ((long) np->lno) + 1;       /* lno, */
2186         if (np->cno != invalidcharno)                   /* cno */
2187           total += number_len (np->cno);
2188         total += 1;                                     /* newline */
2189       }
2190
2191   return total;
2192 }
2193
2194 static void
2195 put_entries (register node *np)
2196 {
2197   register char *sp;
2198   static fdesc *fdp = NULL;
2199
2200   if (np == NULL)
2201     return;
2202
2203   /* Output subentries that precede this one */
2204   if (CTAGS)
2205     put_entries (np->left);
2206
2207   /* Output this entry */
2208   if (np->valid)
2209     {
2210       if (!CTAGS)
2211         {
2212           /* Etags mode */
2213           if (fdp != np->fdp)
2214             {
2215               fdp = np->fdp;
2216               fprintf (tagf, "\f\n%s,%d\n",
2217                        fdp->taggedfname, total_size_of_entries (np));
2218               fdp->written = TRUE;
2219             }
2220           fputs (np->regex, tagf);
2221           fputc ('\177', tagf);
2222           if (np->name != NULL)
2223             {
2224               fputs (np->name, tagf);
2225               fputc ('\001', tagf);
2226             }
2227           fprintf (tagf, "%d,", np->lno);
2228           if (np->cno != invalidcharno)
2229             fprintf (tagf, "%ld", np->cno);
2230           fputs ("\n", tagf);
2231         }
2232       else
2233         {
2234           /* Ctags mode */
2235           if (np->name == NULL)
2236             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2237
2238           if (cxref_style)
2239             {
2240               if (vgrind_style)
2241                 fprintf (stdout, "%s %s %d\n",
2242                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2243               else
2244                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2245                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2246             }
2247           else
2248             {
2249               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2250
2251               if (np->is_func)
2252                 {               /* function or #define macro with args */
2253                   putc (searchar, tagf);
2254                   putc ('^', tagf);
2255
2256                   for (sp = np->regex; *sp; sp++)
2257                     {
2258                       if (*sp == '\\' || *sp == searchar)
2259                         putc ('\\', tagf);
2260                       putc (*sp, tagf);
2261                     }
2262                   putc (searchar, tagf);
2263                 }
2264               else
2265                 {               /* anything else; text pattern inadequate */
2266                   fprintf (tagf, "%d", np->lno);
2267                 }
2268               putc ('\n', tagf);
2269             }
2270         }
2271     } /* if this node contains a valid tag */
2272
2273   /* Output subentries that follow this one */
2274   put_entries (np->right);
2275   if (!CTAGS)
2276     put_entries (np->left);
2277 }
2278
2279 \f
/* C extensions.
   The dialect values below all lie within the bits covered by C_EXT,
   while C_AUTO and YACC are single bits outside it.  Note that C_STAR
   (0x3) and C_JAVA (0x5) both include the C_PLPL bit (0x1); this is why
   the keyword table writes (C_JAVA & ~C_PLPL) for keywords that are
   meant for Java only.
   NOTE(review): C_EXT is presumably used as a mask to extract the
   dialect from a combined flags word - confirm against its users. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2288
/*
 * The C symbol tables.
 *
 * sym_type classifies the words recognized by the keyword table; the
 * comments give the keywords that the table maps to each value.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2303
2304 static unsigned int hash (const char *, unsigned int);
2305 static struct C_stab_entry * in_word_set (const char *, unsigned int);
2306 static enum sym_type C_symtype (char *, int, int);
2307
2308 /* Feed stuff between (but not including) %[ and %] lines to:
2309      gperf -m 5
2310 %[
2311 %compare-strncmp
2312 %enum
2313 %struct-type
2314 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2315 %%
2316 if,             0,                      st_C_ignore
2317 for,            0,                      st_C_ignore
2318 while,          0,                      st_C_ignore
2319 switch,         0,                      st_C_ignore
2320 return,         0,                      st_C_ignore
2321 __attribute__,  0,                      st_C_attribute
2322 GTY,            0,                      st_C_attribute
2323 @interface,     0,                      st_C_objprot
2324 @protocol,      0,                      st_C_objprot
2325 @implementation,0,                      st_C_objimpl
2326 @end,           0,                      st_C_objend
2327 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2328 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2329 friend,         C_PLPL,                 st_C_ignore
2330 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2331 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2332 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2333 class,          0,                      st_C_class
2334 namespace,      C_PLPL,                 st_C_struct
2335 domain,         C_STAR,                 st_C_struct
2336 union,          0,                      st_C_struct
2337 struct,         0,                      st_C_struct
2338 extern,         0,                      st_C_extern
2339 enum,           0,                      st_C_enum
2340 typedef,        0,                      st_C_typedef
2341 define,         0,                      st_C_define
2342 undef,          0,                      st_C_define
2343 operator,       C_PLPL,                 st_C_operator
2344 template,       0,                      st_C_template
2345 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2346 DEFUN,          0,                      st_C_gnumacro
2347 SYSCALL,        0,                      st_C_gnumacro
2348 ENTRY,          0,                      st_C_gnumacro
2349 PSEUDO,         0,                      st_C_gnumacro
2350 # These are defined inside C functions, so currently they are not met.
2351 # EXFUN used in glibc, DEFVAR_* in emacs.
2352 #EXFUN,         0,                      st_C_gnumacro
2353 #DEFVAR_,       0,                      st_C_gnumacro
2354 %]
2355 and replace lines between %< and %> with its output, then:
2356  - remove the #if characterset check
2357  - make in_word_set static and not inline. */
2358 /*%<*/
2359 /* C code produced by gperf version 3.0.1 */
2360 /* Command-line: gperf -m 5  */
2361 /* Computed positions: -k'2-3' */
2362
2363 struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2364 /* maximum key range = 33, duplicates = 0 */
2365
/* Hash function generated by gperf for the C keyword table.  The value
   is LEN plus the association values of the second character of STR
   and, unless LEN is exactly 2, of its third character.  The caller
   must make sure that those characters exist. */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only words of exactly two characters skip the third position. */
  if (hval != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2411
2412 static struct C_stab_entry *
2413 in_word_set (register const char *str, register unsigned int len)
2414 {
2415   enum
2416     {
2417       TOTAL_KEYWORDS = 33,
2418       MIN_WORD_LENGTH = 2,
2419       MAX_WORD_LENGTH = 15,
2420       MIN_HASH_VALUE = 2,
2421       MAX_HASH_VALUE = 34
2422     };
2423
2424   static struct C_stab_entry wordlist[] =
2425     {
2426       {""}, {""},
2427       {"if",            0,                      st_C_ignore},
2428       {"GTY",           0,                      st_C_attribute},
2429       {"@end",          0,                      st_C_objend},
2430       {"union",         0,                      st_C_struct},
2431       {"define",                0,                      st_C_define},
2432       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2433       {"template",      0,                      st_C_template},
2434       {"operator",      C_PLPL,                 st_C_operator},
2435       {"@interface",    0,                      st_C_objprot},
2436       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2437       {"friend",                C_PLPL,                 st_C_ignore},
2438       {"typedef",       0,                      st_C_typedef},
2439       {"return",                0,                      st_C_ignore},
2440       {"@implementation",0,                     st_C_objimpl},
2441       {"@protocol",     0,                      st_C_objprot},
2442       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2443       {"extern",                0,                      st_C_extern},
2444       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2445       {"struct",                0,                      st_C_struct},
2446       {"domain",                C_STAR,                 st_C_struct},
2447       {"switch",                0,                      st_C_ignore},
2448       {"enum",          0,                      st_C_enum},
2449       {"for",           0,                      st_C_ignore},
2450       {"namespace",     C_PLPL,                 st_C_struct},
2451       {"class",         0,                      st_C_class},
2452       {"while",         0,                      st_C_ignore},
2453       {"undef",         0,                      st_C_define},
2454       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2455       {"__attribute__", 0,                      st_C_attribute},
2456       {"SYSCALL",       0,                      st_C_gnumacro},
2457       {"ENTRY",         0,                      st_C_gnumacro},
2458       {"PSEUDO",                0,                      st_C_gnumacro},
2459       {"DEFUN",         0,                      st_C_gnumacro}
2460     };
2461
2462   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2463     {
2464       register int key = hash (str, len);
2465
2466       if (key <= MAX_HASH_VALUE && key >= 0)
2467         {
2468           register const char *s = wordlist[key].name;
2469
2470           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2471             return &wordlist[key];
2472         }
2473     }
2474   return 0;
2475 }
2476 /*%>*/
2477
2478 static enum sym_type
2479 C_symtype (char *str, int len, int c_ext)
2480 {
2481   register struct C_stab_entry *se = in_word_set (str, len);
2482
2483   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2484     return st_none;
2485   return se->type;
2486 }
2487
2488 \f
/*
 * Ignoring __attribute__ ((list)):
 * set while the scanner is looking at an __attribute__ construct.
 */
static bool inattribute;
2493
/*
 * State variable of the simple finite automaton that recognizes
 * C functions and variables.
 */
static enum
{
  /* Nothing seen.  */
  fvnone,
  /* Emacs DEFUN keyword seen.  */
  fdefunkey,
  /* Emacs DEFUN name seen.  */
  fdefunname,
  /* Function: operator keyword seen (cplpl).  */
  foperator,
  /* Function or variable name seen.  */
  fvnameseen,
  /* Function: just after open parenthesis.  */
  fstartlist,
  /* Function: in parameter list.  */
  finlist,
  /* Function: after parameter list.  */
  flistseen,
  /* Function: before open brace.  */
  fignore,
  /* Variable-like: ignore until ';'.  */
  vignore
} fvdef;

/* Function or variable: extern keyword seen.  */
static bool fvextern;
2513
/*
 * State variable of the simple finite automaton that recognizes typedefs.
 */
static enum
{
  /* Nothing seen.  */
  tnone,
  /* typedef keyword seen.  */
  tkeyseen,
  /* Defined type seen.  */
  ttypeseen,
  /* Inside typedef body.  */
  tinbody,
  /* Just before typedef tag.  */
  tend,
  /* Junk after typedef tag.  */
  tignore
} typdef;
2527
/*
 * State variable of the simple finite automaton that recognizes
 * struct-like structures (enum, struct and union).
 */
static enum
{
  /* Nothing seen yet, or in struct body if bracelev > 0.  */
  snone,
  /* Struct-like keyword seen.  */
  skeyseen,
  /* Struct-like tag seen.  */
  stagseen,
  /* Colon seen after struct-like tag.  */
  scolonseen
} structdef;
2541
/*
 * Name of the current Objective C class; meaningful only while objdef is
 * different from onone.  Starts out as a recognizable placeholder.
 */
static const char *objtag = "<uninited>";
2546
/*
 * State variable of the little state machine that deals with
 * preprocessor lines.
 */
static enum
{
  /* Nothing seen.  */
  dnone,
  /* '#' seen as first char on line.  */
  dsharpseen,
  /* '#' and 'define' seen.  */
  ddefineseen,
  /* Ignore rest of line.  */
  dignorerest
} definedef;
2557
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  /* Nothing seen.  */
  onone,
  /* @interface or @protocol seen.  */
  oprotocol,
  /* @implementation seen.  */
  oimplementation,
  /* Class name seen.  */
  otagseen,
  /* Parenthesis before category seen.  */
  oparenseen,
  /* Category name seen.  */
  ocatseen,
  /* In @implementation body.  */
  oinbody,
  /* In @implementation body, after +/-.  */
  omethodsign,
  /* After method name.  */
  omethodtag,
  /* After method colon.  */
  omethodcolon,
  /* After method parameter.  */
  omethodparm,
  /* Wait for @end.  */
  oignore
} objdef;
2577
2578
/*
 * Information about the latest token read and how it should be tagged.
 * Used by the make_C_tag function to build a tag.
 *
 * LINE, OFFSET and LENGTH can be used to pass strings around for generic
 * purposes.  The remaining members specifically refer to creating tags:
 * in that case the token held here is the pattern that will be used to
 * create a tag.
 */
static struct tok
{
  /* String containing the token.  */
  char *line;
  /* Where the token starts in LINE.  */
  int offset;
  /* Token length.  */
  int length;

  /* When false, do not create a tag.  The token should be invalidated
     (this flag cleared) whenever a state machine is reset prematurely.  */
  bool valid;
  /* Create a named tag.  */
  bool named;
  /* Source line number of tag.  */
  int lineno;
  /* Source char number of tag.  */
  long linepos;
} token;
2601
2602 /*
2603  * Variables and functions for dealing with nested structures.
2604  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2605  */
2606 static void pushclass_above (int, char *, int);
2607 static void popclass_above (int);
2608 static void write_classname (linebuffer *, const char *qualifier);
2609
/* Stack for nested declaration tags.  */
static struct
{
  /* Nested class names.  */
  char **cname;
  /* Nested class brace level.  */
  int *bracelev;
  /* Class nesting level (elements used).  */
  int nl;
  /* Length of the arrays.  */
  int size;
} cstack;

/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev (cstack.nl)

/* After struct keyword or in struct body, not inside a nested function.
   Expects a variable named bracelev to be in scope where it is used. */
#define instruct (structdef == snone && nestlev > 0 \
                  && bracelev == cstack.bracelev[nestlev-1] + 1)
2621
2622 static void
2623 pushclass_above (int bracelev, char *str, int len)
2624 {
2625   int nl;
2626
2627   popclass_above (bracelev);
2628   nl = cstack.nl;
2629   if (nl >= cstack.size)
2630     {
2631       int size = cstack.size *= 2;
2632       xrnew (cstack.cname, size, char *);
2633       xrnew (cstack.bracelev, size, int);
2634     }
2635   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2636   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2637   cstack.bracelev[nl] = bracelev;
2638   cstack.nl = nl + 1;
2639 }
2640
2641 static void
2642 popclass_above (int bracelev)
2643 {
2644   int nl;
2645
2646   for (nl = cstack.nl - 1;
2647        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2648        nl--)
2649     {
2650       free (cstack.cname[nl]);
2651       cstack.nl = nl;
2652     }
2653 }
2654
2655 static void
2656 write_classname (linebuffer *cn, const char *qualifier)
2657 {
2658   int i, len;
2659   int qlen = strlen (qualifier);
2660
2661   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2662     {
2663       len = 0;
2664       cn->len = 0;
2665       cn->buffer[0] = '\0';
2666     }
2667   else
2668     {
2669       len = strlen (cstack.cname[0]);
2670       linebuffer_setlen (cn, len);
2671       strcpy (cn->buffer, cstack.cname[0]);
2672     }
2673   for (i = 1; i < cstack.nl; i++)
2674     {
2675       char *s;
2676       int slen;
2677
2678       s = cstack.cname[i];
2679       if (s == NULL)
2680         continue;
2681       slen = strlen (s);
2682       len += slen + qlen;
2683       linebuffer_setlen (cn, len);
2684       strncat (cn->buffer, qualifier, qlen);
2685       strncat (cn->buffer, s, slen);
2686     }
2687 }
2688
2689 \f
2690 static bool consider_token (char *, int, int, int *, int, int, bool *);
2691 static void make_C_tag (bool);
2692
2693 /*
2694  * consider_token ()
2695  *      checks to see if the current token is at the start of a
2696  *      function or variable, or corresponds to a typedef, or
2697  *      is a struct/union/enum tag, or #define, or an enum constant.
2698  *
2699  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2700  *      with args.  C_EXTP points to which language we are looking at.
2701  *
2702  * Globals
2703  *      fvdef                   IN OUT
2704  *      structdef               IN OUT
2705  *      definedef               IN OUT
2706  *      typdef                  IN OUT
2707  *      objdef                  IN OUT
2708  */
2709
2710 static bool
2711 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2712                                 /* IN: token pointer */
2713                                 /* IN: token length */
2714                                 /* IN: first char after the token */
2715                                 /* IN, OUT: C extensions mask */
2716                                 /* IN: brace level */
2717                                 /* IN: parenthesis level */
2718                                 /* OUT: function or variable found */
2719 {
2720   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2721      structtype is the type of the preceding struct-like keyword, and
2722      structbracelev is the brace level where it has been seen. */
2723   static enum sym_type structtype;
2724   static int structbracelev;
2725   static enum sym_type toktype;
2726
2727
2728   toktype = C_symtype (str, len, *c_extp);
2729
2730   /*
2731    * Skip __attribute__
2732    */
2733   if (toktype == st_C_attribute)
2734     {
2735       inattribute = TRUE;
2736       return FALSE;
2737      }
2738
2739    /*
2740     * Advance the definedef state machine.
2741     */
2742    switch (definedef)
2743      {
2744      case dnone:
2745        /* We're not on a preprocessor line. */
2746        if (toktype == st_C_gnumacro)
2747          {
2748            fvdef = fdefunkey;
2749            return FALSE;
2750          }
2751        break;
2752      case dsharpseen:
2753        if (toktype == st_C_define)
2754          {
2755            definedef = ddefineseen;
2756          }
2757        else
2758          {
2759            definedef = dignorerest;
2760          }
2761        return FALSE;
2762      case ddefineseen:
2763        /*
2764         * Make a tag for any macro, unless it is a constant
2765         * and constantypedefs is FALSE.
2766         */
2767        definedef = dignorerest;
2768        *is_func_or_var = (c == '(');
2769        if (!*is_func_or_var && !constantypedefs)
2770          return FALSE;
2771        else
2772          return TRUE;
2773      case dignorerest:
2774        return FALSE;
2775      default:
2776        error ("internal error: definedef value.", (char *)NULL);
2777      }
2778
2779    /*
2780     * Now typedefs
2781     */
2782    switch (typdef)
2783      {
2784      case tnone:
2785        if (toktype == st_C_typedef)
2786          {
2787            if (typedefs)
2788              typdef = tkeyseen;
2789            fvextern = FALSE;
2790            fvdef = fvnone;
2791            return FALSE;
2792          }
2793        break;
2794      case tkeyseen:
2795        switch (toktype)
2796          {
2797          case st_none:
2798          case st_C_class:
2799          case st_C_struct:
2800          case st_C_enum:
2801            typdef = ttypeseen;
2802          }
2803        break;
2804      case ttypeseen:
2805        if (structdef == snone && fvdef == fvnone)
2806          {
2807            fvdef = fvnameseen;
2808            return TRUE;
2809          }
2810        break;
2811      case tend:
2812        switch (toktype)
2813          {
2814          case st_C_class:
2815          case st_C_struct:
2816          case st_C_enum:
2817            return FALSE;
2818          }
2819        return TRUE;
2820      }
2821
2822    switch (toktype)
2823      {
2824      case st_C_javastruct:
2825        if (structdef == stagseen)
2826          structdef = scolonseen;
2827        return FALSE;
2828      case st_C_template:
2829      case st_C_class:
2830        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2831            && bracelev == 0
2832            && definedef == dnone && structdef == snone
2833            && typdef == tnone && fvdef == fvnone)
2834          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2835        if (toktype == st_C_template)
2836          break;
2837        /* FALLTHRU */
2838      case st_C_struct:
2839      case st_C_enum:
2840        if (parlev == 0
2841            && fvdef != vignore
2842            && (typdef == tkeyseen
2843                || (typedefs_or_cplusplus && structdef == snone)))
2844          {
2845            structdef = skeyseen;
2846            structtype = toktype;
2847            structbracelev = bracelev;
2848            if (fvdef == fvnameseen)
2849              fvdef = fvnone;
2850          }
2851        return FALSE;
2852      }
2853
2854    if (structdef == skeyseen)
2855      {
2856        structdef = stagseen;
2857        return TRUE;
2858      }
2859
2860    if (typdef != tnone)
2861      definedef = dnone;
2862
2863    /* Detect Objective C constructs. */
2864    switch (objdef)
2865      {
2866      case onone:
2867        switch (toktype)
2868          {
2869          case st_C_objprot:
2870            objdef = oprotocol;
2871            return FALSE;
2872          case st_C_objimpl:
2873            objdef = oimplementation;
2874            return FALSE;
2875          }
2876        break;
2877      case oimplementation:
2878        /* Save the class tag for functions or variables defined inside. */
2879        objtag = savenstr (str, len);
2880        objdef = oinbody;
2881        return FALSE;
2882      case oprotocol:
2883        /* Save the class tag for categories. */
2884        objtag = savenstr (str, len);
2885        objdef = otagseen;
2886        *is_func_or_var = TRUE;
2887        return TRUE;
2888      case oparenseen:
2889        objdef = ocatseen;
2890        *is_func_or_var = TRUE;
2891        return TRUE;
2892      case oinbody:
2893        break;
2894      case omethodsign:
2895        if (parlev == 0)
2896          {
2897            fvdef = fvnone;
2898            objdef = omethodtag;
2899            linebuffer_setlen (&token_name, len);
2900            strncpy (token_name.buffer, str, len);
2901            token_name.buffer[len] = '\0';
2902            return TRUE;
2903          }
2904        return FALSE;
2905      case omethodcolon:
2906        if (parlev == 0)
2907          objdef = omethodparm;
2908        return FALSE;
2909      case omethodparm:
2910        if (parlev == 0)
2911          {
2912            fvdef = fvnone;
2913            objdef = omethodtag;
2914            linebuffer_setlen (&token_name, token_name.len + len);
2915            strncat (token_name.buffer, str, len);
2916            return TRUE;
2917          }
2918        return FALSE;
2919      case oignore:
2920        if (toktype == st_C_objend)
2921          {
2922            /* Memory leakage here: the string pointed by objtag is
2923               never released, because many tests would be needed to
2924               avoid breaking on incorrect input code.  The amount of
2925               memory leaked here is the sum of the lengths of the
2926               class tags.
2927            free (objtag); */
2928            objdef = onone;
2929          }
2930        return FALSE;
2931      }
2932
2933    /* A function, variable or enum constant? */
2934    switch (toktype)
2935      {
2936      case st_C_extern:
2937        fvextern = TRUE;
2938        switch  (fvdef)
2939          {
2940          case finlist:
2941          case flistseen:
2942          case fignore:
2943          case vignore:
2944            break;
2945          default:
2946            fvdef = fvnone;
2947          }
2948        return FALSE;
2949      case st_C_ignore:
2950        fvextern = FALSE;
2951        fvdef = vignore;
2952        return FALSE;
2953      case st_C_operator:
2954        fvdef = foperator;
2955        *is_func_or_var = TRUE;
2956        return TRUE;
2957      case st_none:
2958        if (constantypedefs
2959            && structdef == snone
2960            && structtype == st_C_enum && bracelev > structbracelev)
2961          return TRUE;           /* enum constant */
2962        switch (fvdef)
2963          {
2964          case fdefunkey:
2965            if (bracelev > 0)
2966              break;
2967            fvdef = fdefunname;  /* GNU macro */
2968            *is_func_or_var = TRUE;
2969            return TRUE;
2970          case fvnone:
2971            switch (typdef)
2972              {
2973              case ttypeseen:
2974                return FALSE;
2975              case tnone:
2976                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2977                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2978                  {
2979                    fvdef = vignore;
2980                    return FALSE;
2981                  }
2982                break;
2983              }
2984           /* FALLTHRU */
2985           case fvnameseen:
2986           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2987             {
2988               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2989                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2990               fvdef = foperator;
2991               *is_func_or_var = TRUE;
2992               return TRUE;
2993             }
2994           if (bracelev > 0 && !instruct)
2995             break;
2996           fvdef = fvnameseen;   /* function or variable */
2997           *is_func_or_var = TRUE;
2998           return TRUE;
2999         }
3000       break;
3001     }
3002
3003   return FALSE;
3004 }
3005
3006 \f
3007 /*
3008  * C_entries often keeps pointers to tokens or lines which are older than
3009  * the line currently read.  By keeping two line buffers, and switching
3010  * them at end of line, it is possible to use those pointers.
3011  */
3012 static struct
3013 {
3014   long linepos;
3015   linebuffer lb;
3016 } lbs[2];
3017
3018 #define current_lb_is_new (newndx == curndx)
3019 #define switch_line_buffers() (curndx = 1 - curndx)
3020
3021 #define curlb (lbs[curndx].lb)
3022 #define newlb (lbs[newndx].lb)
3023 #define curlinepos (lbs[curndx].linepos)
3024 #define newlinepos (lbs[newndx].linepos)
3025
3026 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3027 #define cplpl (c_ext & C_PLPL)
3028 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3029
/*
 * Read the next input line into the current line buffer and point lp at
 * its start, leaving definedef untouched.  Expands to references to
 * charno, inf, lp, quotednl, curndx and newndx.
 */
#define CNL_SAVE_DEFINEDEF() \
do { \
  curlinepos = charno; \
  readline (&curlb, inf); \
  lp = curlb.buffer; \
  quotednl = FALSE; \
  newndx = curndx; \
} while (0)

/*
 * Like CNL_SAVE_DEFINEDEF, but also restore a valid savetoken into token
 * and reset the definedef state machine to dnone.
 */
#define CNL() \
do { \
  CNL_SAVE_DEFINEDEF(); \
  if (savetoken.valid) \
    { \
      token = savetoken; \
      savetoken.valid = FALSE; \
    } \
  definedef = dnone; \
} while (0)
3049
3050
3051 static void
3052 make_C_tag (int isfun)
3053 {
3054   /* This function is never called when token.valid is FALSE, but
3055      we must protect against invalid input or internal errors. */
3056   if (token.valid)
3057     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3058               token.offset+token.length+1, token.lineno, token.linepos);
3059   else if (DEBUG)
3060     {                             /* this branch is optimized away if !DEBUG */
3061       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3062                 token_name.len + 17, isfun, token.line,
3063                 token.offset+token.length+1, token.lineno, token.linepos);
3064       error ("INVALID TOKEN", NULL);
3065     }
3066
3067   token.valid = FALSE;
3068 }
3069
3070
3071 /*
3072  * C_entries ()
3073  *      This routine finds functions, variables, typedefs,
3074  *      #define's, enum constants and struct/union/enum definitions in
3075  *      C syntax and adds them to the list.
3076  */
3077 static void
3078 C_entries (int c_ext, FILE *inf)
3079                                 /* extension of C */
3080                                 /* input file */
3081 {
3082   register char c;              /* latest char read; '\0' for end of line */
3083   register char *lp;            /* pointer one beyond the character `c' */
3084   int curndx, newndx;           /* indices for current and new lb */
3085   register int tokoff;          /* offset in line of start of current token */
3086   register int toklen;          /* length of current token */
3087   const char *qualifier;        /* string used to qualify names */
3088   int qlen;                     /* length of qualifier */
3089   int bracelev;                 /* current brace level */
3090   int bracketlev;               /* current bracket level */
3091   int parlev;                   /* current parenthesis level */
3092   int attrparlev;               /* __attribute__ parenthesis level */
3093   int templatelev;              /* current template level */
3094   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3095   bool incomm, inquote, inchar, quotednl, midtoken;
3096   bool yacc_rules;              /* in the rules part of a yacc file */
3097   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3098
3099
3100   linebuffer_init (&lbs[0].lb);
3101   linebuffer_init (&lbs[1].lb);
3102   if (cstack.size == 0)
3103     {
3104       cstack.size = (DEBUG) ? 1 : 4;
3105       cstack.nl = 0;
3106       cstack.cname = xnew (cstack.size, char *);
3107       cstack.bracelev = xnew (cstack.size, int);
3108     }
3109
3110   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3111   curndx = newndx = 0;
3112   lp = curlb.buffer;
3113   *lp = 0;
3114
3115   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3116   structdef = snone; definedef = dnone; objdef = onone;
3117   yacc_rules = FALSE;
3118   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3119   token.valid = savetoken.valid = FALSE;
3120   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3121   if (cjava)
3122     { qualifier = "."; qlen = 1; }
3123   else
3124     { qualifier = "::"; qlen = 2; }
3125
3126
3127   while (!feof (inf))
3128     {
3129       c = *lp++;
3130       if (c == '\\')
3131         {
3132           /* If we are at the end of the line, the next character is a
3133              '\0'; do not skip it, because it is what tells us
3134              to read the next line.  */
3135           if (*lp == '\0')
3136             {
3137               quotednl = TRUE;
3138               continue;
3139             }
3140           lp++;
3141           c = ' ';
3142         }
3143       else if (incomm)
3144         {
3145           switch (c)
3146             {
3147             case '*':
3148               if (*lp == '/')
3149                 {
3150                   c = *lp++;
3151                   incomm = FALSE;
3152                 }
3153               break;
3154             case '\0':
3155               /* Newlines inside comments do not end macro definitions in
3156                  traditional cpp. */
3157               CNL_SAVE_DEFINEDEF ();
3158               break;
3159             }
3160           continue;
3161         }
3162       else if (inquote)
3163         {
3164           switch (c)
3165             {
3166             case '"':
3167               inquote = FALSE;
3168               break;
3169             case '\0':
3170               /* Newlines inside strings do not end macro definitions
3171                  in traditional cpp, even though compilers don't
3172                  usually accept them. */
3173               CNL_SAVE_DEFINEDEF ();
3174               break;
3175             }
3176           continue;
3177         }
3178       else if (inchar)
3179         {
3180           switch (c)
3181             {
3182             case '\0':
3183               /* Hmmm, something went wrong. */
3184               CNL ();
3185               /* FALLTHRU */
3186             case '\'':
3187               inchar = FALSE;
3188               break;
3189             }
3190           continue;
3191         }
3192       else switch (c)
3193         {
3194         case '"':
3195           inquote = TRUE;
3196           if (bracketlev > 0)
3197             continue;
3198           if (inattribute)
3199             break;
3200           switch (fvdef)
3201             {
3202             case fdefunkey:
3203             case fstartlist:
3204             case finlist:
3205             case fignore:
3206             case vignore:
3207               break;
3208             default:
3209               fvextern = FALSE;
3210               fvdef = fvnone;
3211             }
3212           continue;
3213         case '\'':
3214           inchar = TRUE;
3215           if (bracketlev > 0)
3216             continue;
3217           if (inattribute)
3218             break;
3219           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3220             {
3221               fvextern = FALSE;
3222               fvdef = fvnone;
3223             }
3224           continue;
3225         case '/':
3226           if (*lp == '*')
3227             {
3228               incomm = TRUE;
3229               lp++;
3230               c = ' ';
3231               if (bracketlev > 0)
3232                 continue;
3233             }
3234           else if (/* cplpl && */ *lp == '/')
3235             {
3236               c = '\0';
3237             }
3238           break;
3239         case '%':
3240           if ((c_ext & YACC) && *lp == '%')
3241             {
3242               /* Entering or exiting rules section in yacc file. */
3243               lp++;
3244               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3245               typdef = tnone; structdef = snone;
3246               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3247               bracelev = 0;
3248               yacc_rules = !yacc_rules;
3249               continue;
3250             }
3251           else
3252             break;
3253         case '#':
3254           if (definedef == dnone)
3255             {
3256               char *cp;
3257               bool cpptoken = TRUE;
3258
3259               /* Look back on this line.  If all blanks, or nonblanks
3260                  followed by an end of comment, this is a preprocessor
3261                  token. */
3262               for (cp = newlb.buffer; cp < lp-1; cp++)
3263                 if (!iswhite (*cp))
3264                   {
3265                     if (*cp == '*' && cp[1] == '/')
3266                       {
3267                         cp++;
3268                         cpptoken = TRUE;
3269                       }
3270                     else
3271                       cpptoken = FALSE;
3272                   }
3273               if (cpptoken)
3274                 definedef = dsharpseen;
3275             } /* if (definedef == dnone) */
3276           continue;
3277         case '[':
3278           bracketlev++;
3279           continue;
3280         default:
3281           if (bracketlev > 0)
3282             {
3283               if (c == ']')
3284                 --bracketlev;
3285               else if (c == '\0')
3286                 CNL_SAVE_DEFINEDEF ();
3287               continue;
3288             }
3289           break;
3290         } /* switch (c) */
3291
3292
3293       /* Consider token only if some involved conditions are satisfied. */
3294       if (typdef != tignore
3295           && definedef != dignorerest
3296           && fvdef != finlist
3297           && templatelev == 0
3298           && (definedef != dnone
3299               || structdef != scolonseen)
3300           && !inattribute)
3301         {
3302           if (midtoken)
3303             {
3304               if (endtoken (c))
3305                 {
3306                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3307                     /* This handles :: in the middle,
3308                        but not at the beginning of an identifier.
3309                        Also, space-separated :: is not recognized. */
3310                     {
3311                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3312                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3313                       lp += 2;
3314                       toklen += 2;
3315                       c = lp[-1];
3316                       goto still_in_token;
3317                     }
3318                   else
3319                     {
3320                       bool funorvar = FALSE;
3321
3322                       if (yacc_rules
3323                           || consider_token (newlb.buffer + tokoff, toklen, c,
3324                                              &c_ext, bracelev, parlev,
3325                                              &funorvar))
3326                         {
3327                           if (fvdef == foperator)
3328                             {
3329                               char *oldlp = lp;
3330                               lp = skip_spaces (lp-1);
3331                               if (*lp != '\0')
3332                                 lp += 1;
3333                               while (*lp != '\0'
3334                                      && !iswhite (*lp) && *lp != '(')
3335                                 lp += 1;
3336                               c = *lp++;
3337                               toklen += lp - oldlp;
3338                             }
3339                           token.named = FALSE;
3340                           if (!plainc
3341                               && nestlev > 0 && definedef == dnone)
3342                             /* in struct body */
3343                             {
3344                               write_classname (&token_name, qualifier);
3345                               linebuffer_setlen (&token_name,
3346                                                  token_name.len+qlen+toklen);
3347                               strcat (token_name.buffer, qualifier);
3348                               strncat (token_name.buffer,
3349                                        newlb.buffer + tokoff, toklen);
3350                               token.named = TRUE;
3351                             }
3352                           else if (objdef == ocatseen)
3353                             /* Objective C category */
3354                             {
3355                               int len = strlen (objtag) + 2 + toklen;
3356                               linebuffer_setlen (&token_name, len);
3357                               strcpy (token_name.buffer, objtag);
3358                               strcat (token_name.buffer, "(");
3359                               strncat (token_name.buffer,
3360                                        newlb.buffer + tokoff, toklen);
3361                               strcat (token_name.buffer, ")");
3362                               token.named = TRUE;
3363                             }
3364                           else if (objdef == omethodtag
3365                                    || objdef == omethodparm)
3366                             /* Objective C method */
3367                             {
3368                               token.named = TRUE;
3369                             }
3370                           else if (fvdef == fdefunname)
3371                             /* GNU DEFUN and similar macros */
3372                             {
3373                               bool defun = (newlb.buffer[tokoff] == 'F');
3374                               int off = tokoff;
3375                               int len = toklen;
3376
3377                               /* Rewrite the tag so that emacs lisp DEFUNs
3378                                  can be found by their elisp name */
3379                               if (defun)
3380                                 {
3381                                   off += 1;
3382                                   len -= 1;
3383                                 }
3384                               linebuffer_setlen (&token_name, len);
3385                               strncpy (token_name.buffer,
3386                                        newlb.buffer + off, len);
3387                               token_name.buffer[len] = '\0';
3388                               if (defun)
3389                                 while (--len >= 0)
3390                                   if (token_name.buffer[len] == '_')
3391                                     token_name.buffer[len] = '-';
3392                               token.named = defun;
3393                             }
3394                           else
3395                             {
3396                               linebuffer_setlen (&token_name, toklen);
3397                               strncpy (token_name.buffer,
3398                                        newlb.buffer + tokoff, toklen);
3399                               token_name.buffer[toklen] = '\0';
3400                               /* Name macros and members. */
3401                               token.named = (structdef == stagseen
3402                                              || typdef == ttypeseen
3403                                              || typdef == tend
3404                                              || (funorvar
3405                                                  && definedef == dignorerest)
3406                                              || (funorvar
3407                                                  && definedef == dnone
3408                                                  && structdef == snone
3409                                                  && bracelev > 0));
3410                             }
3411                           token.lineno = lineno;
3412                           token.offset = tokoff;
3413                           token.length = toklen;
3414                           token.line = newlb.buffer;
3415                           token.linepos = newlinepos;
3416                           token.valid = TRUE;
3417
3418                           if (definedef == dnone
3419                               && (fvdef == fvnameseen
3420                                   || fvdef == foperator
3421                                   || structdef == stagseen
3422                                   || typdef == tend
3423                                   || typdef == ttypeseen
3424                                   || objdef != onone))
3425                             {
3426                               if (current_lb_is_new)
3427                                 switch_line_buffers ();
3428                             }
3429                           else if (definedef != dnone
3430                                    || fvdef == fdefunname
3431                                    || instruct)
3432                             make_C_tag (funorvar);
3433                         }
3434                       else /* not yacc and consider_token failed */
3435                         {
3436                           if (inattribute && fvdef == fignore)
3437                             {
3438                               /* We have just met __attribute__ after a
3439                                  function parameter list: do not tag the
3440                                  function again. */
3441                               fvdef = fvnone;
3442                             }
3443                         }
3444                       midtoken = FALSE;
3445                     }
3446                 } /* if (endtoken (c)) */
3447               else if (intoken (c))
3448                 still_in_token:
3449                 {
3450                   toklen++;
3451                   continue;
3452                 }
3453             } /* if (midtoken) */
3454           else if (begtoken (c))
3455             {
3456               switch (definedef)
3457                 {
3458                 case dnone:
3459                   switch (fvdef)
3460                     {
3461                     case fstartlist:
3462                       /* This prevents tagging fb in
3463                          void (__attribute__((noreturn)) *fb) (void);
3464                          Fixing this is not easy and not very important. */
3465                       fvdef = finlist;
3466                       continue;
3467                     case flistseen:
3468                       if (plainc || declarations)
3469                         {
3470                           make_C_tag (TRUE); /* a function */
3471                           fvdef = fignore;
3472                         }
3473                       break;
3474                     }
3475                   if (structdef == stagseen && !cjava)
3476                     {
3477                       popclass_above (bracelev);
3478                       structdef = snone;
3479                     }
3480                   break;
3481                 case dsharpseen:
3482                   savetoken = token;
3483                   break;
3484                 }
3485               if (!yacc_rules || lp == newlb.buffer + 1)
3486                 {
3487                   tokoff = lp - 1 - newlb.buffer;
3488                   toklen = 1;
3489                   midtoken = TRUE;
3490                 }
3491               continue;
3492             } /* if (begtoken) */
3493         } /* if must look at token */
3494
3495
3496       /* Detect end of line, colon, comma, semicolon and various braces
3497          after having handled a token.*/
3498       switch (c)
3499         {
3500         case ':':
3501           if (inattribute)
3502             break;
3503           if (yacc_rules && token.offset == 0 && token.valid)
3504             {
3505               make_C_tag (FALSE); /* a yacc function */
3506               break;
3507             }
3508           if (definedef != dnone)
3509             break;
3510           switch (objdef)
3511             {
3512             case  otagseen:
3513               objdef = oignore;
3514               make_C_tag (TRUE); /* an Objective C class */
3515               break;
3516             case omethodtag:
3517             case omethodparm:
3518               objdef = omethodcolon;
3519               linebuffer_setlen (&token_name, token_name.len + 1);
3520               strcat (token_name.buffer, ":");
3521               break;
3522             }
3523           if (structdef == stagseen)
3524             {
3525               structdef = scolonseen;
3526               break;
3527             }
3528           /* Should be useless, but may be work as a safety net. */
3529           if (cplpl && fvdef == flistseen)
3530             {
3531               make_C_tag (TRUE); /* a function */
3532               fvdef = fignore;
3533               break;
3534             }
3535           break;
3536         case ';':
3537           if (definedef != dnone || inattribute)
3538             break;
3539           switch (typdef)
3540             {
3541             case tend:
3542             case ttypeseen:
3543               make_C_tag (FALSE); /* a typedef */
3544               typdef = tnone;
3545               fvdef = fvnone;
3546               break;
3547             case tnone:
3548             case tinbody:
3549             case tignore:
3550               switch (fvdef)
3551                 {
3552                 case fignore:
3553                   if (typdef == tignore || cplpl)
3554                     fvdef = fvnone;
3555                   break;
3556                 case fvnameseen:
3557                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3558                       || (members && instruct))
3559                     make_C_tag (FALSE); /* a variable */
3560                   fvextern = FALSE;
3561                   fvdef = fvnone;
3562                   token.valid = FALSE;
3563                   break;
3564                 case flistseen:
3565                   if ((declarations
3566                        && (cplpl || !instruct)
3567                        && (typdef == tnone || (typdef != tignore && instruct)))
3568                       || (members
3569                           && plainc && instruct))
3570                     make_C_tag (TRUE);  /* a function */
3571                   /* FALLTHRU */
3572                 default:
3573                   fvextern = FALSE;
3574                   fvdef = fvnone;
3575                   if (declarations
3576                        && cplpl && structdef == stagseen)
3577                     make_C_tag (FALSE); /* forward declaration */
3578                   else
3579                     token.valid = FALSE;
3580                 } /* switch (fvdef) */
3581               /* FALLTHRU */
3582             default:
3583               if (!instruct)
3584                 typdef = tnone;
3585             }
3586           if (structdef == stagseen)
3587             structdef = snone;
3588           break;
3589         case ',':
3590           if (definedef != dnone || inattribute)
3591             break;
3592           switch (objdef)
3593             {
3594             case omethodtag:
3595             case omethodparm:
3596               make_C_tag (TRUE); /* an Objective C method */
3597               objdef = oinbody;
3598               break;
3599             }
3600           switch (fvdef)
3601             {
3602             case fdefunkey:
3603             case foperator:
3604             case fstartlist:
3605             case finlist:
3606             case fignore:
3607             case vignore:
3608               break;
3609             case fdefunname:
3610               fvdef = fignore;
3611               break;
3612             case fvnameseen:
3613               if (parlev == 0
3614                   && ((globals
3615                        && bracelev == 0
3616                        && templatelev == 0
3617                        && (!fvextern || declarations))
3618                       || (members && instruct)))
3619                   make_C_tag (FALSE); /* a variable */
3620               break;
3621             case flistseen:
3622               if ((declarations && typdef == tnone && !instruct)
3623                   || (members && typdef != tignore && instruct))
3624                 {
3625                   make_C_tag (TRUE); /* a function */
3626                   fvdef = fvnameseen;
3627                 }
3628               else if (!declarations)
3629                 fvdef = fvnone;
3630               token.valid = FALSE;
3631               break;
3632             default:
3633               fvdef = fvnone;
3634             }
3635           if (structdef == stagseen)
3636             structdef = snone;
3637           break;
3638         case ']':
3639           if (definedef != dnone || inattribute)
3640             break;
3641           if (structdef == stagseen)
3642             structdef = snone;
3643           switch (typdef)
3644             {
3645             case ttypeseen:
3646             case tend:
3647               typdef = tignore;
3648               make_C_tag (FALSE);       /* a typedef */
3649               break;
3650             case tnone:
3651             case tinbody:
3652               switch (fvdef)
3653                 {
3654                 case foperator:
3655                 case finlist:
3656                 case fignore:
3657                 case vignore:
3658                   break;
3659                 case fvnameseen:
3660                   if ((members && bracelev == 1)
3661                       || (globals && bracelev == 0
3662                           && (!fvextern || declarations)))
3663                     make_C_tag (FALSE); /* a variable */
3664                   /* FALLTHRU */
3665                 default:
3666                   fvdef = fvnone;
3667                 }
3668               break;
3669             }
3670           break;
3671         case '(':
3672           if (inattribute)
3673             {
3674               attrparlev++;
3675               break;
3676             }
3677           if (definedef != dnone)
3678             break;
3679           if (objdef == otagseen && parlev == 0)
3680             objdef = oparenseen;
3681           switch (fvdef)
3682             {
3683             case fvnameseen:
3684               if (typdef == ttypeseen
3685                   && *lp != '*'
3686                   && !instruct)
3687                 {
3688                   /* This handles constructs like:
3689                      typedef void OperatorFun (int fun); */
3690                   make_C_tag (FALSE);
3691                   typdef = tignore;
3692                   fvdef = fignore;
3693                   break;
3694                 }
3695               /* FALLTHRU */
3696             case foperator:
3697               fvdef = fstartlist;
3698               break;
3699             case flistseen:
3700               fvdef = finlist;
3701               break;
3702             }
3703           parlev++;
3704           break;
3705         case ')':
3706           if (inattribute)
3707             {
3708               if (--attrparlev == 0)
3709                 inattribute = FALSE;
3710               break;
3711             }
3712           if (definedef != dnone)
3713             break;
3714           if (objdef == ocatseen && parlev == 1)
3715             {
3716               make_C_tag (TRUE); /* an Objective C category */
3717               objdef = oignore;
3718             }
3719           if (--parlev == 0)
3720             {
3721               switch (fvdef)
3722                 {
3723                 case fstartlist:
3724                 case finlist:
3725                   fvdef = flistseen;
3726                   break;
3727                 }
3728               if (!instruct
3729                   && (typdef == tend
3730                       || typdef == ttypeseen))
3731                 {
3732                   typdef = tignore;
3733                   make_C_tag (FALSE); /* a typedef */
3734                 }
3735             }
3736           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3737             parlev = 0;
3738           break;
3739         case '{':
3740           if (definedef != dnone)
3741             break;
3742           if (typdef == ttypeseen)
3743             {
3744               /* Whenever typdef is set to tinbody (currently only
3745                  here), typdefbracelev should be set to bracelev. */
3746               typdef = tinbody;
3747               typdefbracelev = bracelev;
3748             }
3749           switch (fvdef)
3750             {
3751             case flistseen:
3752               make_C_tag (TRUE);    /* a function */
3753               /* FALLTHRU */
3754             case fignore:
3755               fvdef = fvnone;
3756               break;
3757             case fvnone:
3758               switch (objdef)
3759                 {
3760                 case otagseen:
3761                   make_C_tag (TRUE); /* an Objective C class */
3762                   objdef = oignore;
3763                   break;
3764                 case omethodtag:
3765                 case omethodparm:
3766                   make_C_tag (TRUE); /* an Objective C method */
3767                   objdef = oinbody;
3768                   break;
3769                 default:
3770                   /* Neutralize `extern "C" {' grot. */
3771                   if (bracelev == 0 && structdef == snone && nestlev == 0
3772                       && typdef == tnone)
3773                     bracelev = -1;
3774                 }
3775               break;
3776             }
3777           switch (structdef)
3778             {
3779             case skeyseen:         /* unnamed struct */
3780               pushclass_above (bracelev, NULL, 0);
3781               structdef = snone;
3782               break;
3783             case stagseen:         /* named struct or enum */
3784             case scolonseen:       /* a class */
3785               pushclass_above (bracelev,token.line+token.offset, token.length);
3786               structdef = snone;
3787               make_C_tag (FALSE);  /* a struct or enum */
3788               break;
3789             }
3790           bracelev += 1;
3791           break;
3792         case '*':
3793           if (definedef != dnone)
3794             break;
3795           if (fvdef == fstartlist)
3796             {
3797               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3798               token.valid = FALSE;
3799             }
3800           break;
3801         case '}':
3802           if (definedef != dnone)
3803             break;
3804           bracelev -= 1;
3805           if (!ignoreindent && lp == newlb.buffer + 1)
3806             {
3807               if (bracelev != 0)
3808                 token.valid = FALSE; /* unexpected value, token unreliable */
3809               bracelev = 0;     /* reset brace level if first column */
3810               parlev = 0;       /* also reset paren level, just in case... */
3811             }
3812           else if (bracelev < 0)
3813             {
3814               token.valid = FALSE; /* something gone amiss, token unreliable */
3815               bracelev = 0;
3816             }
3817           if (bracelev == 0 && fvdef == vignore)
3818             fvdef = fvnone;             /* end of function */
3819           popclass_above (bracelev);
3820           structdef = snone;
3821           /* Only if typdef == tinbody is typdefbracelev significant. */
3822           if (typdef == tinbody && bracelev <= typdefbracelev)
3823             {
3824               assert (bracelev == typdefbracelev);
3825               typdef = tend;
3826             }
3827           break;
3828         case '=':
3829           if (definedef != dnone)
3830             break;
3831           switch (fvdef)
3832             {
3833             case foperator:
3834             case finlist:
3835             case fignore:
3836             case vignore:
3837               break;
3838             case fvnameseen:
3839               if ((members && bracelev == 1)
3840                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3841                 make_C_tag (FALSE); /* a variable */
3842               /* FALLTHRU */
3843             default:
3844               fvdef = vignore;
3845             }
3846           break;
3847         case '<':
3848           if (cplpl
3849               && (structdef == stagseen || fvdef == fvnameseen))
3850             {
3851               templatelev++;
3852               break;
3853             }
3854           goto resetfvdef;
3855         case '>':
3856           if (templatelev > 0)
3857             {
3858               templatelev--;
3859               break;
3860             }
3861           goto resetfvdef;
3862         case '+':
3863         case '-':
3864           if (objdef == oinbody && bracelev == 0)
3865             {
3866               objdef = omethodsign;
3867               break;
3868             }
3869           /* FALLTHRU */
3870         resetfvdef:
3871         case '#': case '~': case '&': case '%': case '/':
3872         case '|': case '^': case '!': case '.': case '?':
3873           if (definedef != dnone)
3874             break;
3875           /* These surely cannot follow a function tag in C. */
3876           switch (fvdef)
3877             {
3878             case foperator:
3879             case finlist:
3880             case fignore:
3881             case vignore:
3882               break;
3883             default:
3884               fvdef = fvnone;
3885             }
3886           break;
3887         case '\0':
3888           if (objdef == otagseen)
3889             {
3890               make_C_tag (TRUE); /* an Objective C class */
3891               objdef = oignore;
3892             }
3893           /* If a macro spans multiple lines don't reset its state. */
3894           if (quotednl)
3895             CNL_SAVE_DEFINEDEF ();
3896           else
3897             CNL ();
3898           break;
3899         } /* switch (c) */
3900
3901     } /* while not eof */
3902
3903   free (lbs[0].lb.buffer);
3904   free (lbs[1].lb.buffer);
3905 }
3906
3907 /*
3908  * Process either a C++ file or a C file depending on the setting
3909  * of a global flag.
3910  */
3911 static void
3912 default_C_entries (FILE *inf)
3913 {
3914   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3915 }
3916
/* Tag INF as plain C: C_entries is called with no dialect flag set. */
static void
plain_C_entries (FILE *inf)
{
  const int dialect = 0;        /* none of the C_* / YACC flags */

  C_entries (dialect, inf);
}
3923
3924 /* Always do C++. */
3925 static void
3926 Cplusplus_entries (FILE *inf)
3927 {
3928   C_entries (C_PLPL, inf);
3929 }
3930
3931 /* Always do Java. */
3932 static void
3933 Cjava_entries (FILE *inf)
3934 {
3935   C_entries (C_JAVA, inf);
3936 }
3937
3938 /* Always do C*. */
3939 static void
3940 Cstar_entries (FILE *inf)
3941 {
3942   C_entries (C_STAR, inf);
3943 }
3944
3945 /* Always do Yacc. */
3946 static void
3947 Yacc_entries (FILE *inf)
3948 {
3949   C_entries (YACC, inf);
3950 }
3951
3952 \f
/* Useful macros. */

/* Loop header that runs its body once per input line.  While
   FILE_POINTER is not at end of file, a line is read into LINE_BUFFER
   with readline and CHAR_POINTER is set to the start of that buffer
   before the body executes.  The arguments are parenthesized so that
   any lvalue expression can be passed safely. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), (file_pointer)),                   \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )

/* True if the text at CP is the keyword KW followed by a character
   for which notinname holds.  On success CP is advanced past the
   keyword and then past whatever skip_spaces skips; on failure CP is
   left untouched. */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert ("" kw), TRUE)   /* syntax error if not a literal string */  \
   && strneq ((cp), kw, sizeof (kw)-1)          /* cp points at kw */   \
   && notinname ((cp)[sizeof (kw)-1])           /* end of kw */         \
   && ((cp) = skip_spaces ((cp)+sizeof (kw)-1))) /* skip spaces */

/* Similar to LOOKING_AT, but compares with strncaseeq, does not use
   notinname (no end-of-keyword check) and does not skip spaces: on
   success CP is left just after the keyword. */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert ("" kw), TRUE) /* syntax error if not a literal string */    \
   && strncaseeq ((cp), kw, sizeof (kw)-1)      /* cp points at kw */   \
   && ((cp) += sizeof (kw)-1))                  /* advance past kw */
3974
3975 /*
3976  * Read a file, but do no processing.  This is used to do regexp
3977  * matching on files that have no language defined.
3978  */
3979 static void
3980 just_read_file (FILE *inf)
3981 {
3982   while (!feof (inf))
3983     readline (&lb, inf);
3984 }
3985
3986 \f
3987 /* Fortran parsing */
3988
3989 static void F_takeprec (void);
3990 static void F_getit (FILE *);
3991
3992 static void
3993 F_takeprec (void)
3994 {
3995   dbp = skip_spaces (dbp);
3996   if (*dbp != '*')
3997     return;
3998   dbp++;
3999   dbp = skip_spaces (dbp);
4000   if (strneq (dbp, "(*)", 3))
4001     {
4002       dbp += 3;
4003       return;
4004     }
4005   if (!ISDIGIT (*dbp))
4006     {
4007       --dbp;                    /* force failure */
4008       return;
4009     }
4010   do
4011     dbp++;
4012   while (ISDIGIT (*dbp));
4013 }
4014
4015 static void
4016 F_getit (FILE *inf)
4017 {
4018   register char *cp;
4019
4020   dbp = skip_spaces (dbp);
4021   if (*dbp == '\0')
4022     {
4023       readline (&lb, inf);
4024       dbp = lb.buffer;
4025       if (dbp[5] != '&')
4026         return;
4027       dbp += 6;
4028       dbp = skip_spaces (dbp);
4029     }
4030   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4031     return;
4032   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4033     continue;
4034   make_tag (dbp, cp-dbp, TRUE,
4035             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4036 }
4037
4038
4039 static void
4040 Fortran_functions (FILE *inf)
4041 {
4042   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4043     {
4044       if (*dbp == '%')
4045         dbp++;                  /* Ratfor escape to fortran */
4046       dbp = skip_spaces (dbp);
4047       if (*dbp == '\0')
4048         continue;
4049
4050       if (LOOKING_AT_NOCASE (dbp, "recursive"))
4051         dbp = skip_spaces (dbp);
4052
4053       if (LOOKING_AT_NOCASE (dbp, "pure"))
4054         dbp = skip_spaces (dbp);
4055
4056       if (LOOKING_AT_NOCASE (dbp, "elemental"))
4057         dbp = skip_spaces (dbp);
4058
4059       switch (lowcase (*dbp))
4060         {
4061         case 'i':
4062           if (nocase_tail ("integer"))
4063             F_takeprec ();
4064           break;
4065         case 'r':
4066           if (nocase_tail ("real"))
4067             F_takeprec ();
4068           break;
4069         case 'l':
4070           if (nocase_tail ("logical"))
4071             F_takeprec ();
4072           break;
4073         case 'c':
4074           if (nocase_tail ("complex") || nocase_tail ("character"))
4075             F_takeprec ();
4076           break;
4077         case 'd':
4078           if (nocase_tail ("double"))
4079             {
4080               dbp = skip_spaces (dbp);
4081               if (*dbp == '\0')
4082                 continue;
4083               if (nocase_tail ("precision"))
4084                 break;
4085               continue;
4086             }
4087           break;
4088         }
4089       dbp = skip_spaces (dbp);
4090       if (*dbp == '\0')
4091         continue;
4092       switch (lowcase (*dbp))
4093         {
4094         case 'f':
4095           if (nocase_tail ("function"))
4096             F_getit (inf);
4097           continue;
4098         case 's':
4099           if (nocase_tail ("subroutine"))
4100             F_getit (inf);
4101           continue;
4102         case 'e':
4103           if (nocase_tail ("entry"))
4104             F_getit (inf);
4105           continue;
4106         case 'b':
4107           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4108             {
4109               dbp = skip_spaces (dbp);
4110               if (*dbp == '\0') /* assume un-named */
4111                 make_tag ("blockdata", 9, TRUE,
4112                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4113               else
4114                 F_getit (inf);  /* look for name */
4115             }
4116           continue;
4117         }
4118     }
4119 }
4120
4121 \f
4122 /*
4123  * Ada parsing
4124  * Original code by
4125  * Philippe Waroquiers (1998)
4126  */
4127
4128 /* Once we are positioned after an "interesting" keyword, let's get
4129    the real tag value necessary. */
4130 static void
4131 Ada_getit (FILE *inf, const char *name_qualifier)
4132 {
4133   register char *cp;
4134   char *name;
4135   char c;
4136
4137   while (!feof (inf))
4138     {
4139       dbp = skip_spaces (dbp);
4140       if (*dbp == '\0'
4141           || (dbp[0] == '-' && dbp[1] == '-'))
4142         {
4143           readline (&lb, inf);
4144           dbp = lb.buffer;
4145         }
4146       switch (lowcase (*dbp))
4147         {
4148         case 'b':
4149           if (nocase_tail ("body"))
4150             {
4151               /* Skipping body of   procedure body   or   package body or ....
4152                  resetting qualifier to body instead of spec. */
4153               name_qualifier = "/b";
4154               continue;
4155             }
4156           break;
4157         case 't':
4158           /* Skipping type of   task type   or   protected type ... */
4159           if (nocase_tail ("type"))
4160             continue;
4161           break;
4162         }
4163       if (*dbp == '"')
4164         {
4165           dbp += 1;
4166           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4167             continue;
4168         }
4169       else
4170         {
4171           dbp = skip_spaces (dbp);
4172           for (cp = dbp;
4173                (*cp != '\0'
4174                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4175                cp++)
4176             continue;
4177           if (cp == dbp)
4178             return;
4179         }
4180       c = *cp;
4181       *cp = '\0';
4182       name = concat (dbp, name_qualifier, "");
4183       *cp = c;
4184       make_tag (name, strlen (name), TRUE,
4185                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4186       free (name);
4187       if (c == '"')
4188         dbp = cp + 1;
4189       return;
4190     }
4191 }
4192
4193 static void
4194 Ada_funcs (FILE *inf)
4195 {
4196   bool inquote = FALSE;
4197   bool skip_till_semicolumn = FALSE;
4198
4199   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4200     {
4201       while (*dbp != '\0')
4202         {
4203           /* Skip a string i.e. "abcd". */
4204           if (inquote || (*dbp == '"'))
4205             {
4206               dbp = etags_strchr (dbp + !inquote, '"');
4207               if (dbp != NULL)
4208                 {
4209                   inquote = FALSE;
4210                   dbp += 1;
4211                   continue;     /* advance char */
4212                 }
4213               else
4214                 {
4215                   inquote = TRUE;
4216                   break;        /* advance line */
4217                 }
4218             }
4219
4220           /* Skip comments. */
4221           if (dbp[0] == '-' && dbp[1] == '-')
4222             break;              /* advance line */
4223
4224           /* Skip character enclosed in single quote i.e. 'a'
4225              and skip single quote starting an attribute i.e. 'Image. */
4226           if (*dbp == '\'')
4227             {
4228               dbp++ ;
4229               if (*dbp != '\0')
4230                 dbp++;
4231               continue;
4232             }
4233
4234           if (skip_till_semicolumn)
4235             {
4236               if (*dbp == ';')
4237                 skip_till_semicolumn = FALSE;
4238               dbp++;
4239               continue;         /* advance char */
4240             }
4241
4242           /* Search for beginning of a token.  */
4243           if (!begtoken (*dbp))
4244             {
4245               dbp++;
4246               continue;         /* advance char */
4247             }
4248
4249           /* We are at the beginning of a token. */
4250           switch (lowcase (*dbp))
4251             {
4252             case 'f':
4253               if (!packages_only && nocase_tail ("function"))
4254                 Ada_getit (inf, "/f");
4255               else
4256                 break;          /* from switch */
4257               continue;         /* advance char */
4258             case 'p':
4259               if (!packages_only && nocase_tail ("procedure"))
4260                 Ada_getit (inf, "/p");
4261               else if (nocase_tail ("package"))
4262                 Ada_getit (inf, "/s");
4263               else if (nocase_tail ("protected")) /* protected type */
4264                 Ada_getit (inf, "/t");
4265               else
4266                 break;          /* from switch */
4267               continue;         /* advance char */
4268
4269             case 'u':
4270               if (typedefs && !packages_only && nocase_tail ("use"))
4271                 {
4272                   /* when tagging types, avoid tagging  use type Pack.Typename;
4273                      for this, we will skip everything till a ; */
4274                   skip_till_semicolumn = TRUE;
4275                   continue;     /* advance char */
4276                 }
4277
4278             case 't':
4279               if (!packages_only && nocase_tail ("task"))
4280                 Ada_getit (inf, "/k");
4281               else if (typedefs && !packages_only && nocase_tail ("type"))
4282                 {
4283                   Ada_getit (inf, "/t");
4284                   while (*dbp != '\0')
4285                     dbp += 1;
4286                 }
4287               else
4288                 break;          /* from switch */
4289               continue;         /* advance char */
4290             }
4291
4292           /* Look for the end of the token. */
4293           while (!endtoken (*dbp))
4294             dbp++;
4295
4296         } /* advance char */
4297     } /* advance line */
4298 }
4299
4300 \f
4301 /*
4302  * Unix and microcontroller assembly tag handling
4303  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4304  * Idea by Bob Weiner, Motorola Inc. (1994)
4305  */
4306 static void
4307 Asm_labels (FILE *inf)
4308 {
4309   register char *cp;
4310
4311   LOOP_ON_INPUT_LINES (inf, lb, cp)
4312     {
4313       /* If first char is alphabetic or one of [_.$], test for colon
4314          following identifier. */
4315       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4316         {
4317           /* Read past label. */
4318           cp++;
4319           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4320             cp++;
4321           if (*cp == ':' || iswhite (*cp))
4322             /* Found end of label, so copy it and add it to the table. */
4323             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4324                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4325         }
4326     }
4327 }
4328
4329 \f
4330 /*
4331  * Perl support
4332  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4333  * Perl variable names: /^(my|local).../
4334  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4335  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4336  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4337  */
4338 static void
4339 Perl_functions (FILE *inf)
4340 {
4341   char *package = savestr ("main"); /* current package name */
4342   register char *cp;
4343
4344   LOOP_ON_INPUT_LINES (inf, lb, cp)
4345     {
4346       cp = skip_spaces (cp);
4347
4348       if (LOOKING_AT (cp, "package"))
4349         {
4350           free (package);
4351           get_tag (cp, &package);
4352         }
4353       else if (LOOKING_AT (cp, "sub"))
4354         {
4355           char *pos;
4356           char *sp = cp;
4357
4358           while (!notinname (*cp))
4359             cp++;
4360           if (cp == sp)
4361             continue;           /* nothing found */
4362           if ((pos = etags_strchr (sp, ':')) != NULL
4363               && pos < cp && pos[1] == ':')
4364             /* The name is already qualified. */
4365             make_tag (sp, cp - sp, TRUE,
4366                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4367           else
4368             /* Qualify it. */
4369             {
4370               char savechar, *name;
4371
4372               savechar = *cp;
4373               *cp = '\0';
4374               name = concat (package, "::", sp);
4375               *cp = savechar;
4376               make_tag (name, strlen (name), TRUE,
4377                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4378               free (name);
4379             }
4380         }
4381        else if (globals)        /* only if we are tagging global vars */
4382         {
4383           /* Skip a qualifier, if any. */
4384           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4385           /* After "my" or "local", but before any following paren or space. */
4386           char *varstart = cp;
4387
4388           if (qual              /* should this be removed?  If yes, how? */
4389               && (*cp == '$' || *cp == '@' || *cp == '%'))
4390             {
4391               varstart += 1;
4392               do
4393                 cp++;
4394               while (ISALNUM (*cp) || *cp == '_');
4395             }
4396           else if (qual)
4397             {
4398               /* Should be examining a variable list at this point;
4399                  could insist on seeing an open parenthesis. */
4400               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4401                 cp++;
4402             }
4403           else
4404             continue;
4405
4406           make_tag (varstart, cp - varstart, FALSE,
4407                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4408         }
4409     }
4410   free (package);
4411 }
4412
4413
4414 /*
4415  * Python support
4416  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4417  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4418  * More ideas by seb bacon <seb@jamkit.com> (2002)
4419  */
4420 static void
4421 Python_functions (FILE *inf)
4422 {
4423   register char *cp;
4424
4425   LOOP_ON_INPUT_LINES (inf, lb, cp)
4426     {
4427       cp = skip_spaces (cp);
4428       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4429         {
4430           char *name = cp;
4431           while (!notinname (*cp) && *cp != ':')
4432             cp++;
4433           make_tag (name, cp - name, TRUE,
4434                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4435         }
4436     }
4437 }
4438
4439 \f
4440 /*
4441  * PHP support
4442  * Look for:
4443  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4444  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4445  *  - /^[ \t]*define\(\"[^\"]+/
4446  * Only with --members:
4447  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4448  * Idea by Diez B. Roggisch (2001)
4449  */
4450 static void
4451 PHP_functions (FILE *inf)
4452 {
4453   register char *cp, *name;
4454   bool search_identifier = FALSE;
4455
4456   LOOP_ON_INPUT_LINES (inf, lb, cp)
4457     {
4458       cp = skip_spaces (cp);
4459       name = cp;
4460       if (search_identifier
4461           && *cp != '\0')
4462         {
4463           while (!notinname (*cp))
4464             cp++;
4465           make_tag (name, cp - name, TRUE,
4466                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4467           search_identifier = FALSE;
4468         }
4469       else if (LOOKING_AT (cp, "function"))
4470         {
4471           if (*cp == '&')
4472             cp = skip_spaces (cp+1);
4473           if (*cp != '\0')
4474             {
4475               name = cp;
4476               while (!notinname (*cp))
4477                 cp++;
4478               make_tag (name, cp - name, TRUE,
4479                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4480             }
4481           else
4482             search_identifier = TRUE;
4483         }
4484       else if (LOOKING_AT (cp, "class"))
4485         {
4486           if (*cp != '\0')
4487             {
4488               name = cp;
4489               while (*cp != '\0' && !iswhite (*cp))
4490                 cp++;
4491               make_tag (name, cp - name, FALSE,
4492                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4493             }
4494           else
4495             search_identifier = TRUE;
4496         }
4497       else if (strneq (cp, "define", 6)
4498                && (cp = skip_spaces (cp+6))
4499                && *cp++ == '('
4500                && (*cp == '"' || *cp == '\''))
4501         {
4502           char quote = *cp++;
4503           name = cp;
4504           while (*cp != quote && *cp != '\0')
4505             cp++;
4506           make_tag (name, cp - name, FALSE,
4507                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4508         }
4509       else if (members
4510                && LOOKING_AT (cp, "var")
4511                && *cp == '$')
4512         {
4513           name = cp;
4514           while (!notinname (*cp))
4515             cp++;
4516           make_tag (name, cp - name, FALSE,
4517                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4518         }
4519     }
4520 }
4521
4522 \f
4523 /*
4524  * Cobol tag functions
4525  * We could look for anything that could be a paragraph name.
4526  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4527  * Idea by Corny de Souza (1993)
4528  */
4529 static void
4530 Cobol_paragraphs (FILE *inf)
4531 {
4532   register char *bp, *ep;
4533
4534   LOOP_ON_INPUT_LINES (inf, lb, bp)
4535     {
4536       if (lb.len < 9)
4537         continue;
4538       bp += 8;
4539
4540       /* If eoln, compiler option or comment ignore whole line. */
4541       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4542         continue;
4543
4544       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4545         continue;
4546       if (*ep++ == '.')
4547         make_tag (bp, ep - bp, TRUE,
4548                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4549     }
4550 }
4551
4552 \f
4553 /*
4554  * Makefile support
4555  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4556  */
4557 static void
4558 Makefile_targets (FILE *inf)
4559 {
4560   register char *bp;
4561
4562   LOOP_ON_INPUT_LINES (inf, lb, bp)
4563     {
4564       if (*bp == '\t' || *bp == '#')
4565         continue;
4566       while (*bp != '\0' && *bp != '=' && *bp != ':')
4567         bp++;
4568       if (*bp == ':' || (globals && *bp == '='))
4569         {
4570           /* We should detect if there is more than one tag, but we do not.
4571              We just skip initial and final spaces. */
4572           char * namestart = skip_spaces (lb.buffer);
4573           while (--bp > namestart)
4574             if (!notinname (*bp))
4575               break;
4576           make_tag (namestart, bp - namestart + 1, TRUE,
4577                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4578         }
4579     }
4580 }
4581
4582 \f
4583 /*
4584  * Pascal parsing
4585  * Original code by Mosur K. Mohan (1989)
4586  *
4587  *  Locates tags for procedures & functions.  Doesn't do any type- or
4588  *  var-definitions.  It does look for the keyword "extern" or
4589  *  "forward" immediately following the procedure statement; if found,
4590  *  the tag is skipped.
4591  */
4592 static void
4593 Pascal_functions (FILE *inf)
4594 {
4595   linebuffer tline;             /* mostly copied from C_entries */
4596   long save_lcno;
4597   int save_lineno, namelen, taglen;
4598   char c, *name;
4599
4600   bool                          /* each of these flags is TRUE if: */
4601     incomment,                  /* point is inside a comment */
4602     inquote,                    /* point is inside '..' string */
4603     get_tagname,                /* point is after PROCEDURE/FUNCTION
4604                                    keyword, so next item = potential tag */
4605     found_tag,                  /* point is after a potential tag */
4606     inparms,                    /* point is within parameter-list */
4607     verify_tag;                 /* point has passed the parm-list, so the
4608                                    next token will determine whether this
4609                                    is a FORWARD/EXTERN to be ignored, or
4610                                    whether it is a real tag */
4611
4612   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4613   name = NULL;                  /* keep compiler quiet */
4614   dbp = lb.buffer;
4615   *dbp = '\0';
4616   linebuffer_init (&tline);
4617
4618   incomment = inquote = FALSE;
4619   found_tag = FALSE;            /* have a proc name; check if extern */
4620   get_tagname = FALSE;          /* found "procedure" keyword         */
4621   inparms = FALSE;              /* found '(' after "proc"            */
4622   verify_tag = FALSE;           /* check if "extern" is ahead        */
4623
4624
4625   while (!feof (inf))           /* long main loop to get next char */
4626     {
4627       c = *dbp++;
4628       if (c == '\0')            /* if end of line */
4629         {
4630           readline (&lb, inf);
4631           dbp = lb.buffer;
4632           if (*dbp == '\0')
4633             continue;
4634           if (!((found_tag && verify_tag)
4635                 || get_tagname))
4636             c = *dbp++;         /* only if don't need *dbp pointing
4637                                    to the beginning of the name of
4638                                    the procedure or function */
4639         }
4640       if (incomment)
4641         {
4642           if (c == '}')         /* within { } comments */
4643             incomment = FALSE;
4644           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4645             {
4646               dbp++;
4647               incomment = FALSE;
4648             }
4649           continue;
4650         }
4651       else if (inquote)
4652         {
4653           if (c == '\'')
4654             inquote = FALSE;
4655           continue;
4656         }
4657       else
4658         switch (c)
4659           {
4660           case '\'':
4661             inquote = TRUE;     /* found first quote */
4662             continue;
4663           case '{':             /* found open { comment */
4664             incomment = TRUE;
4665             continue;
4666           case '(':
4667             if (*dbp == '*')    /* found open (* comment */
4668               {
4669                 incomment = TRUE;
4670                 dbp++;
4671               }
4672             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4673               inparms = TRUE;
4674             continue;
4675           case ')':             /* end of parms list */
4676             if (inparms)
4677               inparms = FALSE;
4678             continue;
4679           case ';':
4680             if (found_tag && !inparms) /* end of proc or fn stmt */
4681               {
4682                 verify_tag = TRUE;
4683                 break;
4684               }
4685             continue;
4686           }
4687       if (found_tag && verify_tag && (*dbp != ' '))
4688         {
4689           /* Check if this is an "extern" declaration. */
4690           if (*dbp == '\0')
4691             continue;
4692           if (lowcase (*dbp == 'e'))
4693             {
4694               if (nocase_tail ("extern")) /* superfluous, really! */
4695                 {
4696                   found_tag = FALSE;
4697                   verify_tag = FALSE;
4698                 }
4699             }
4700           else if (lowcase (*dbp) == 'f')
4701             {
4702               if (nocase_tail ("forward")) /* check for forward reference */
4703                 {
4704                   found_tag = FALSE;
4705                   verify_tag = FALSE;
4706                 }
4707             }
4708           if (found_tag && verify_tag) /* not external proc, so make tag */
4709             {
4710               found_tag = FALSE;
4711               verify_tag = FALSE;
4712               make_tag (name, namelen, TRUE,
4713                         tline.buffer, taglen, save_lineno, save_lcno);
4714               continue;
4715             }
4716         }
4717       if (get_tagname)          /* grab name of proc or fn */
4718         {
4719           char *cp;
4720
4721           if (*dbp == '\0')
4722             continue;
4723
4724           /* Find block name. */
4725           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4726             continue;
4727
4728           /* Save all values for later tagging. */
4729           linebuffer_setlen (&tline, lb.len);
4730           strcpy (tline.buffer, lb.buffer);
4731           save_lineno = lineno;
4732           save_lcno = linecharno;
4733           name = tline.buffer + (dbp - lb.buffer);
4734           namelen = cp - dbp;
4735           taglen = cp - lb.buffer + 1;
4736
4737           dbp = cp;             /* set dbp to e-o-token */
4738           get_tagname = FALSE;
4739           found_tag = TRUE;
4740           continue;
4741
4742           /* And proceed to check for "extern". */
4743         }
4744       else if (!incomment && !inquote && !found_tag)
4745         {
4746           /* Check for proc/fn keywords. */
4747           switch (lowcase (c))
4748             {
4749             case 'p':
4750               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4751                 get_tagname = TRUE;
4752               continue;
4753             case 'f':
4754               if (nocase_tail ("unction"))
4755                 get_tagname = TRUE;
4756               continue;
4757             }
4758         }
4759     } /* while not eof */
4760
4761   free (tline.buffer);
4762 }
4763
4764 \f
4765 /*
4766  * Lisp tag functions
4767  *  look for (def or (DEF, quote or QUOTE
4768  */
4769
4770 static void L_getit (void);
4771
4772 static void
4773 L_getit (void)
4774 {
4775   if (*dbp == '\'')             /* Skip prefix quote */
4776     dbp++;
4777   else if (*dbp == '(')
4778   {
4779     dbp++;
4780     /* Try to skip "(quote " */
4781     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4782       /* Ok, then skip "(" before name in (defstruct (foo)) */
4783       dbp = skip_spaces (dbp);
4784   }
4785   get_tag (dbp, NULL);
4786 }
4787
4788 static void
4789 Lisp_functions (FILE *inf)
4790 {
4791   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4792     {
4793       if (dbp[0] != '(')
4794         continue;
4795
4796       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4797         {
4798           dbp = skip_non_spaces (dbp);
4799           dbp = skip_spaces (dbp);
4800           L_getit ();
4801         }
4802       else
4803         {
4804           /* Check for (foo::defmumble name-defined ... */
4805           do
4806             dbp++;
4807           while (!notinname (*dbp) && *dbp != ':');
4808           if (*dbp == ':')
4809             {
4810               do
4811                 dbp++;
4812               while (*dbp == ':');
4813
4814               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4815                 {
4816                   dbp = skip_non_spaces (dbp);
4817                   dbp = skip_spaces (dbp);
4818                   L_getit ();
4819                 }
4820             }
4821         }
4822     }
4823 }
4824
4825 \f
4826 /*
4827  * Lua script language parsing
4828  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4829  *
4830  *  "function" and "local function" are tags if they start at column 1.
4831  */
4832 static void
4833 Lua_functions (FILE *inf)
4834 {
4835   register char *bp;
4836
4837   LOOP_ON_INPUT_LINES (inf, lb, bp)
4838     {
4839       if (bp[0] != 'f' && bp[0] != 'l')
4840         continue;
4841
4842       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4843
4844       if (LOOKING_AT (bp, "function"))
4845         get_tag (bp, NULL);
4846     }
4847 }
4848
4849 \f
4850 /*
4851  * PostScript tags
4852  * Just look for lines where the first character is '/'
4853  * Also look at "defineps" for PSWrap
4854  * Ideas by:
4855  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4856  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4857  */
4858 static void
4859 PS_functions (FILE *inf)
4860 {
4861   register char *bp, *ep;
4862
4863   LOOP_ON_INPUT_LINES (inf, lb, bp)
4864     {
4865       if (bp[0] == '/')
4866         {
4867           for (ep = bp+1;
4868                *ep != '\0' && *ep != ' ' && *ep != '{';
4869                ep++)
4870             continue;
4871           make_tag (bp, ep - bp, TRUE,
4872                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4873         }
4874       else if (LOOKING_AT (bp, "defineps"))
4875         get_tag (bp, NULL);
4876     }
4877 }
4878
4879 \f
4880 /*
4881  * Forth tags
4882  * Ignore anything after \ followed by space or in ( )
4883  * Look for words defined by :
4884  * Look for constant, code, create, defer, value, and variable
4885  * OBP extensions:  Look for buffer:, field,
4886  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4887  */
4888 static void
4889 Forth_words (FILE *inf)
4890 {
4891   register char *bp;
4892
4893   LOOP_ON_INPUT_LINES (inf, lb, bp)
4894     while ((bp = skip_spaces (bp))[0] != '\0')
4895       if (bp[0] == '\\' && iswhite (bp[1]))
4896         break;                  /* read next line */
4897       else if (bp[0] == '(' && iswhite (bp[1]))
4898         do                      /* skip to ) or eol */
4899           bp++;
4900         while (*bp != ')' && *bp != '\0');
4901       else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4902                || LOOKING_AT_NOCASE (bp, "constant")
4903                || LOOKING_AT_NOCASE (bp, "code")
4904                || LOOKING_AT_NOCASE (bp, "create")
4905                || LOOKING_AT_NOCASE (bp, "defer")
4906                || LOOKING_AT_NOCASE (bp, "value")
4907                || LOOKING_AT_NOCASE (bp, "variable")
4908                || LOOKING_AT_NOCASE (bp, "buffer:")
4909                || LOOKING_AT_NOCASE (bp, "field"))
4910         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4911       else
4912         bp = skip_non_spaces (bp);
4913 }
4914
4915 \f
4916 /*
4917  * Scheme tag functions
4918  * look for (def... xyzzy
4919  *          (def... (xyzzy
4920  *          (def ... ((...(xyzzy ....
4921  *          (set! xyzzy
4922  * Original code by Ken Haase (1985?)
4923  */
4924 static void
4925 Scheme_functions (FILE *inf)
4926 {
4927   register char *bp;
4928
4929   LOOP_ON_INPUT_LINES (inf, lb, bp)
4930     {
4931       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4932         {
4933           bp = skip_non_spaces (bp+4);
4934           /* Skip over open parens and white space.  Don't continue past
4935              '\0'. */
4936           while (*bp && notinname (*bp))
4937             bp++;
4938           get_tag (bp, NULL);
4939         }
4940       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4941         get_tag (bp, NULL);
4942     }
4943 }
4944
4945 \f
4946 /* Find tags in TeX and LaTeX input files.  */
4947
4948 /* TEX_toktab is a table of TeX control sequences that define tags.
4949  * Each entry records one such control sequence.
4950  *
4951  * Original code from who knows whom.
4952  * Ideas by:
4953  *   Stefan Monnier (2002)
4954  */
4955
/* Table with tag tokens.  Built by TEX_decode_env the first time
   TeX_commands runs; the end of the table is marked by an entry whose
   buffer is NULL.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Entries are separated by ':' and written without the escape
   character.  */
static const char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode (FILE *);
static void TEX_decode_env (const char *, const char *);

/* Escape and grouping characters in effect for the current file.
   TEX_mode sets all three for each file it examines.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4971
4972 /*
4973  * TeX/LaTeX scanning loop.
4974  */
4975 static void
4976 TeX_commands (FILE *inf)
4977 {
4978   char *cp;
4979   linebuffer *key;
4980
4981   /* Select either \ or ! as escape character.  */
4982   TEX_mode (inf);
4983
4984   /* Initialize token table once from environment. */
4985   if (TEX_toktab == NULL)
4986     TEX_decode_env ("TEXTAGS", TEX_defenv);
4987
4988   LOOP_ON_INPUT_LINES (inf, lb, cp)
4989     {
4990       /* Look at each TEX keyword in line. */
4991       for (;;)
4992         {
4993           /* Look for a TEX escape. */
4994           while (*cp++ != TEX_esc)
4995             if (cp[-1] == '\0' || cp[-1] == '%')
4996               goto tex_next_line;
4997
4998           for (key = TEX_toktab; key->buffer != NULL; key++)
4999             if (strneq (cp, key->buffer, key->len))
5000               {
5001                 register char *p;
5002                 int namelen, linelen;
5003                 bool opgrp = FALSE;
5004
5005                 cp = skip_spaces (cp + key->len);
5006                 if (*cp == TEX_opgrp)
5007                   {
5008                     opgrp = TRUE;
5009                     cp++;
5010                   }
5011                 for (p = cp;
5012                      (!iswhite (*p) && *p != '#' &&
5013                       *p != TEX_opgrp && *p != TEX_clgrp);
5014                      p++)
5015                   continue;
5016                 namelen = p - cp;
5017                 linelen = lb.len;
5018                 if (!opgrp || *p == TEX_clgrp)
5019                   {
5020                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5021                       p++;
5022                     linelen = p - lb.buffer + 1;
5023                   }
5024                 make_tag (cp, namelen, TRUE,
5025                           lb.buffer, linelen, lineno, linecharno);
5026                 goto tex_next_line; /* We only tag a line once */
5027               }
5028         }
5029     tex_next_line:
5030       ;
5031     }
5032 }
5033
5034 #define TEX_LESC '\\'
5035 #define TEX_SESC '!'
5036
5037 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5038    chars accordingly. */
5039 static void
5040 TEX_mode (FILE *inf)
5041 {
5042   int c;
5043
5044   while ((c = getc (inf)) != EOF)
5045     {
5046       /* Skip to next line if we hit the TeX comment char. */
5047       if (c == '%')
5048         while (c != '\n' && c != EOF)
5049           c = getc (inf);
5050       else if (c == TEX_LESC || c == TEX_SESC )
5051         break;
5052     }
5053
5054   if (c == TEX_LESC)
5055     {
5056       TEX_esc = TEX_LESC;
5057       TEX_opgrp = '{';
5058       TEX_clgrp = '}';
5059     }
5060   else
5061     {
5062       TEX_esc = TEX_SESC;
5063       TEX_opgrp = '<';
5064       TEX_clgrp = '>';
5065     }
5066   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5067      No attempt is made to correct the situation. */
5068   rewind (inf);
5069 }
5070
5071 /* Read environment and prepend it to the default string.
5072    Build token table. */
5073 static void
5074 TEX_decode_env (const char *evarname, const char *defenv)
5075 {
5076   register const char *env, *p;
5077   int i, len;
5078
5079   /* Append default string to environment. */
5080   env = getenv (evarname);
5081   if (!env)
5082     env = defenv;
5083   else
5084     env = concat (env, defenv, "");
5085
5086   /* Allocate a token table */
5087   for (len = 1, p = env; p;)
5088     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5089       len++;
5090   TEX_toktab = xnew (len, linebuffer);
5091
5092   /* Unpack environment string into token table. Be careful about */
5093   /* zero-length strings (leading ':', "::" and trailing ':') */
5094   for (i = 0; *env != '\0';)
5095     {
5096       p = etags_strchr (env, ':');
5097       if (!p)                   /* End of environment string. */
5098         p = env + strlen (env);
5099       if (p - env > 0)
5100         {                       /* Only non-zero strings. */
5101           TEX_toktab[i].buffer = savenstr (env, p - env);
5102           TEX_toktab[i].len = p - env;
5103           i++;
5104         }
5105       if (*p)
5106         env = p + 1;
5107       else
5108         {
5109           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5110           TEX_toktab[i].len = 0;
5111           break;
5112         }
5113     }
5114 }
5115
5116 \f
5117 /* Texinfo support.  Dave Love, Mar. 2000.  */
5118 static void
5119 Texinfo_nodes (FILE *inf)
5120 {
5121   char *cp, *start;
5122   LOOP_ON_INPUT_LINES (inf, lb, cp)
5123     if (LOOKING_AT (cp, "@node"))
5124       {
5125         start = cp;
5126         while (*cp != '\0' && *cp != ',')
5127           cp++;
5128         make_tag (start, cp - start, TRUE,
5129                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5130       }
5131 }
5132
5133 \f
5134 /*
5135  * HTML support.
5136  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5137  * Contents of <a name=xxx> are tags with name xxx.
5138  *
5139  * Francesco Potortì, 2002.
5140  */
5141 static void
5142 HTML_labels (FILE *inf)
5143 {
5144   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5145   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5146   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5147   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5148   char *end;
5149
5150
5151   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5152
5153   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5154     for (;;)                    /* loop on the same line */
5155       {
5156         if (skiptag)            /* skip HTML tag */
5157           {
5158             while (*dbp != '\0' && *dbp != '>')
5159               dbp++;
5160             if (*dbp == '>')
5161               {
5162                 dbp += 1;
5163                 skiptag = FALSE;
5164                 continue;       /* look on the same line */
5165               }
5166             break;              /* go to next line */
5167           }
5168
5169         else if (intag) /* look for "name=" or "id=" */
5170           {
5171             while (*dbp != '\0' && *dbp != '>'
5172                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5173               dbp++;
5174             if (*dbp == '\0')
5175               break;            /* go to next line */
5176             if (*dbp == '>')
5177               {
5178                 dbp += 1;
5179                 intag = FALSE;
5180                 continue;       /* look on the same line */
5181               }
5182             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5183                 || LOOKING_AT_NOCASE (dbp, "id="))
5184               {
5185                 bool quoted = (dbp[0] == '"');
5186
5187                 if (quoted)
5188                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5189                     continue;
5190                 else
5191                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5192                     continue;
5193                 linebuffer_setlen (&token_name, end - dbp);
5194                 strncpy (token_name.buffer, dbp, end - dbp);
5195                 token_name.buffer[end - dbp] = '\0';
5196
5197                 dbp = end;
5198                 intag = FALSE;  /* we found what we looked for */
5199                 skiptag = TRUE; /* skip to the end of the tag */
5200                 getnext = TRUE; /* then grab the text */
5201                 continue;       /* look on the same line */
5202               }
5203             dbp += 1;
5204           }
5205
5206         else if (getnext)       /* grab next tokens and tag them */
5207           {
5208             dbp = skip_spaces (dbp);
5209             if (*dbp == '\0')
5210               break;            /* go to next line */
5211             if (*dbp == '<')
5212               {
5213                 intag = TRUE;
5214                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5215                 continue;       /* look on the same line */
5216               }
5217
5218             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5219               continue;
5220             make_tag (token_name.buffer, token_name.len, TRUE,
5221                       dbp, end - dbp, lineno, linecharno);
5222             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5223             getnext = FALSE;
5224             break;              /* go to next line */
5225           }
5226
5227         else                    /* look for an interesting HTML tag */
5228           {
5229             while (*dbp != '\0' && *dbp != '<')
5230               dbp++;
5231             if (*dbp == '\0')
5232               break;            /* go to next line */
5233             intag = TRUE;
5234             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5235               {
5236                 inanchor = TRUE;
5237                 continue;       /* look on the same line */
5238               }
5239             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5240                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5241                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5242                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5243               {
5244                 intag = FALSE;
5245                 getnext = TRUE;
5246                 continue;       /* look on the same line */
5247               }
5248             dbp += 1;
5249           }
5250       }
5251 }
5252
5253 \f
5254 /*
5255  * Prolog support
5256  *
5257  * Assumes that the predicate or rule starts at column 0.
5258  * Only the first clause of a predicate or rule is added.
5259  * Original code by Sunichirou Sugou (1989)
5260  * Rewritten by Anders Lindgren (1996)
5261  */
5262 static size_t prolog_pr (char *, char *);
5263 static void prolog_skip_comment (linebuffer *, FILE *);
5264 static size_t prolog_atom (char *, size_t);
5265
5266 static void
5267 Prolog_functions (FILE *inf)
5268 {
5269   char *cp, *last;
5270   size_t len;
5271   size_t allocated;
5272
5273   allocated = 0;
5274   len = 0;
5275   last = NULL;
5276
5277   LOOP_ON_INPUT_LINES (inf, lb, cp)
5278     {
5279       if (cp[0] == '\0')        /* Empty line */
5280         continue;
5281       else if (iswhite (cp[0])) /* Not a predicate */
5282         continue;
5283       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5284         prolog_skip_comment (&lb, inf);
5285       else if ((len = prolog_pr (cp, last)) > 0)
5286         {
5287           /* Predicate or rule.  Store the function name so that we
5288              only generate a tag for the first clause.  */
5289           if (last == NULL)
5290             last = xnew (len + 1, char);
5291           else if (len + 1 > allocated)
5292             xrnew (last, len + 1, char);
5293           allocated = len + 1;
5294           strncpy (last, cp, len);
5295           last[len] = '\0';
5296         }
5297     }
5298   free (last);
5299 }
5300
5301
5302 static void
5303 prolog_skip_comment (linebuffer *plb, FILE *inf)
5304 {
5305   char *cp;
5306
5307   do
5308     {
5309       for (cp = plb->buffer; *cp != '\0'; cp++)
5310         if (cp[0] == '*' && cp[1] == '/')
5311           return;
5312       readline (plb, inf);
5313     }
5314   while (!feof (inf));
5315 }
5316
5317 /*
5318  * A predicate or rule definition is added if it matches:
5319  *     <beginning of line><Prolog Atom><whitespace>(
5320  * or  <beginning of line><Prolog Atom><whitespace>:-
5321  *
5322  * It is added to the tags database if it doesn't match the
5323  * name of the previous clause header.
5324  *
5325  * Return the size of the name of the predicate or rule, or 0 if no
5326  * header was found.
5327  */
5328 static size_t
5329 prolog_pr (char *s, char *last)
5330
5331                                 /* Name of last clause. */
5332 {
5333   size_t pos;
5334   size_t len;
5335
5336   pos = prolog_atom (s, 0);
5337   if (! pos)
5338     return 0;
5339
5340   len = pos;
5341   pos = skip_spaces (s + pos) - s;
5342
5343   if ((s[pos] == '.'
5344        || (s[pos] == '(' && (pos += 1))
5345        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5346       && (last == NULL          /* save only the first clause */
5347           || len != strlen (last)
5348           || !strneq (s, last, len)))
5349         {
5350           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5351           return len;
5352         }
5353   else
5354     return 0;
5355 }
5356
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const size_t start = pos;

  if (s[pos] == '\'')
    {
      /* Quoted atom: scan up to the closing quote.  */
      pos++;
      for (;;)
        switch (s[pos])
          {
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return 0;

          case '\\':
            /* A backslash protects the next character, which must
               be on this line.  */
            if (s[pos + 1] == '\0')
              return 0;
            pos += 2;
            break;

          case '\'':
            if (s[pos + 1] != '\'')
              return pos + 1 - start;   /* closing quote included */
            pos += 2;                   /* doubled quote */
            break;

          default:
            pos++;
            break;
          }
    }

  if (ISLOWER (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
      return pos - start;
    }

  return 0;
}
5413
5414 \f
/*
 * Support for Erlang
 *
 * Generates tags for functions, defines, and records.
 * Assumes that Erlang functions start at column 0.
 * Original code by Anders Lindgren (1996)
 */
static int erlang_func (char *s, char *last);
static void erlang_attribute (char *s);
static int erlang_atom (char *s);
5425
5426 static void
5427 Erlang_functions (FILE *inf)
5428 {
5429   char *cp, *last;
5430   int len;
5431   int allocated;
5432
5433   allocated = 0;
5434   len = 0;
5435   last = NULL;
5436
5437   LOOP_ON_INPUT_LINES (inf, lb, cp)
5438     {
5439       if (cp[0] == '\0')        /* Empty line */
5440         continue;
5441       else if (iswhite (cp[0])) /* Not function nor attribute */
5442         continue;
5443       else if (cp[0] == '%')    /* comment */
5444         continue;
5445       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5446         continue;
5447       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5448         {
5449           erlang_attribute (cp);
5450           if (last != NULL)
5451             {
5452               free (last);
5453               last = NULL;
5454             }
5455         }
5456       else if ((len = erlang_func (cp, last)) > 0)
5457         {
5458           /*
5459            * Function.  Store the function name so that we only
5460            * generates a tag for the first clause.
5461            */
5462           if (last == NULL)
5463             last = xnew (len + 1, char);
5464           else if (len + 1 > allocated)
5465             xrnew (last, len + 1, char);
5466           allocated = len + 1;
5467           strncpy (last, cp, len);
5468           last[len] = '\0';
5469         }
5470     }
5471   free (last);
5472 }
5473
5474
5475 /*
5476  * A function definition is added if it matches:
5477  *     <beginning of line><Erlang Atom><whitespace>(
5478  *
5479  * It is added to the tags database if it doesn't match the
5480  * name of the previous clause header.
5481  *
5482  * Return the size of the name of the function, or 0 if no function
5483  * was found.
5484  */
5485 static int
5486 erlang_func (char *s, char *last)
5487
5488                                 /* Name of last clause. */
5489 {
5490   int pos;
5491   int len;
5492
5493   pos = erlang_atom (s);
5494   if (pos < 1)
5495     return 0;
5496
5497   len = pos;
5498   pos = skip_spaces (s + pos) - s;
5499
5500   /* Save only the first clause. */
5501   if (s[pos++] == '('
5502       && (last == NULL
5503           || len != (int)strlen (last)
5504           || !strneq (s, last, len)))
5505         {
5506           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5507           return len;
5508         }
5509
5510   return 0;
5511 }
5512
5513
5514 /*
5515  * Handle attributes.  Currently, tags are generated for defines
5516  * and records.
5517  *
5518  * They are on the form:
5519  * -define(foo, bar).
5520  * -define(Foo(M, N), M+N).
5521  * -record(graph, {vtab = notable, cyclic = true}).
5522  */
5523 static void
5524 erlang_attribute (char *s)
5525 {
5526   char *cp = s;
5527
5528   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5529       && *cp++ == '(')
5530     {
5531       int len = erlang_atom (skip_spaces (cp));
5532       if (len > 0)
5533         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5534     }
5535   return;
5536 }
5537
5538
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 if S does
 * not start with an atom, or if a quoted atom is not closed on this
 * line.
 */
static int
erlang_atom (char *s)
{
  char *p = s;

  if (*p == '\'')
    {
      /* Quoted atom: find the closing quote.  */
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              /* A backslash protects the following character.  */
              p++;
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      p++;                      /* count the closing quote too */
    }
  else if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }

  return (int) (p - s);
}
5566
5567 \f
5568 static char *scan_separators (char *);
5569 static void add_regex (char *, language *);
5570 static char *substitute (char *, char *, struct re_registers *);
5571
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Works in place: the result is written
 * starting at NAME[0] and is always null terminated.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *src = name + 1;         /* next character to examine */
  char *dst = name;             /* where the next output byte goes */

  for (;;)
    {
      char c = *src;

      if (c == '\0')
        break;

      if (c == '\\')
        {
          /* Escape sequence: look at the character after the
             backslash.  A backslash ending the string is dropped.  */
          c = *++src;
          if (c == '\0')
            break;
          switch (c)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)      */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)         */
            case 'e': *dst++ = 033; break;    /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator stands for itself; for anything
                 else the backslash is preserved.  */
              if (c != sep)
                *dst++ = '\\';
              *dst++ = c;
              break;
            }
          src++;
          continue;
        }

      if (c == sep)
        break;                  /* unquoted separator: end of field */

      *dst++ = c;
      src++;
    }

  /* Terminate copied string. */
  *dst = '\0';

  /* Anything but a separator here means an unterminated regexp.  */
  return (*src == sep) ? src : NULL;
}
5630
5631 /* Look at the argument of --regex or --no-regex and do the right
5632    thing.  Same for each line of a regexp file. */
5633 static void
5634 analyse_regex (char *regex_arg)
5635 {
5636   if (regex_arg == NULL)
5637     {
5638       free_regexps ();          /* --no-regex: remove existing regexps */
5639       return;
5640     }
5641
5642   /* A real --regexp option or a line in a regexp file. */
5643   switch (regex_arg[0])
5644     {
5645       /* Comments in regexp file or null arg to --regex. */
5646     case '\0':
5647     case ' ':
5648     case '\t':
5649       break;
5650
5651       /* Read a regex file.  This is recursive and may result in a
5652          loop, which will stop when the file descriptors are exhausted. */
5653     case '@':
5654       {
5655         FILE *regexfp;
5656         linebuffer regexbuf;
5657         char *regexfile = regex_arg + 1;
5658
5659         /* regexfile is a file containing regexps, one per line. */
5660         regexfp = fopen (regexfile, "r");
5661         if (regexfp == NULL)
5662           {
5663             pfatal (regexfile);
5664             return;
5665           }
5666         linebuffer_init (&regexbuf);
5667         while (readline_internal (&regexbuf, regexfp) > 0)
5668           analyse_regex (regexbuf.buffer);
5669         free (regexbuf.buffer);
5670         fclose (regexfp);
5671       }
5672       break;
5673
5674       /* Regexp to be used for a specific language only. */
5675     case '{':
5676       {
5677         language *lang;
5678         char *lang_name = regex_arg + 1;
5679         char *cp;
5680
5681         for (cp = lang_name; *cp != '}'; cp++)
5682           if (*cp == '\0')
5683             {
5684               error ("unterminated language name in regex: %s", regex_arg);
5685               return;
5686             }
5687         *cp++ = '\0';
5688         lang = get_language_from_langname (lang_name);
5689         if (lang == NULL)
5690           return;
5691         add_regex (cp, lang);
5692       }
5693       break;
5694
5695       /* Regexp to be used for any language. */
5696     default:
5697       add_regex (regex_arg, NULL);
5698       break;
5699     }
5700 }
5701
5702 /* Separate the regexp pattern, compile it,
5703    and care for optional name and modifiers. */
5704 static void
5705 add_regex (char *regexp_pattern, language *lang)
5706 {
5707   static struct re_pattern_buffer zeropattern;
5708   char sep, *pat, *name, *modifiers;
5709   char empty[] = "";
5710   const char *err;
5711   struct re_pattern_buffer *patbuf;
5712   regexp *rp;
5713   bool
5714     force_explicit_name = TRUE, /* do not use implicit tag names */
5715     ignore_case = FALSE,        /* case is significant */
5716     multi_line = FALSE,         /* matches are done one line at a time */
5717     single_line = FALSE;        /* dot does not match newline */
5718
5719
5720   if (strlen (regexp_pattern) < 3)
5721     {
5722       error ("null regexp", (char *)NULL);
5723       return;
5724     }
5725   sep = regexp_pattern[0];
5726   name = scan_separators (regexp_pattern);
5727   if (name == NULL)
5728     {
5729       error ("%s: unterminated regexp", regexp_pattern);
5730       return;
5731     }
5732   if (name[1] == sep)
5733     {
5734       error ("null name for regexp \"%s\"", regexp_pattern);
5735       return;
5736     }
5737   modifiers = scan_separators (name);
5738   if (modifiers == NULL)        /* no terminating separator --> no name */
5739     {
5740       modifiers = name;
5741       name = empty;
5742     }
5743   else
5744     modifiers += 1;             /* skip separator */
5745
5746   /* Parse regex modifiers. */
5747   for (; modifiers[0] != '\0'; modifiers++)
5748     switch (modifiers[0])
5749       {
5750       case 'N':
5751         if (modifiers == name)
5752           error ("forcing explicit tag name but no name, ignoring", NULL);
5753         force_explicit_name = TRUE;
5754         break;
5755       case 'i':
5756         ignore_case = TRUE;
5757         break;
5758       case 's':
5759         single_line = TRUE;
5760         /* FALLTHRU */
5761       case 'm':
5762         multi_line = TRUE;
5763         need_filebuf = TRUE;
5764         break;
5765       default:
5766         {
5767           char wrongmod [2];
5768           wrongmod[0] = modifiers[0];
5769           wrongmod[1] = '\0';
5770           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5771         }
5772         break;
5773       }
5774
5775   patbuf = xnew (1, struct re_pattern_buffer);
5776   *patbuf = zeropattern;
5777   if (ignore_case)
5778     {
5779       static char lc_trans[CHARS];
5780       int i;
5781       for (i = 0; i < CHARS; i++)
5782         lc_trans[i] = lowcase (i);
5783       patbuf->translate = lc_trans;     /* translation table to fold case  */
5784     }
5785
5786   if (multi_line)
5787     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5788   else
5789     pat = regexp_pattern;
5790
5791   if (single_line)
5792     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5793   else
5794     re_set_syntax (RE_SYNTAX_EMACS);
5795
5796   err = re_compile_pattern (pat, strlen (pat), patbuf);
5797   if (multi_line)
5798     free (pat);
5799   if (err != NULL)
5800     {
5801       error ("%s while compiling pattern", err);
5802       return;
5803     }
5804
5805   rp = p_head;
5806   p_head = xnew (1, regexp);
5807   p_head->pattern = savestr (regexp_pattern);
5808   p_head->p_next = rp;
5809   p_head->lang = lang;
5810   p_head->pat = patbuf;
5811   p_head->name = savestr (name);
5812   p_head->error_signaled = FALSE;
5813   p_head->force_explicit_name = force_explicit_name;
5814   p_head->ignore_case = ignore_case;
5815   p_head->multi_line = multi_line;
5816 }
5817
5818 /*
5819  * Do the substitutions indicated by the regular expression and
5820  * arguments.
5821  */
5822 static char *
5823 substitute (char *in, char *out, struct re_registers *regs)
5824 {
5825   char *result, *t;
5826   int size, dig, diglen;
5827
5828   result = NULL;
5829   size = strlen (out);
5830
5831   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5832   if (out[size - 1] == '\\')
5833     fatal ("pattern error in \"%s\"", out);
5834   for (t = etags_strchr (out, '\\');
5835        t != NULL;
5836        t = etags_strchr (t + 2, '\\'))
5837     if (ISDIGIT (t[1]))
5838       {
5839         dig = t[1] - '0';
5840         diglen = regs->end[dig] - regs->start[dig];
5841         size += diglen - 2;
5842       }
5843     else
5844       size -= 1;
5845
5846   /* Allocate space and do the substitutions. */
5847   assert (size >= 0);
5848   result = xnew (size + 1, char);
5849
5850   for (t = result; *out != '\0'; out++)
5851     if (*out == '\\' && ISDIGIT (*++out))
5852       {
5853         dig = *out - '0';
5854         diglen = regs->end[dig] - regs->start[dig];
5855         strncpy (t, in + regs->start[dig], diglen);
5856         t += diglen;
5857       }
5858     else
5859       *t++ = *out;
5860   *t = '\0';
5861
5862   assert (t <= result + size);
5863   assert (t - result == (int)strlen (result));
5864
5865   return result;
5866 }
5867
5868 /* Deallocate all regexps. */
5869 static void
5870 free_regexps (void)
5871 {
5872   regexp *rp;
5873   while (p_head != NULL)
5874     {
5875       rp = p_head->p_next;
5876       free (p_head->pattern);
5877       free (p_head->name);
5878       free (p_head);
5879       p_head = rp;
5880     }
5881   return;
5882 }
5883
5884 /*
5885  * Reads the whole file as a single string from `filebuf' and looks for
5886  * multi-line regular expressions, creating tags on matches.
5887  * readline already dealt with normal regexps.
5888  *
5889  * Idea by Ben Wing <ben@666.com> (2002).
5890  */
5891 static void
5892 regex_tag_multiline (void)
5893 {
5894   char *buffer = filebuf.buffer;
5895   regexp *rp;
5896   char *name;
5897
5898   for (rp = p_head; rp != NULL; rp = rp->p_next)
5899     {
5900       int match = 0;
5901
5902       if (!rp->multi_line)
5903         continue;               /* skip normal regexps */
5904
5905       /* Generic initializations before parsing file from memory. */
5906       lineno = 1;               /* reset global line number */
5907       charno = 0;               /* reset global char number */
5908       linecharno = 0;           /* reset global char number of line start */
5909
5910       /* Only use generic regexps or those for the current language. */
5911       if (rp->lang != NULL && rp->lang != curfdp->lang)
5912         continue;
5913
5914       while (match >= 0 && match < filebuf.len)
5915         {
5916           match = re_search (rp->pat, buffer, filebuf.len, charno,
5917                              filebuf.len - match, &rp->regs);
5918           switch (match)
5919             {
5920             case -2:
5921               /* Some error. */
5922               if (!rp->error_signaled)
5923                 {
5924                   error ("regexp stack overflow while matching \"%s\"",
5925                          rp->pattern);
5926                   rp->error_signaled = TRUE;
5927                 }
5928               break;
5929             case -1:
5930               /* No match. */
5931               break;
5932             default:
5933               if (match == rp->regs.end[0])
5934                 {
5935                   if (!rp->error_signaled)
5936                     {
5937                       error ("regexp matches the empty string: \"%s\"",
5938                              rp->pattern);
5939                       rp->error_signaled = TRUE;
5940                     }
5941                   match = -3;   /* exit from while loop */
5942                   break;
5943                 }
5944
5945               /* Match occurred.  Construct a tag. */
5946               while (charno < rp->regs.end[0])
5947                 if (buffer[charno++] == '\n')
5948                   lineno++, linecharno = charno;
5949               name = rp->name;
5950               if (name[0] == '\0')
5951                 name = NULL;
5952               else /* make a named tag */
5953                 name = substitute (buffer, rp->name, &rp->regs);
5954               if (rp->force_explicit_name)
5955                 /* Force explicit tag name, if a name is there. */
5956                 pfnote (name, TRUE, buffer + linecharno,
5957                         charno - linecharno + 1, lineno, linecharno);
5958               else
5959                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5960                           charno - linecharno + 1, lineno, linecharno);
5961               break;
5962             }
5963         }
5964     }
5965 }
5966
5967 \f
5968 static bool
5969 nocase_tail (const char *cp)
5970 {
5971   register int len = 0;
5972
5973   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5974     cp++, len++;
5975   if (*cp == '\0' && !intoken (dbp[len]))
5976     {
5977       dbp += len;
5978       return TRUE;
5979     }
5980   return FALSE;
5981 }
5982
5983 static void
5984 get_tag (register char *bp, char **namepp)
5985 {
5986   register char *cp = bp;
5987
5988   if (*bp != '\0')
5989     {
5990       /* Go till you get to white space or a syntactic break */
5991       for (cp = bp + 1; !notinname (*cp); cp++)
5992         continue;
5993       make_tag (bp, cp - bp, TRUE,
5994                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5995     }
5996
5997   if (namepp != NULL)
5998     *namepp = savenstr (bp, cp - bp);
5999 }
6000
6001 /*
6002  * Read a line of text from `stream' into `lbp', excluding the
6003  * newline or CR-NL, if any.  Return the number of characters read from
6004  * `stream', which is the length of the line including the newline.
6005  *
6006  * On DOS or Windows we do not count the CR character, if any before the
6007  * NL, in the returned length; this mirrors the behavior of Emacs on those
6008  * platforms (for text files, it translates CR-NL to NL as it reads in the
6009  * file).
6010  *
6011  * If multi-line regular expressions are requested, each line read is
6012  * appended to `filebuf'.
6013  */
6014 static long
6015 readline_internal (linebuffer *lbp, register FILE *stream)
6016 {
6017   char *buffer = lbp->buffer;
6018   register char *p = lbp->buffer;
6019   register char *pend;
6020   int chars_deleted;
6021
6022   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6023
6024   for (;;)
6025     {
6026       register int c = getc (stream);
6027       if (p == pend)
6028         {
6029           /* We're at the end of linebuffer: expand it. */
6030           lbp->size *= 2;
6031           xrnew (buffer, lbp->size, char);
6032           p += buffer - lbp->buffer;
6033           pend = buffer + lbp->size;
6034           lbp->buffer = buffer;
6035         }
6036       if (c == EOF)
6037         {
6038           *p = '\0';
6039           chars_deleted = 0;
6040           break;
6041         }
6042       if (c == '\n')
6043         {
6044           if (p > buffer && p[-1] == '\r')
6045             {
6046               p -= 1;
6047 #ifdef DOS_NT
6048              /* Assume CRLF->LF translation will be performed by Emacs
6049                 when loading this file, so CRs won't appear in the buffer.
6050                 It would be cleaner to compensate within Emacs;
6051                 however, Emacs does not know how many CRs were deleted
6052                 before any given point in the file.  */
6053               chars_deleted = 1;
6054 #else
6055               chars_deleted = 2;
6056 #endif
6057             }
6058           else
6059             {
6060               chars_deleted = 1;
6061             }
6062           *p = '\0';
6063           break;
6064         }
6065       *p++ = c;
6066     }
6067   lbp->len = p - buffer;
6068
6069   if (need_filebuf              /* we need filebuf for multi-line regexps */
6070       && chars_deleted > 0)     /* not at EOF */
6071     {
6072       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6073         {
6074           /* Expand filebuf. */
6075           filebuf.size *= 2;
6076           xrnew (filebuf.buffer, filebuf.size, char);
6077         }
6078       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6079       filebuf.len += lbp->len;
6080       filebuf.buffer[filebuf.len++] = '\n';
6081       filebuf.buffer[filebuf.len] = '\0';
6082     }
6083
6084   return lbp->len + chars_deleted;
6085 }
6086
6087 /*
6088  * Like readline_internal, above, but in addition try to match the
6089  * input line against relevant regular expressions and manage #line
6090  * directives.
6091  */
6092 static void
6093 readline (linebuffer *lbp, FILE *stream)
6094 {
6095   long result;
6096
6097   linecharno = charno;          /* update global char number of line start */
6098   result = readline_internal (lbp, stream); /* read line */
6099   lineno += 1;                  /* increment global line number */
6100   charno += result;             /* increment global char number */
6101
6102   /* Honor #line directives. */
6103   if (!no_line_directive)
6104     {
6105       static bool discard_until_line_directive;
6106
6107       /* Check whether this is a #line directive. */
6108       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6109         {
6110           unsigned int lno;
6111           int start = 0;
6112
6113           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6114               && start > 0)     /* double quote character found */
6115             {
6116               char *endp = lbp->buffer + start;
6117
6118               while ((endp = etags_strchr (endp, '"')) != NULL
6119                      && endp[-1] == '\\')
6120                 endp++;
6121               if (endp != NULL)
6122                 /* Ok, this is a real #line directive.  Let's deal with it. */
6123                 {
6124                   char *taggedabsname;  /* absolute name of original file */
6125                   char *taggedfname;    /* name of original file as given */
6126                   char *name;           /* temp var */
6127
6128                   discard_until_line_directive = FALSE; /* found it */
6129                   name = lbp->buffer + start;
6130                   *endp = '\0';
6131                   canonicalize_filename (name);
6132                   taggedabsname = absolute_filename (name, tagfiledir);
6133                   if (filename_is_absolute (name)
6134                       || filename_is_absolute (curfdp->infname))
6135                     taggedfname = savestr (taggedabsname);
6136                   else
6137                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6138
6139                   if (streq (curfdp->taggedfname, taggedfname))
6140                     /* The #line directive is only a line number change.  We
6141                        deal with this afterwards. */
6142                     free (taggedfname);
6143                   else
6144                     /* The tags following this #line directive should be
6145                        attributed to taggedfname.  In order to do this, set
6146                        curfdp accordingly. */
6147                     {
6148                       fdesc *fdp; /* file description pointer */
6149
6150                       /* Go look for a file description already set up for the
6151                          file indicated in the #line directive.  If there is
6152                          one, use it from now until the next #line
6153                          directive. */
6154                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6155                         if (streq (fdp->infname, curfdp->infname)
6156                             && streq (fdp->taggedfname, taggedfname))
6157                           /* If we remove the second test above (after the &&)
6158                              then all entries pertaining to the same file are
6159                              coalesced in the tags file.  If we use it, then
6160                              entries pertaining to the same file but generated
6161                              from different files (via #line directives) will
6162                              go into separate sections in the tags file.  These
6163                              alternatives look equivalent.  The first one
6164                              destroys some apparently useless information. */
6165                           {
6166                             curfdp = fdp;
6167                             free (taggedfname);
6168                             break;
6169                           }
6170                       /* Else, if we already tagged the real file, skip all
6171                          input lines until the next #line directive. */
6172                       if (fdp == NULL) /* not found */
6173                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6174                           if (streq (fdp->infabsname, taggedabsname))
6175                             {
6176                               discard_until_line_directive = TRUE;
6177                               free (taggedfname);
6178                               break;
6179                             }
6180                       /* Else create a new file description and use that from
6181                          now on, until the next #line directive. */
6182                       if (fdp == NULL) /* not found */
6183                         {
6184                           fdp = fdhead;
6185                           fdhead = xnew (1, fdesc);
6186                           *fdhead = *curfdp; /* copy curr. file description */
6187                           fdhead->next = fdp;
6188                           fdhead->infname = savestr (curfdp->infname);
6189                           fdhead->infabsname = savestr (curfdp->infabsname);
6190                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6191                           fdhead->taggedfname = taggedfname;
6192                           fdhead->usecharno = FALSE;
6193                           fdhead->prop = NULL;
6194                           fdhead->written = FALSE;
6195                           curfdp = fdhead;
6196                         }
6197                     }
6198                   free (taggedabsname);
6199                   lineno = lno - 1;
6200                   readline (lbp, stream);
6201                   return;
6202                 } /* if a real #line directive */
6203             } /* if #line is followed by a number */
6204         } /* if line begins with "#line " */
6205
6206       /* If we are here, no #line directive was found. */
6207       if (discard_until_line_directive)
6208         {
6209           if (result > 0)
6210             {
6211               /* Do a tail recursion on ourselves, thus discarding the contents
6212                  of the line buffer. */
6213               readline (lbp, stream);
6214               return;
6215             }
6216           /* End of file. */
6217           discard_until_line_directive = FALSE;
6218           return;
6219         }
6220     } /* if #line directives should be considered */
6221
6222   {
6223     int match;
6224     regexp *rp;
6225     char *name;
6226
6227     /* Match against relevant regexps. */
6228     if (lbp->len > 0)
6229       for (rp = p_head; rp != NULL; rp = rp->p_next)
6230         {
6231           /* Only use generic regexps or those for the current language.
6232              Also do not use multiline regexps, which is the job of
6233              regex_tag_multiline. */
6234           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6235               || rp->multi_line)
6236             continue;
6237
6238           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6239           switch (match)
6240             {
6241             case -2:
6242               /* Some error. */
6243               if (!rp->error_signaled)
6244                 {
6245                   error ("regexp stack overflow while matching \"%s\"",
6246                          rp->pattern);
6247                   rp->error_signaled = TRUE;
6248                 }
6249               break;
6250             case -1:
6251               /* No match. */
6252               break;
6253             case 0:
6254               /* Empty string matched. */
6255               if (!rp->error_signaled)
6256                 {
6257                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6258                   rp->error_signaled = TRUE;
6259                 }
6260               break;
6261             default:
6262               /* Match occurred.  Construct a tag. */
6263               name = rp->name;
6264               if (name[0] == '\0')
6265                 name = NULL;
6266               else /* make a named tag */
6267                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6268               if (rp->force_explicit_name)
6269                 /* Force explicit tag name, if a name is there. */
6270                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6271               else
6272                 make_tag (name, strlen (name), TRUE,
6273                           lbp->buffer, match, lineno, linecharno);
6274               break;
6275             }
6276         }
6277   }
6278 }
6279
6280 \f
/*
 * Duplicate the NUL-terminated string CP.  The work is delegated to
 * savenstr, which is asked to copy all strlen(cp) characters, and the
 * pointer it produces is handed back to the caller.
 */
static char *
savestr (const char *cp)
{
  const int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6290
6291 /*
6292  * Return a pointer to a space of size LEN+1 allocated with xnew where
6293  * the string CP has been copied for at most the first LEN characters.
6294  */
6295 static char *
6296 savenstr (const char *cp, int len)
6297 {
6298   register char *dp;
6299
6300   dp = xnew (len + 1, char);
6301   strncpy (dp, cp, len);
6302   dp[len] = '\0';
6303   return dp;
6304 }
6305
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL is part of the
 * searched string, so looking for '\0' yields a pointer to it.
 *
 * Behaves like POSIX strrchr, included for portability.  As in
 * strrchr, C is converted to char before comparing: otherwise, where
 * plain char is signed, a byte above CHAR_MAX passed as a positive
 * int (e.g. 0xE9) would never compare equal to the promoted *sp.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char wanted = (char) c;
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == wanted)
        last = sp;
      if (*sp == '\0')          /* terminator already examined above */
        break;
    }
  return (char *) last;
}
6325
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL is part of the
 * searched string, so looking for '\0' yields a pointer to it.
 *
 * Behaves like POSIX strchr, included for portability.  As in
 * strchr, C is converted to char before comparing: otherwise, where
 * plain char is signed, a byte above CHAR_MAX passed as a positive
 * int (e.g. 0xE9) would never compare equal to the promoted *sp.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  const char wanted = (char) c;

  for (;; sp++)
    {
      if (*sp == wanted)
        return (char *) sp;
      if (*sp == '\0')          /* reached the end without a match */
        return NULL;
    }
}
6342
/*
 * Compare S1 and S2, treating two alphabetic characters as equal when
 * their lowcase forms are equal; any other pair must match exactly.
 * The result is the difference of the first pair that does not match
 * (taken between the lowcase forms if both are alphabetic), or of the
 * pair found where S1 ends.
 *
 * Stands in for BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (register const char *s1, register const char *s2)
{
  /* Walk both strings together until S1 ends or a pair differs. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6361
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: at most the first N
 * characters of each string are examined, and 0 is returned when no
 * difference is found among them (in particular when N <= 0).
 * Otherwise the result is the difference of the first pair that does
 * not match (taken between the lowcase forms if both characters are
 * alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 *
 * The count is tested before the characters, so a string S1 that ends
 * exactly after N matching characters compares equal to a longer S2;
 * looking at the terminator of S1 in that case would compare one
 * character beyond the requested N.
 */
static int
etags_strncasecmp (register const char *s1, register const char *s2, register int n)
{
  for (; n > 0; s1++, s2++, n--)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      if (diff != 0)
        return diff;
      if (*s1 == '\0')          /* both strings ended together */
        return 0;
    }

  return 0;                     /* N characters compared, all equal */
}
6384
/* Advance CP past every leading character for which iswhite is true and
   return the resulting pointer.  The end of the string is not a space,
   so the scan stops there at the latest. */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6393
/* Advance CP up to the first character for which iswhite is true, or
   up to the terminating NUL, whichever comes first, and return the
   resulting pointer. */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6402
/* Report a fatal problem and terminate the program.  S1 and S2 are
   passed unchanged to error (S1 is its printf control string, S2 the
   argument for it); the process then exits with EXIT_FAILURE, so this
   function does not return.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6410
/* Like fatal, but for failed system calls: print S1 through perror,
   which appends the message for the current errno, then exit with
   EXIT_FAILURE.  Does not return.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6417
6418 static void
6419 suggest_asking_for_help (void)
6420 {
6421   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6422            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6423   exit (EXIT_FAILURE);
6424 }
6425
6426 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6427 static void
6428 error (const char *s1, const char *s2)
6429 {
6430   fprintf (stderr, "%s: ", progname);
6431   fprintf (stderr, s1, s2);
6432   fprintf (stderr, "\n");
6433 }
6434
6435 /* Return a newly-allocated string whose contents
6436    concatenate those of s1, s2, s3.  */
6437 static char *
6438 concat (const char *s1, const char *s2, const char *s3)
6439 {
6440   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6441   char *result = xnew (len1 + len2 + len3 + 1, char);
6442
6443   strcpy (result, s1);
6444   strcpy (result + len1, s2);
6445   strcpy (result + len1 + len2, s3);
6446   result[len1 + len2 + len3] = '\0';
6447
6448   return result;
6449 }
6450
6451 \f
6452 /* Does the same work as the system V getcwd, but does not need to
6453    guess the buffer size in advance. */
6454 static char *
6455 etags_getcwd (void)
6456 {
6457 #ifdef HAVE_GETCWD
6458   int bufsize = 200;
6459   char *path = xnew (bufsize, char);
6460
6461   while (getcwd (path, bufsize) == NULL)
6462     {
6463       if (errno != ERANGE)
6464         pfatal ("getcwd");
6465       bufsize *= 2;
6466       free (path);
6467       path = xnew (bufsize, char);
6468     }
6469
6470   canonicalize_filename (path);
6471   return path;
6472
6473 #else /* not HAVE_GETCWD */
6474 #if MSDOS
6475
6476   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6477
6478   getwd (path);
6479
6480   for (p = path; *p != '\0'; p++)
6481     if (*p == '\\')
6482       *p = '/';
6483     else
6484       *p = lowcase (*p);
6485
6486   return strdup (path);
6487 #else /* not MSDOS */
6488   linebuffer path;
6489   FILE *pipe;
6490
6491   linebuffer_init (&path);
6492   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6493   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6494     pfatal ("pwd");
6495   pclose (pipe);
6496
6497   return path.buffer;
6498 #endif /* not MSDOS */
6499 #endif /* not HAVE_GETCWD */
6500 }
6501
6502 /* Return a newly allocated string containing the file name of FILE
6503    relative to the absolute directory DIR (which should end with a slash). */
6504 static char *
6505 relative_filename (char *file, char *dir)
6506 {
6507   char *fp, *dp, *afn, *res;
6508   int i;
6509
6510   /* Find the common root of file and dir (with a trailing slash). */
6511   afn = absolute_filename (file, cwd);
6512   fp = afn;
6513   dp = dir;
6514   while (*fp++ == *dp++)
6515     continue;
6516   fp--, dp--;                   /* back to the first differing char */
6517 #ifdef DOS_NT
6518   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6519     return afn;
6520 #endif
6521   do                            /* look at the equal chars until '/' */
6522     fp--, dp--;
6523   while (*fp != '/');
6524
6525   /* Build a sequence of "../" strings for the resulting relative file name. */
6526   i = 0;
6527   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6528     i += 1;
6529   res = xnew (3*i + strlen (fp + 1) + 1, char);
6530   res[0] = '\0';
6531   while (i-- > 0)
6532     strcat (res, "../");
6533
6534   /* Add the file name relative to the common root of file and dir. */
6535   strcat (res, fp + 1);
6536   free (afn);
6537
6538   return res;
6539 }
6540
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is; otherwise FILE is appended to DIR.
   The name is then simplified in place: every "/dirname/.." and "/."
   component is deleted.  If nothing is left, "/" is returned.  Under
   DOS_NT a relative name with a drive letter is a fatal error. */
static char *
absolute_filename (char *file, char *dir)
{
  char *slash, *start, *result;

  if (filename_is_absolute (file))
    result = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    result = concat (dir, file, "");

  /* Visit each slash in turn.  After a deletion the same position is
     examined again, because new text has moved under it. */
  for (slash = etags_strchr (result, '/');
       slash != NULL && slash[0] != '\0'; )
    {
      if (slash[1] != '.')
        {
          slash = etags_strchr (slash + 1, '/');
          continue;
        }

      if (slash[2] == '.' && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/..": walk back to the start of the preceding component. */
          for (start = slash - 1;
               start >= result && !filename_is_absolute (start);
               start--)
            continue;
          if (start < result)
            start = slash;      /* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (start[0] != '/')
            start = slash;
#endif
          /* The length includes the terminating NUL. */
          memmove (start, slash + 3, strlen (slash + 2));
          slash = start;
        }
      else if (slash[2] == '/' || slash[2] == '\0')
        /* "/.": drop the two characters, again moving the NUL too. */
        memmove (slash, slash + 2, strlen (slash + 1));
      else
        /* A component that merely begins with a dot. */
        slash = etags_strchr (slash + 1, '/');
    }

  if (result[0] != '\0')
    return result;

  /* Just a safety net: should never happen. */
  free (result);
  return savestr ("/");
}
6603
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash the result is
   simply a copy of DIR.  FILE is modified temporarily but is
   restored before returning. */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the duration of the call,
     so that only the directory part is made absolute. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6623
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a letter, a colon and a slash.  The
   argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (char *fn)
{
  int absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6635
/* Canonicalize the file name FN in place: each run of separator
   characters becomes a single slash and, under DOS_NT, an upper case
   drive letter is lowcased.  The separator is '/' normally and '\\'
   under DOS_NT.  The result is never longer than the input. */
static void
canonicalize_filename (register char *fn)
{
  register char *src, *dst;
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  sep = '\\';
#endif

  /* SRC reads the old name, DST writes the new one over it. */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do                    /* swallow the whole run of separators */
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6665
6666 \f
6667 /* Initialize a linebuffer for use. */
6668 static void
6669 linebuffer_init (linebuffer *lbp)
6670 {
6671   lbp->size = (DEBUG) ? 3 : 200;
6672   lbp->buffer = xnew (lbp->size, char);
6673   lbp->buffer[0] = '\0';
6674   lbp->len = 0;
6675 }
6676
6677 /* Set the minimum size of a string contained in a linebuffer. */
6678 static void
6679 linebuffer_setlen (linebuffer *lbp, int toksize)
6680 {
6681   while (lbp->size <= toksize)
6682     {
6683       lbp->size *= 2;
6684       xrnew (lbp->buffer, lbp->size, char);
6685     }
6686   lbp->len = toksize;
6687 }
6688
6689 /* Like malloc but get fatal error if memory is exhausted. */
6690 static PTR
6691 xmalloc (size_t size)
6692 {
6693   PTR result = (PTR) malloc (size);
6694   if (result == NULL)
6695     fatal ("virtual memory exhausted", (char *)NULL);
6696   return result;
6697 }
6698
6699 static PTR
6700 xrealloc (char *ptr, size_t size)
6701 {
6702   PTR result = (PTR) realloc (ptr, size);
6703   if (result == NULL)
6704     fatal ("virtual memory exhausted", (char *)NULL);
6705   return result;
6706 }
6707
6708 /*
6709  * Local Variables:
6710  * indent-tabs-mode: t
6711  * tab-width: 8
6712  * fill-column: 79
6713  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6714  * c-file-style: "gnu"
6715  * End:
6716  */
6717
6718 /* etags.c ends here */